diff options
| author | Přemysl Janouch <p.janouch@gmail.com> | 2016-12-22 22:58:20 +0100 | 
|---|---|---|
| committer | Přemysl Janouch <p.janouch@gmail.com> | 2016-12-22 23:26:41 +0100 | 
| commit | 128fb157b302f0f0881844fc7a2fa5653c74a197 (patch) | |
| tree | 93f342b5a7a195af211f17ab7044bac2064b81fc | |
| download | bfc-128fb157b302f0f0881844fc7a2fa5653c74a197.tar.gz bfc-128fb157b302f0f0881844fc7a2fa5653c74a197.tar.xz bfc-128fb157b302f0f0881844fc7a2fa5653c74a197.zip | |
Initial commit
| -rw-r--r-- | LICENSE | 15 | ||||
| -rw-r--r-- | Makefile | 12 | ||||
| -rw-r--r-- | README.adoc | 49 | ||||
| -rw-r--r-- | bfc-amd64-linux.c | 723 | ||||
| -rw-r--r-- | interpreters/Makefile | 14 | ||||
| -rw-r--r-- | interpreters/README.adoc | 15 | ||||
| -rw-r--r-- | interpreters/bf-faster-loops.c | 151 | ||||
| -rw-r--r-- | interpreters/bf-jit-opt.c | 495 | ||||
| -rw-r--r-- | interpreters/bf-jit-unsafe-opt.c | 617 | ||||
| -rw-r--r-- | interpreters/bf-jit-unsafe.c | 495 | ||||
| -rw-r--r-- | interpreters/bf-jit.c | 327 | ||||
| -rw-r--r-- | interpreters/bf-optimizing.c | 213 | ||||
| -rw-r--r-- | interpreters/bf.c | 160 | 
13 files changed, 3286 insertions, 0 deletions
| @@ -0,0 +1,15 @@ + Copyright (c) 2016, Přemysl Janouch <p.janouch@gmail.com> + All rights reserved. +  + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. +  + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..5bb5db1 --- /dev/null +++ b/Makefile @@ -0,0 +1,12 @@ +# All we need is C99 and POSIX, which this should make available +CFLAGS = -std=gnu99 +NAMES = bfc-amd64-linux + +all: $(NAMES) + +%: %.c +	$(CC) $(CPPFLAGS) $(CFLAGS) $< -o $@ +clean: +	rm -f $(NAMES) + +.PHONY: all clean diff --git a/README.adoc b/README.adoc new file mode 100644 index 0000000..660c38c --- /dev/null +++ b/README.adoc @@ -0,0 +1,49 @@ +bfc +=== + +'bfc' is a small, fast, self-contained, optimizing Brainfuck compiler for Linux +on Intel x86-64. + +Also included are several interpreters in various states of sophistication that +document my progress as I was writing this, from the simplest approach to an +optimizing JIT compiler. + +It's pretty easy to retarget the compiler, it just means redoing half the work. +The compiler itself is platform agnostic. + +Building +-------- +Build dependencies: a C99 compiler + +Runtime dependencies: Linux + + $ git clone https://github.com/pjanouch/bfc.git + $ cd bfc + $ make + +To obtain dumps of the intermediate representation, compile with `-DDEBUG`: + + $ make CPPFLAGS=-DDEBUG + +Usage +----- + + ./bfc-amd64-linux [INPUT-FILE] [OUTPUT-FILE] + +When no input file is specified, stdin is used.  Similarly, the default output +filename is a.out.  The resulting file can be run on the target platform. + +Contributing and Support +------------------------ +Use this project's GitHub to report any bugs, request features, or submit pull +requests.  If you want to discuss this project, or maybe just hang out with +the developer, feel free to join me at irc://irc.janouch.name, channel #dev. + +License +------- +'bfc' is written by Přemysl Janouch <p.janouch@gmail.com>. + +You may use the software under the terms of the ISC license, the text of which +is included within the package, or, at your option, you may relicense the work +under the MIT or the Modified BSD License, as listed at the following site: + +http://www.gnu.org/licenses/license-list.html diff --git a/bfc-amd64-linux.c b/bfc-amd64-linux.c new file mode 100644 index 0000000..039e224 --- /dev/null +++ b/bfc-amd64-linux.c @@ -0,0 +1,723 @@ +// This is an exercise in futility more than anything else +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <stdbool.h> +#include <assert.h> +#include <errno.h> + +#define exit_fatal(...)                                                        \ +	do {                                                                       \ +		fprintf (stderr, "fatal: " __VA_ARGS__);                               \ +		exit (EXIT_FAILURE);                                                   \ +	} while (0) + +// --- Safe memory management -------------------------------------------------- + +static void * +xcalloc (size_t m, size_t n) +{ +	void *p = calloc (m, n); +	if (!p) +		exit_fatal ("calloc: %s\n", strerror (errno)); +	return p; +} + +static void * +xrealloc (void *o, size_t n) +{ +	void *p = realloc (o, n); +	if (!p && n) +		exit_fatal ("realloc: %s\n", strerror (errno)); +	return p; +} + +// --- Dynamically allocated strings ------------------------------------------- + +struct str +{ +	char *str;                          ///< String data, null terminated +	size_t alloc;                       ///< How many bytes are allocated +	size_t len;                         ///< How long the string actually is +}; + +static void +str_init (struct str *self) +{ +	self->len = 0; +	self->str = xcalloc (1, (self->alloc = 16)); +} + +static void +str_ensure_space (struct str *self, size_t n) +{ +	// We allocate at least one more byte for the terminating null character +	size_t new_alloc = self->alloc; +	while (new_alloc <= self->len + n) +		new_alloc <<= 1; +	if (new_alloc != self->alloc) +		self->str = xrealloc (self->str, (self->alloc = new_alloc)); +} + +static void +str_append_data (struct str *self, const void *data, size_t n) +{ +	str_ensure_space (self, n); +	memcpy (self->str + self->len, data, n); +	self->str[self->len += n] = '\0'; +} + +static void +str_append_c (struct str *self, char c) +{ +	str_append_data (self, &c, 1); +} + +// --- Application ------------------------------------------------------------- + +enum command +{ +	RIGHT, LEFT, INC, DEC, IN, OUT, BEGIN, END, +	SET, EAT, INCACC, DECACC +}; + +bool grouped[] = { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 }; +struct instruction { enum command cmd; int offset; size_t arg; }; +#define INSTRUCTION(c, o, a) (struct instruction) { (c), (o), (a) } + +// - - Debugging - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#ifdef DEBUG +static void +debug_dump_instruction (FILE *fp, const struct instruction *in) +{ +	const char *name; +	switch (in->cmd) +	{ +	case RIGHT:  name = "RIGHT "; break; +	case LEFT:   name = "LEFT  "; break; +	case INC:    name = "INC   "; break; +	case DEC:    name = "DEC   "; break; +	case OUT:    name = "OUT   "; break; +	case IN:     name = "IN    "; break; +	case BEGIN:  name = "BEGIN "; break; +	case END:    name = "END   "; break; +	case SET:    name = "SET   "; break; +	case EAT:    name = "EAT   "; break; +	case INCACC: name = "INCACC"; break; +	case DECACC: name = "DECACC"; break; +	} +	fprintf (fp, "%s %zu", name, in->arg); +	if (in->offset != 0) +		fprintf (fp, " [%d]", in->offset); +	fprintf (fp, "\n"); +} + +static void +debug_dump (const char *filename, struct instruction *in, size_t len) +{ +	FILE *fp = fopen (filename, "w"); +	long indent = 0; +	for (size_t i = 0; i < len; i++) +	{ +		if (in[i].cmd == END) +			indent--; +		for (long k = 0; k < indent; k++) +			fputs ("  ", fp); +		debug_dump_instruction (fp, &in[i]); +		if (in[i].cmd == BEGIN) +			indent++; +	} +	fclose (fp); +} +#else +#define debug_dump(...) +#endif + +// - - Optimization passes - - - - - - - - - - - - - - - - - - - - - - - - - - - + +static size_t +optimize_assignment (struct instruction *irb, size_t irb_len) +{ +	size_t in = 0, out = 0; +	for (; in < irb_len; in++, out++) +	{ +		if (in + 2 < irb_len +		 && irb[in    ].cmd == BEGIN +		 && irb[in + 1].cmd == DEC && irb[in + 1].arg == 1 +		 && irb[in + 2].cmd == END) +		{ +			irb[out] = INSTRUCTION (SET, 0, 0); +			in += 2; +		} +		else if (out && irb[out - 1].cmd == SET && irb[in].cmd == INC) +			irb[--out].arg += irb[in].arg; +		else if (out != in) +			irb[out] = irb[in]; +	} +	return out; +} + +// Add offsets to INC/DEC/SET stuck between LEFT/RIGHT +// and compress the LEFT/RIGHT sequences +static size_t +optimize_offseted_inc_dec (struct instruction *irb, size_t irb_len) +{ +	size_t in = 0, out = 0; +	for (in = 0, out = 0; in < irb_len; in++, out++) +	{ +		intptr_t dir = 0; +		if (irb[in].cmd == RIGHT) +			dir = irb[in].arg; +		else if (irb[in].cmd == LEFT) +			dir = -(intptr_t) irb[in].arg; +		else +		{ +			irb[out] = irb[in]; +			continue; +		} + +		while (in + 2 < irb_len) +		{ +			// An immediate offset has its limits on x86-64 +			if (dir < INT8_MIN || dir > INT8_MAX) +				break; + +			intptr_t diff; +			if (irb[in + 2].cmd == RIGHT) +				diff = irb[in + 2].arg; +			else if (irb[in + 2].cmd == LEFT) +				diff = -(intptr_t) irb[in + 2].arg; +			else +				break; + +			int cmd = irb[in + 1].cmd; +			if (cmd != INC && cmd != DEC && cmd != SET) +				break; + +			irb[out] = irb[in + 1]; +			irb[out].offset = dir; + +			dir += diff; +			out += 1; +			in += 2; +		} + +		for (; in + 1 < irb_len; in++) +		{ +			if (irb[in + 1].cmd == RIGHT) +				dir += irb[in + 1].arg; +			else if (irb[in + 1].cmd == LEFT) +				dir -= (intptr_t) irb[in + 1].arg; +			else +				break; +		} + +		if (!dir) +			out--; +		else if (dir > 0) +			irb[out] = INSTRUCTION (RIGHT, 0, dir); +		else +			irb[out] = INSTRUCTION (LEFT, 0, -dir); +	} +	return out; +} + +// Try to eliminate loops that eat a cell and add/subtract its value +// to/from some other cell +static size_t +optimize_inc_dec_loops (struct instruction *irb, size_t irb_len) +{ +	size_t in = 0, out = 0; +	for (in = 0, out = 0; in < irb_len; in++, out++) +	{ +		irb[out] = irb[in]; +		if (irb[in].cmd != BEGIN) +			continue; + +		bool ok = false; +		size_t count = 0; +		for (size_t k = in + 1; k < irb_len; k++) +		{ +			if (irb[k].cmd == END) +			{ +				ok = true; +				break; +			} +			if (irb[k].cmd != INC +			 && irb[k].cmd != DEC) +				break; +			count++; +		} +		if (!ok) +			continue; + +		// Stable sort operations by their offsets, put [0] first +		bool sorted; +		do +		{ +			sorted = true; +			for (size_t k = 1; k < count; k++) +			{ +				if (irb[in + k].offset == 0) +					continue; +				if (irb[in + k + 1].offset != 0 +				 && irb[in + k].offset <= irb[in + k + 1].offset) +					continue; + +				struct instruction tmp = irb[in + k + 1]; +				irb[in + k + 1] = irb[in + k]; +				irb[in + k] = tmp; +				sorted = false; +			} +		} +		while (!sorted); + +		// Abort the optimization on duplicate offsets (complication with [0]) +		for (size_t k = 1; k < count; k++) +			if (irb[in + k].offset == irb[in + k + 1].offset) +				ok = false; +		// XXX: can't make the code longer either +		for (size_t k = 1; k <= count; k++) +			if (irb[in + k].arg != 1) +				ok = false; +		if (!ok +		 || irb[in + 1].cmd != DEC +		 || irb[in + 1].offset != 0) +			continue; + +		int min_safe_left_offset = 0; +		if (in > 1 && irb[in - 1].cmd == RIGHT) +			min_safe_left_offset = -irb[in - 1].arg; + +		bool cond_needed_for_safety = false; +		for (size_t k = 0; k < count; k++) +			if (irb[in + k + 1].offset < min_safe_left_offset) +			{ +				cond_needed_for_safety = true; +				break; +			} + +		in++; +		if (cond_needed_for_safety) +			out++; + +		irb[out] = INSTRUCTION (EAT, 0, 0); +		for (size_t k = 1; k < count; k++) +			irb[out + k] = INSTRUCTION (irb[in + k].cmd == INC +				? INCACC : DECACC, irb[in + k].offset, 0); + +		in += count; +		out += count; + +		if (cond_needed_for_safety) +			irb[out] = INSTRUCTION (END, 0, 0); +		else +			out--; +	} +	return out; +} + +// - - Loop pairing  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +static void +pair_loops (struct instruction *irb, size_t irb_len) +{ +	size_t nesting = 0; +	size_t *stack = xcalloc (sizeof *stack, irb_len); +	for (size_t i = 0; i < irb_len; i++) +	{ +		switch (irb[i].cmd) +		{ +		case BEGIN: +			stack[nesting++] = i; +			break; +		case END: +			if (nesting <= 0) +				exit_fatal ("unbalanced loops\n"); + +			--nesting; +			irb[stack[nesting]].arg = i + 1; + +			// Looping can be disabled by optimizations +			if (irb[i].arg) +				irb[i].arg = stack[nesting] + 1; +		default: +			break; +		} +	} +	free (stack); + +	if (nesting != 0) +		exit_fatal ("unbalanced loops\n"); +} + +// - - Main  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +int +main (int argc, char *argv[]) +{ +	if (argc > 3) +		exit_fatal ("usage: %s [INPUT-FILE]\n", argv[0]); + +	FILE *input_file = stdin; +	if (argc > 1 && !(input_file = fopen (argv[1], "r"))) +		exit_fatal ("fopen: %s: %s\n", argv[1], strerror (errno)); + +	const char *output_path = "a.out"; +	if (argc > 2) +		output_path = argv[2]; + +	struct str buffer; +	str_init (&buffer); + +	int c; +	while ((c = fgetc (input_file)) != EOF) +		str_append_c (&buffer, c); +	if (ferror (input_file)) +		exit_fatal ("can't read program\n"); +	fclose (input_file); + +// - - Decode, group and optimize  - - - - - - - - - - - - - - - - - - - - - - - + +	// This is our Intermediate Representation Buffer +	struct instruction *irb = xcalloc (sizeof *irb, buffer.len); +	size_t irb_len = 0; + +	for (size_t i = 0; i < buffer.len; i++) +	{ +		enum command cmd; +		switch (buffer.str[i]) +		{ +		case '>': cmd = RIGHT; break; +		case '<': cmd = LEFT;  break; +		case '+': cmd = INC;   break; +		case '-': cmd = DEC;   break; +		case '.': cmd = OUT;   break; +		case ',': cmd = IN;    break; +		case '[': cmd = BEGIN; break; +		case ']': cmd = END;   break; +		default:  continue; +		} + +		// The most basic optimization is to group identical commands together +		if (!irb_len || !grouped[cmd] || irb[irb_len - 1].cmd != cmd) +			irb_len++; + +		irb[irb_len - 1].cmd = cmd; +		irb[irb_len - 1].arg++; +	} + +	debug_dump ("bf-no-opt.txt",            irb, irb_len); +	irb_len = optimize_assignment          (irb, irb_len); +	debug_dump ("bf-pre-offsets.txt",       irb, irb_len); +	irb_len = optimize_offseted_inc_dec    (irb, irb_len); +	debug_dump ("bf-pre-incdec-unloop.txt", irb, irb_len); +	irb_len = optimize_inc_dec_loops       (irb, irb_len); +	debug_dump ("bf-optimized.txt",         irb, irb_len); +	pair_loops                             (irb, irb_len); +	debug_dump ("bf-final.txt",             irb, irb_len); + +// - - Code generation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +	str_init (&buffer); +	size_t *offsets    = xcalloc (sizeof *offsets,    irb_len + 1); +	bool   *sets_flags = xcalloc (sizeof *sets_flags, irb_len); + +#define CODE(x) { char t[] = x; str_append_data (&buffer, t, sizeof t - 1); } +#define LE(v) (uint8_t[]) { v, v>>8, v>>16, v>>24, v>>32, v>>40, v>>48, v>>56 } +#define DB(x) { uint64_t v = (x); str_append_data (&buffer, LE (v), 1); } +#define DW(x) { uint64_t v = (x); str_append_data (&buffer, LE (v), 2); } +#define DD(x) { uint64_t v = (x); str_append_data (&buffer, LE (v), 4); } +#define DQ(x) { uint64_t v = (x); str_append_data (&buffer, LE (v), 8); } + +	enum +	{ +		ELF_LOAD_CODE = 0x400000,             // where code is loaded (usual) +		ELF_LOAD_DATA = 0x800000              // where the tape is placed +	}; + +	CODE ("\xB8") DD (ELF_LOAD_DATA)          // mov rax, "ELF_LOAD_DATA" +	CODE ("\x30\xDB")                         // xor bl, bl + +	for (size_t i = 0; i < irb_len; i++) +	{ +		offsets[i] = buffer.len; + +		size_t arg = irb[i].arg; +		assert (arg <= UINT32_MAX); + +		int offset = irb[i].offset; +		assert (offset <= INT8_MAX && offset >= INT8_MIN); + +		// Don't save what we've just loaded +		if (irb[i].cmd == LEFT || irb[i].cmd == RIGHT) +			if (i < 2 || i + 1 >= irb_len +			 || (irb[i - 2].cmd != LEFT && irb[i - 2].cmd != RIGHT) +			 || irb[i - 1].cmd != BEGIN +			 || irb[i + 1].cmd != END) +				CODE ("\x88\x18")             // mov [rax], bl + +		switch (irb[i].cmd) +		{ +		case RIGHT: +			// add rax, "arg" -- optimistic, no boundary checking +			if (arg > INT8_MAX) { CODE ("\x48\x05")     DD (arg) } +			else                { CODE ("\x48\x83\xC0") DB (arg) } +			break; +		case LEFT: +			// sub rax, "arg" -- optimistic, no boundary checking +			if (arg > INT8_MAX) { CODE ("\x48\x2D")     DD (arg) } +			else                { CODE ("\x48\x83\xE8") DB (arg) } +			break; + +		case EAT: +			// NOTE: the kernel destroys rcx and r11 on syscalls, +			//   there must be no OUT or IN between EAT and INCACC/DECACC +			CODE ("\x88\xD9" "\x30\xDB")      // mov cl, bl; xor bl, bl +			sets_flags[i] = true; +			break; +		case INCACC: +			if (offset) +			{ +				CODE ("\x00\x48") DB (offset) // add [rax+"offset"], cl +			} +			else +			{ +				CODE ("\x00\xCB")             // add bl, cl +				sets_flags[i] = true; +			} +			break; +		case DECACC: +			if (offset) +			{ +				CODE ("\x28\x48") DB (offset) // sub [rax+"offset"], cl +			} +			else +			{ +				CODE ("\x28\xCB")             // sub bl, cl +				sets_flags[i] = true; +			} +			break; + +		case INC: +			if (offset) +			{ +				CODE ("\x80\x40") DB (offset) // add byte [rax+"offset"], "arg" +			} +			else +			{ +				CODE ("\x80\xC3")             // add bl, "arg" +				sets_flags[i] = true; +			} +			DB (arg) +			break; +		case DEC: +			if (offset) +			{ +				CODE ("\x80\x68") DB (offset) // sub byte [rax+"offset"], "arg" +			} +			else +			{ +				CODE ("\x80\xEB")             // sub bl, "arg" +				sets_flags[i] = true; +			} +			DB (arg) +			break; +		case SET: +			if (offset) +			{ +				CODE ("\xC6\x40") DB (offset) // mov byte [rax+"offset"], "arg" +			} +			else +				CODE ("\xB3")                 // mov bl, "arg" +			DB (arg) +			break; + +		case OUT: +			CODE ("\xE8") DD (0)              // call "write" +			break; +		case IN: +			CODE ("\xE8") DD (0)              // call "read" +			break; + +		case BEGIN: +			// Don't test the register when the flag has been set already; +			// this doesn't have much of an effect in practice +			if (!i || !sets_flags[i - 1]) +				CODE ("\x84\xDB")             // test bl, bl +			CODE ("\x0F\x84\x00\x00\x00\x00") // jz "offsets[arg]" +			break; +		case END: +			// We know that the cell is zero, make this an "if", not a "loop"; +			// this doesn't have much of an effect in practice +			if (!arg) +				break; + +			if (!i || !sets_flags[i - 1]) +				CODE ("\x84\xDB")             // test bl, bl +			CODE ("\x0F\x85\x00\x00\x00\x00") // jnz "offsets[arg]" +			break; +		} + +		// No sense in reading it out when we overwrite it immediately; +		// this doesn't have much of an effect in practice +		if (irb[i].cmd == LEFT || irb[i].cmd == RIGHT) +			if (i + 1 >= irb_len +			 || irb[i + 1].cmd != SET +			 || irb[i + 1].offset != 0) +				CODE ("\x8A\x18")             // mov bl, [rax] +	} +	// When there is a loop at the end we need to be able to jump past it +	offsets[irb_len] = buffer.len; + +	// Write an epilog which handles all the OS interfacing +	// +	// System V x86-64 ABI: +	//   rax <-> both syscall number and return value +	//   args -> rdi, rsi, rdx, r10, r8, r9 +	//   trashed <- rcx, r11 + +	enum { SYS_READ = 0, SYS_WRITE = 1, SYS_EXIT = 60 }; + +	CODE ("\xB8") DD (SYS_EXIT)  // mov eax, 0x3c +	CODE ("\x48\x31\xFF")        // xor rdi, rdi +	CODE ("\x0F\x05")            // syscall + +	size_t fatal_offset = buffer.len; +	CODE ("\x48\x89\xF7")        // mov rdi, rsi -- use the string in rsi +	CODE ("\x30\xC0")            // xor al, al -- look for the nil byte +	CODE ("\x48\x31\xC9")        // xor rcx, rcx +	CODE ("\x48\xF7\xD1")        // not rcx -- start from -1 +	CODE ("\xFC" "\xF2\xAE")     // cld; repne scasb -- decrement until found +	CODE ("\x48\xF7\xD1")        // not rcx +	CODE ("\x48\x8D\x51\xFF")    // lea rdx, [rcx-1] -- save length in rdx +	CODE ("\xB8") DD (SYS_WRITE) // mov eax, "SYS_WRITE" +	CODE ("\xBF") DD (2)         // mov edi, "STDERR_FILENO" +	CODE ("\x0F\x05")            // syscall + +	CODE ("\xB8") DD (SYS_EXIT)  // mov eax, "SYS_EXIT" +	CODE ("\xBF") DD (1)         // mov edi, "EXIT_FAILURE" +	CODE ("\x0F\x05")            // syscall + +	size_t read_offset = buffer.len; +	CODE ("\x50")                // push rax -- save tape position +	CODE ("\xB8") DD (SYS_READ)  // mov eax, "SYS_READ" +	CODE ("\x48\x89\xC7")        // mov rdi, rax -- STDIN_FILENO +	CODE ("\x66\x6A\x00")        // push word 0 -- the default value for EOF +	CODE ("\x48\x89\xE6")        // mov rsi, rsp -- the char starts at rsp +	CODE ("\xBA") DD (1)         // mov edx, 1 -- count +	CODE ("\x0F\x05")            // syscall +	CODE ("\x66\x5B")            // pop bx + +	CODE ("\x48\x83\xF8\x00")    // cmp rax, 0 +	CODE ("\x48\x8D\x35") DD (4) // lea rsi, [rel read_message] +	CODE ("\x7C")                // jl "fatal_offset" -- write failure message +	DB ((intptr_t) fatal_offset - (intptr_t) (buffer.len + 1)) +	CODE ("\x58")                // pop rax -- restore tape position +	CODE ("\xC3")                // ret +	CODE ("fatal: read failed\n\0") + +	size_t write_offset = buffer.len; +	CODE ("\x50")                // push rax -- save tape position +	CODE ("\xB8") DD (SYS_WRITE) // mov eax, "SYS_WRITE" +	CODE ("\x48\x89\xC7")        // mov rdi, rax -- STDOUT_FILENO +	CODE ("\x66\x53")            // push bx +	CODE ("\x48\x89\xE6")        // mov rsi, rsp -- the char starts at rsp +	CODE ("\xBA") DD (1)         // mov edx, 1 -- count +	CODE ("\x0F\x05")            // syscall +	CODE ("\x66\x5B")            // pop bx + +	CODE ("\x48\x83\xF8\x00")    // cmp rax, 0 +	CODE ("\x48\x8D\x35") DD (4) // lea rsi, [rel write_message] +	CODE ("\x7C")                // jl "fatal_offset" -- write failure message +	DB ((intptr_t) fatal_offset - (intptr_t) (buffer.len + 1)) +	CODE ("\x58")                // pop rax -- restore tape position +	CODE ("\xC3")                // ret +	CODE ("fatal: write failed\n\0") + +	// Now that we know where each instruction is, fill in relative jumps +	for (size_t i = 0; i < irb_len; i++) +	{ +		if (!irb[i].arg) +			continue; + +		// This must accurately reflect the code generators +		intptr_t target, fixup = offsets[i]; +		if (irb[i].cmd == BEGIN || irb[i].cmd == END) +		{ +			fixup += (i && sets_flags[i - 1]) ? 2 : 4; +			target = offsets[irb[i].arg]; +		} +		else if (irb[i].cmd == IN)  { fixup++; target = read_offset;  } +		else if (irb[i].cmd == OUT) { fixup++; target = write_offset; } +		else continue; + +		uint64_t v = target - (fixup + 4); +		memcpy (buffer.str + fixup, LE (v), 4); +	} +	free (offsets); +	free (sets_flags); + +// - - Output  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +	// Now that we know how long the machine code is, we can write the header. +	// Note that for PIE we would need to depend on the dynamic linker, so no. +	// +	// Recommended reading: +	//   http://www.muppetlabs.com/~breadbox/software/tiny/teensy.html +	//   man 5 elf + +	struct str code = buffer; +	str_init (&buffer); + +	enum +	{ +		ELF_HEADER_SIZE = 64,           // size of the ELF header +		ELF_PROGRAM_ENTRY_SIZE = 56,    // size of a program header +		ELF_META_SIZE = ELF_HEADER_SIZE + 2 * ELF_PROGRAM_ENTRY_SIZE +	}; + +	// ELF header +	CODE ("\x7F" "ELF\x02\x01\x01")     // ELF, 64-bit, little endian, v1 +	CODE ("\x00\x00" "\0\0\0\0\0\0\0")  // Unix System V ABI, v0, padding +	DW (2) DW (62) DD (1)               // executable, x86-64, v1 +	DQ (ELF_LOAD_CODE + ELF_META_SIZE)  // entry point address +	DQ (ELF_HEADER_SIZE) DQ (0)         // program, section header offset +	DD (0)                              // no processor-specific flags +	DW (ELF_HEADER_SIZE)                // ELF header size +	DW (ELF_PROGRAM_ENTRY_SIZE) DW (2)  // program hdr tbl entry size, count +	DW (0) DW (0)                       // section hdr tbl entry size, count +	DW (0)                              // no section index for strings + +	// Program header for code +	// The entry point address seems to require alignment, so map start of file +	DD (1) DD (5)                       // PT_LOAD, PF_R | PF_X +	DQ (0)                              // offset within the file +	DQ (ELF_LOAD_CODE)                  // address in virtual memory +	DQ (ELF_LOAD_CODE)                  // address in physical memory +	DQ (code.len + ELF_META_SIZE)       // length within the file +	DQ (code.len + ELF_META_SIZE)       // length within memory +	DQ (4096)                           // segment alignment + +	// Program header for the tape +	DD (1) DD (6)                       // PT_LOAD, PF_R | PF_W +	DQ (0)                              // offset within the file +	DQ (ELF_LOAD_DATA)                  // address in virtual memory +	DQ (ELF_LOAD_DATA)                  // address in physical memory +	DQ (0)                              // length within the file +	DQ (1 << 20)                        // one megabyte of memory +	DQ (4096)                           // segment alignment + +	// The section header table is optional and we don't need it for anything + +	FILE *output_file; +	if (!(output_file = fopen (output_path, "w"))) +		exit_fatal ("fopen: %s: %s\n", output_path, strerror (errno)); + +	fwrite (buffer.str, buffer.len, 1, output_file); +	fwrite (code.str, code.len, 1, output_file); +	fclose (output_file); +	return 0; +} diff --git a/interpreters/Makefile b/interpreters/Makefile new file mode 100644 index 0000000..3e73cf5 --- /dev/null +++ b/interpreters/Makefile @@ -0,0 +1,14 @@ +CC = c99 +CFLAGS = -O3 + +NAMES = bf bf-faster-loops bf-optimizing \ +	bf-jit bf-jit-opt bf-jit-unsafe bf-jit-unsafe-opt + +all: $(NAMES) + +%: %.c +	$(CC) $(CPPFLAGS) $(CFLAGS) $< -o $@ +clean: +	rm -f $(NAMES) + +.PHONY: all clean diff --git a/interpreters/README.adoc b/interpreters/README.adoc new file mode 100644 index 0000000..ecde608 --- /dev/null +++ b/interpreters/README.adoc @@ -0,0 +1,15 @@ +This directory contains several Brainfuck interpreters in various states of +sophistication, from the simplest approach to an optimizing JIT compiler: + + * `bf.c` is the stupidest one and the oldest by far + * `bf-faster-loops.c` precomputes loop jumps + * `bf-optimizing.c` improves on that by changing `[-]+` loops into assignments + * `bf-jit.c` adds JIT compilation for Intel x86-64 + * `bf-jit-opt.c` tries a bit harder to avoid looping on the current value + * `bf-jit-unsafe.c` abolishes all boundary checks when moving across the tape + * `bf-jit-unsafe-opt.c` makes use of immediate offsets to modify values + +I recommend using a tool such as 'meld' to view the differences. + +Just run `make` in this directory to have them all built, and append +`CPPFLAGS=-DDEBUG` to get dumps of the IR for the more sophisticated JITs. diff --git a/interpreters/bf-faster-loops.c b/interpreters/bf-faster-loops.c new file mode 100644 index 0000000..e301d95 --- /dev/null +++ b/interpreters/bf-faster-loops.c @@ -0,0 +1,151 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <assert.h> +#include <errno.h> + +#define exit_fatal(...)                                                        \ +	do {                                                                       \ +		fprintf (stderr, "fatal: " __VA_ARGS__);                               \ +		exit (EXIT_FAILURE);                                                   \ +	} while (0) + +// --- Safe memory management -------------------------------------------------- + +static void * +xmalloc (size_t n) +{ +	void *p = malloc (n); +	if (!p) +		exit_fatal ("malloc: %s\n", strerror (errno)); +	return p; +} + +static void * +xrealloc (void *o, size_t n) +{ +	void *p = realloc (o, n); +	if (!p && n) +		exit_fatal ("realloc: %s\n", strerror (errno)); +	return p; +} + +// --- Dynamically allocated strings ------------------------------------------- + +struct str +{ +	char *str;                          ///< String data, null terminated +	size_t alloc;                       ///< How many bytes are allocated +	size_t len;                         ///< How long the string actually is +}; + +static void +str_init (struct str *self) +{ +	self->alloc = 16; +	self->len = 0; +	self->str = strcpy (xmalloc (self->alloc), ""); +} + +static void +str_ensure_space (struct str *self, size_t n) +{ +	// We allocate at least one more byte for the terminating null character +	size_t new_alloc = self->alloc; +	while (new_alloc <= self->len + n) +		new_alloc <<= 1; +	if (new_alloc != self->alloc) +		self->str = xrealloc (self->str, (self->alloc = new_alloc)); +} + +static void +str_append_data (struct str *self, const void *data, size_t n) +{ +	str_ensure_space (self, n); +	memcpy (self->str + self->len, data, n); +	self->str[self->len += n] = '\0'; +} + +static void +str_append_c (struct str *self, char c) +{ +	str_append_data (self, &c, 1); +} + +// --- Main -------------------------------------------------------------------- + +int +main (int argc, char *argv[]) +{ +	struct str program; str_init (&program); +	struct str data;    str_init (&data); + +	int c; +	while ((c = fgetc (stdin)) != EOF) +		str_append_c (&program, c); +	if (ferror (stdin)) +		exit_fatal ("can't read program\n"); + +	FILE *input = fopen ("/dev/tty", "rb"); +	if (!input) +		exit_fatal ("can't open terminal for reading\n"); + +	size_t *pairs = xmalloc (sizeof *pairs * program.len); +	size_t *stack = xmalloc (sizeof *stack * program.len); + +	size_t nesting = 0; +	for (size_t i = 0; i < program.len; i++) +	{ +		switch (program.str[i]) +		{ +		case '[': +			stack[nesting++] = i; +			break; +		case ']': +			assert (nesting > 0); + +			--nesting; +			pairs[stack[nesting]] = i; +			pairs[i] = stack[nesting]; +		} +	} +	assert (nesting == 0); + +	size_t dataptr = 0; +	str_append_c (&data, 0); + +	for (size_t i = 0; i < program.len; i++) +	{ +		switch (program.str[i]) +		{ +		case '>': +			assert (dataptr != SIZE_MAX); +			if (++dataptr == data.len) +				str_append_c (&data, 0); +			break; +		case '<': +			assert (dataptr != 0); +			dataptr--; +			break; + +		case '+': data.str[dataptr]++; break; +		case '-': data.str[dataptr]--; break; + +		case '.': +			fputc (data.str[dataptr], stdout); +			break; +		case ',': +			data.str[dataptr] = c = fgetc (input); +			assert (c != EOF); +			break; + +		case '[': if (!data.str[dataptr]) i = pairs[i]; break; +		case ']': if ( data.str[dataptr]) i = pairs[i]; break; + +		default: +			break; +		} +	} +	return 0; +} diff --git a/interpreters/bf-jit-opt.c b/interpreters/bf-jit-opt.c new file mode 100644 index 0000000..3b24ad3 --- /dev/null +++ b/interpreters/bf-jit-opt.c @@ -0,0 +1,495 @@ +// This is an exercise in futility more than anything else +#define _GNU_SOURCE + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <stdbool.h> +#include <assert.h> +#include <errno.h> + +#if (defined __x86_64__ || defined __amd64__) && defined __unix__ +	#include <sys/mman.h> +#else +	#error Platform not supported +#endif + +#define exit_fatal(...)                                                        \ +	do {                                                                       \ +		fprintf (stderr, "fatal: " __VA_ARGS__);                               \ +		exit (EXIT_FAILURE);                                                   \ +	} while (0) + +// --- Safe memory management -------------------------------------------------- + +static void * +xcalloc (size_t m, size_t n) +{ +	void *p = calloc (m, n); +	if (!p) +		exit_fatal ("calloc: %s\n", strerror (errno)); +	return p; +} + +static void * +xrealloc (void *o, size_t n) +{ +	void *p = realloc (o, n); +	if (!p && n) +		exit_fatal ("realloc: %s\n", strerror (errno)); +	return p; +} + +// --- Dynamically allocated strings ------------------------------------------- + +struct str +{ +	char *str;                          ///< String data, null terminated +	size_t alloc;                       ///< How many bytes are allocated +	size_t len;                         ///< How long the string actually is +}; + +static void +str_init (struct str *self) +{ +	self->len = 0; +	self->str = xcalloc (1, (self->alloc = 16)); +} + +static void +str_ensure_space (struct str *self, size_t n) +{ +	// We allocate at least one more byte for the terminating null character +	size_t new_alloc = self->alloc; +	while (new_alloc <= self->len + n) +		new_alloc <<= 1; +	if (new_alloc != self->alloc) +		self->str = xrealloc (self->str, (self->alloc = new_alloc)); +} + +static void +str_append_data (struct str *self, const void *data, size_t n) +{ +	str_ensure_space (self, n); +	memcpy (self->str + self->len, data, n); +	self->str[self->len += n] = '\0'; +} + +static void +str_append_c (struct str *self, char c) +{ +	str_append_data (self, &c, 1); +} + +// --- Application ------------------------------------------------------------- + +struct str data;                        ///< Data tape +volatile size_t dataptr;                ///< Current location on the tape +FILE *input;                            ///< User input + +enum command { RIGHT, LEFT, INC, DEC, SET, IN, OUT, BEGIN, END, +	EAT, INCACC, DECACC }; +bool grouped[] = { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 }; +struct instruction { enum command cmd; size_t arg; }; +#define INSTRUCTION(c, a) (struct instruction) { (c), (a) } + +// - - Callbacks - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +// Some things I just really don't want to write in assembly even though it +// is effectively a big performance hit, eliminating the advantage of JIT + +static void +right (size_t arg) +{ +	assert (SIZE_MAX - dataptr > arg); +	dataptr += arg; + +	while (dataptr >= data.len) +		str_append_c (&data, 0); +} + +static void +left (size_t arg) +{ +	assert (dataptr >= arg); +	dataptr -= arg; +} + +static void +cin (void) +{ +	int c; +	data.str[dataptr] = c = fgetc (input); +	assert (c != EOF); +} + +// - - Main  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#ifdef DEBUG +static void +debug_dump (const char *filename, struct instruction *in, size_t len) +{ +	FILE *fp = fopen (filename, "w"); +	long indent = 0; +	for (size_t i = 0; i < len; i++) +	{ +		if (in[i].cmd == END) +			indent--; +		for (long k = 0; k < indent; k++) +			fprintf (fp, "  "); + +		switch (in[i].cmd) +		{ +		case RIGHT:  fprintf (fp, "RIGHT  %zu\n", in[i].arg); break; +		case LEFT:   fprintf (fp, "LEFT   %zu\n", in[i].arg); break; +		case INC:    fprintf (fp, "INC    %zu\n", in[i].arg); break; +		case DEC:    fprintf (fp, "DEC    %zu\n", in[i].arg); break; +		case OUT:    fprintf (fp, "OUT    %zu\n", in[i].arg); break; +		case IN:     fprintf (fp, "IN     %zu\n", in[i].arg); break; +		case BEGIN:  fprintf (fp, "BEGIN  %zu\n", in[i].arg); break; +		case END:    fprintf (fp, "END    %zu\n", in[i].arg); break; +		case SET:    fprintf (fp, "SET    %zu\n", in[i].arg); break; +		case EAT:    fprintf (fp, "EAT    %zu\n", in[i].arg); break; +		case INCACC: fprintf (fp, "INCACC %zu\n", in[i].arg); break; +		case DECACC: fprintf (fp, "DECACC %zu\n", in[i].arg); break; +		} +		if (in[i].cmd == BEGIN) +			indent++; +	} +	fclose (fp); +} +#else +#define debug_dump(...) +#endif + +int +main (int argc, char *argv[]) +{ +	(void) argc; +	(void) argv; + +	struct str program; +	str_init (&program); + +	int c; +	while ((c = fgetc (stdin)) != EOF) +		str_append_c (&program, c); +	if (ferror (stdin)) +		exit_fatal ("can't read program\n"); +	if (!(input = fopen ("/dev/tty", "rb"))) +		exit_fatal ("can't open terminal for reading\n"); + +// - - Decode and group  - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +	struct instruction *parsed = xcalloc (sizeof *parsed, program.len); +	size_t parsed_len = 0; + +	for (size_t i = 0; i < program.len; i++) +	{ +		enum command cmd; +		switch (program.str[i]) +		{ +		case '>': cmd = RIGHT; break; +		case '<': cmd = LEFT;  break; +		case '+': cmd = INC;   break; +		case '-': cmd = DEC;   break; +		case '.': cmd = OUT;   break; +		case ',': cmd = IN;    break; +		case '[': cmd = BEGIN; break; +		case ']': cmd = END;   break; +		default:  continue; +		} + +		// The most basic optimization is to group identical commands together +		if (!parsed_len || !grouped[cmd] || parsed[parsed_len - 1].cmd != cmd) +			parsed_len++; + +		parsed[parsed_len - 1].cmd = cmd; +		parsed[parsed_len - 1].arg++; +	} + +// - - Optimization passes - - - - - - - - - - - - - - - - - - - - - - - - - - - + +	debug_dump ("bf-no-opt.txt", parsed, parsed_len); + +	size_t in = 0, out = 0; +	for (; in < parsed_len; in++, out++) +	{ +		// This shows up in mandelbrot.bf a lot but actually helps hanoi.bf +		if (in + 5 < parsed_len +		 && parsed[in].cmd == BEGIN && parsed[in + 5].cmd == END +		 && parsed[in + 1].cmd == DEC && parsed[in + 1].arg == 1 + +		 && parsed[in + 2].cmd == LEFT && parsed[in + 4].cmd == RIGHT +		 && parsed[in + 2].arg == parsed[in + 4].arg + +		 && (parsed[in + 3].cmd == INC || parsed[in + 3].cmd == DEC) +		 && parsed[in + 3].arg == 1) +		{ +			// This mustn't make the move when the cell is zero already +			parsed[out] = parsed[in]; +			parsed[out + 1] = INSTRUCTION (EAT, 0); +			parsed[out + 2] = parsed[in + 2]; +			parsed[out + 3] = INSTRUCTION +				(parsed[in + 3].cmd == INC ? INCACC : DECACC, 0); +			parsed[out + 4] = parsed[in + 4]; +			// This disables the looping further in the code; +			// this doesn't have much of an effect in practice +			parsed[out + 5] = INSTRUCTION (END, 0); +			in += 5; +			out += 5; +		} +		// The simpler case that cannot crash and thus can avoid the loop +		else if (in + 5 < parsed_len +		 && parsed[in].cmd == BEGIN && parsed[in + 5].cmd == END +		 && parsed[in + 1].cmd == DEC && parsed[in + 1].arg == 1 + +		 && parsed[in + 2].cmd == RIGHT && parsed[in + 4].cmd == LEFT +		 && parsed[in + 2].arg == parsed[in + 4].arg + +		 && (parsed[in + 3].cmd == INC || parsed[in + 3].cmd == DEC) +		 && parsed[in + 3].arg == 1) +		{ +			parsed[out] = INSTRUCTION (EAT, 0); +			parsed[out + 1] = parsed[in + 2]; +			parsed[out + 2] = INSTRUCTION +				(parsed[in + 3].cmd == INC ? INCACC : DECACC, 0); +			parsed[out + 3] = parsed[in + 4]; +			in += 5; +			out += 3; +		} +		else if (in + 2 < parsed_len +		 && parsed[in    ].cmd == BEGIN +		 && parsed[in + 1].cmd == DEC && parsed[in + 1].arg == 1 +		 && parsed[in + 2].cmd == END) +		{ +			parsed[out] = INSTRUCTION (SET, 0); +			in += 2; +		} +		else if (out && parsed[out - 1].cmd == SET && parsed[in].cmd == INC) +			parsed[--out].arg += parsed[in].arg; +		else if (out != in) +			parsed[out] = parsed[in]; +	} +	parsed_len = out; + +	for (in = 0, out = 0; in < parsed_len; in++, out++) +	{ +		ssize_t dir = 0; +		if (parsed[in].cmd == RIGHT) +			dir = parsed[in].arg; +		else if (parsed[in].cmd == LEFT) +			dir = -(ssize_t) parsed[in].arg; +		else +		{ +			parsed[out] = parsed[in]; +			continue; +		} + +		for (; in + 1 < parsed_len; in++) +		{ +			if (parsed[in + 1].cmd == RIGHT) +				dir += parsed[in + 1].arg; +			else if (parsed[in + 1].cmd == LEFT) +				dir -= (ssize_t) parsed[in + 1].arg; +			else +				break; +		} + +		if (!dir) +			out--; +		else if (dir > 0) +			parsed[out] = INSTRUCTION (RIGHT, dir); +		else +			parsed[out] = INSTRUCTION (LEFT, -dir); +	} +	parsed_len = out; + +	debug_dump ("bf-optimized.txt", parsed, parsed_len); + +// - - Loop pairing  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +	size_t nesting = 0; +	size_t *stack = xcalloc (sizeof *stack, parsed_len); +	for (size_t i = 0; i < parsed_len; i++) +	{ +		switch (parsed[i].cmd) +		{ +		case BEGIN: +			stack[nesting++] = i; +			break; +		case END: +			assert (nesting > 0); + +			--nesting; +			parsed[stack[nesting]].arg = i + 1; + +			// Looping can be disabled by optimizations +			if (parsed[i].arg) +				parsed[i].arg = stack[nesting] + 1; +		default: +			break; +		} +	} +	free (stack); +	assert (nesting == 0); + +	debug_dump ("bf-final.txt", parsed, parsed_len); + +// - - JIT - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +	// Functions preserve the registers rbx, rsp, rbp, r12, r13, r14, and r15; +	// while rax, rdi, rsi, rdx, rcx, r8, r9, r10, r11 are scratch registers. + +	str_init (&program); +	size_t *offsets = xcalloc (sizeof *offsets, parsed_len + 1); +	uint8_t *arith  = xcalloc (sizeof *arith,   parsed_len); + +#define CODE(x) { char t[] = x; str_append_data (&program, t, sizeof t - 1); } +#define WORD(x) { size_t t = (size_t)(x); str_append_data (&program, &t, 8); } +#define DWRD(x) { size_t t = (size_t)(x); str_append_data (&program, &t, 4); } + +	CODE ("\x49\xBD") WORD (&dataptr)         // mov r13, qword "&dataptr" +	CODE ("\x49\xBF") WORD (&data.str)        // mov r15, qword "&data.str" +	CODE ("\x4D\x8B\x37")                     // mov r14, qword [r15] +	CODE ("\x30\xDB")                         // xor bl, bl + +	for (size_t i = 0; i < parsed_len; i++) +	{ +		offsets[i] = program.len; + +		size_t arg = parsed[i].arg; +		assert (arg <= UINT32_MAX); +		switch (parsed[i].cmd) +		{ +		case RIGHT: +			CODE ("\x41\x88\x1E")             // mov [r14], bl +			CODE ("\xBF") DWRD (arg)          // mov edi, "arg" +			CODE ("\x48\xB8") WORD (right)    // mov rax, "right" +			CODE ("\xFF\xD0")                 // call rax + +			// The data could get reallocated, so reload the address +			CODE ("\x4D\x8B\x37")             // mov r14, qword [r15] +			CODE ("\x4D\x03\x75\x00")         // add r14, [r13] +			break; +		case LEFT: +			CODE ("\x41\x88\x1E")             // mov [r14], bl +			CODE ("\xBF") DWRD (arg)          // mov edi, "arg" +			CODE ("\x49\x29\xFE")             // sub r14, rdi -- optimistic +			CODE ("\x48\xB8") WORD (left)     // mov rax, "left" +			CODE ("\xFF\xD0")                 // call rax +			break; + +		case EAT: +			CODE ("\x41\x88\xDC")             // mov r12b, bl +			CODE ("\x30\xDB")                 // xor bl, bl +			arith[i] = 1; +			break; +		case INCACC: +			CODE ("\x44\x00\xE3")             // add bl, r12b +			arith[i] = 1; +			break; +		case DECACC: +			CODE ("\x44\x28\xE3")             // sub bl, r12b +			arith[i] = 1; +			break; + +		case INC: +			CODE ("\x80\xC3")                 // add bl, "arg" +			str_append_c (&program, arg); +			arith[i] = 1; +			break; +		case DEC: +			CODE ("\x80\xEB")                 // sub bl, "arg" +			str_append_c (&program, arg); +			arith[i] = 1; +			break; +		case SET: +			CODE ("\xB3")                     // mov bl, "arg" +			str_append_c (&program, arg); +			break; + +		case OUT: +			CODE ("\x48\x0F\xB6\xFB")         // movzx rdi, bl +			CODE ("\x48\xBE") WORD (stdout)   // mov rsi, "stdout" +			CODE ("\x48\xB8") WORD (fputc)    // mov rax, "fputc" +			CODE ("\xFF\xD0")                 // call rax +			break; +		case IN: +			CODE ("\x48\xB8") WORD (cin)      // mov rax, "cin" +			CODE ("\xFF\xD0")                 // call rax +			CODE ("\x41\x8A\x1E")             // mov bl, [r14] +			break; + +		case BEGIN: +			// Don't test the register when the flag has been set already; +			// this doesn't have much of an effect in practice +			if (!i || !arith[i - 1]) +				CODE ("\x84\xDB")             // test bl, bl +			CODE ("\x0F\x84\x00\x00\x00\x00") // jz "offsets[i]" +			break; +		case END: +			// We know that the cell is zero, make this an "if", not a "loop"; +			// this doesn't have much of an effect in practice +			if (!arg) +				break; + +			if (!i || !arith[i - 1]) +				CODE ("\x84\xDB")             // test bl, bl +			CODE ("\x0F\x85\x00\x00\x00\x00") // jnz "offsets[i]" +			break; +		} + +		// No sense in reading it out when we overwrite it immediately; +		// this doesn't have much of an effect in practice +		if (parsed[i].cmd == LEFT || parsed[i].cmd == RIGHT) +			if (i + 1 >= parsed_len +			 || parsed[i + 1].cmd != SET) +				CODE ("\x41\x8A\x1E")         // mov bl, [r14] +	} +	// When there is a loop at the end we need to be able to jump past it +	offsets[parsed_len] = program.len; +	str_append_c (&program, '\xC3');          // ret + +	// Now that we know where each instruction is, fill in relative jumps; +	// this must accurately reflect code generators for BEGIN and END +	for (size_t i = 0; i < parsed_len; i++) +	{ +		if ((parsed[i].cmd != BEGIN && parsed[i].cmd != END) +		 || !parsed[i].arg) +			continue; + +		size_t fixup = offsets[i] + 2; +		if (!i || !arith[i - 1]) +			fixup += 2; + +		*(int32_t *)(program.str + fixup) = +			((intptr_t)(offsets[parsed[i].arg]) - (intptr_t)(fixup + 4)); +	} +	free (offsets); +	free (arith); + +#ifdef DEBUG +	FILE *bin = fopen ("bf-jit.bin", "w"); +	fwrite (program.str, program.len, 1, bin); +	fclose (bin); +#endif + +// - - Runtime - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +	// Some systems may have W^X +	void *executable = mmap (NULL, program.len, PROT_READ | PROT_WRITE, +		MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); +	if (!executable) +		exit_fatal ("mmap: %s\n", strerror (errno)); + +	memcpy (executable, program.str, program.len); +	if (mprotect (executable, program.len, PROT_READ | PROT_EXEC)) +		exit_fatal ("mprotect: %s\n", strerror (errno)); + +	str_init (&data); +	str_append_c (&data, 0); +	((void (*) (void)) executable)(); +	return 0; +} diff --git a/interpreters/bf-jit-unsafe-opt.c b/interpreters/bf-jit-unsafe-opt.c new file mode 100644 index 0000000..88a7980 --- /dev/null +++ b/interpreters/bf-jit-unsafe-opt.c @@ -0,0 +1,617 @@ +// This is an exercise in futility more than anything else +#define _GNU_SOURCE + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <stdbool.h> +#include <assert.h> +#include <errno.h> + +#if (defined __x86_64__ || defined __amd64__) && defined __unix__ +	#include <unistd.h> +	#include <sys/mman.h> +#else +	#error Platform not supported +#endif + +#define exit_fatal(...)                                                        \ +	do {                                                                       \ +		fprintf (stderr, "fatal: " __VA_ARGS__);                               \ +		exit (EXIT_FAILURE);                                                   \ +	} while (0) + +// --- Safe memory management -------------------------------------------------- + +static void * +xcalloc (size_t m, size_t n) +{ +	void *p = calloc (m, n); +	if (!p) +		exit_fatal ("calloc: %s\n", strerror (errno)); +	return p; +} + +static void * +xrealloc (void *o, size_t n) +{ +	void *p = realloc (o, n); +	if (!p && n) +		exit_fatal ("realloc: %s\n", strerror (errno)); +	return p; +} + +// --- Dynamically allocated strings ------------------------------------------- + +struct str +{ +	char *str;                          ///< String data, null terminated +	size_t alloc;                       ///< How many bytes are allocated +	size_t len;                         ///< How long the string actually is +}; + +static void +str_init (struct str *self) +{ +	self->len = 0; +	self->str = xcalloc (1, (self->alloc = 16)); +} + +static void +str_ensure_space (struct str *self, size_t n) +{ +	// We allocate at least one more byte for the terminating null character +	size_t new_alloc = self->alloc; +	while (new_alloc <= self->len + n) +		new_alloc <<= 1; +	if (new_alloc != self->alloc) +		self->str = xrealloc (self->str, (self->alloc = new_alloc)); +} + +static void +str_append_data (struct str *self, const void *data, size_t n) +{ +	str_ensure_space (self, n); +	memcpy (self->str + self->len, data, n); +	self->str[self->len += n] = '\0'; +} + +static void +str_append_c (struct str *self, char c) +{ +	str_append_data (self, &c, 1); +} + +// --- Application ------------------------------------------------------------- + +enum command { RIGHT, LEFT, INC, DEC, SET, IN, OUT, BEGIN, END, +	EAT, INCACC, DECACC }; +bool grouped[] = { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 }; +struct instruction { enum command cmd; int offset; size_t arg; }; +#define INSTRUCTION(c, o, a) (struct instruction) { (c), (o), (a) } + +// - - Callbacks - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +FILE *input;                            ///< User input + +static int +cin (void) +{ +	int c = fgetc (input); +	assert (c != EOF); +	return c; +} + +// - - Main  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#ifdef DEBUG +static void +debug_dump (const char *filename, struct instruction *in, size_t len) +{ +	FILE *fp = fopen (filename, "w"); +	long indent = 0; +	for (size_t i = 0; i < len; i++) +	{ +		if (in[i].cmd == END) +			indent--; +		for (long k = 0; k < indent; k++) +			fprintf (fp, "  "); + +		switch (in[i].cmd) +		{ +		case RIGHT:  fputs ("RIGHT ", fp); break; +		case LEFT:   fputs ("LEFT  ", fp); break; +		case INC:    fputs ("INC   ", fp); break; +		case DEC:    fputs ("DEC   ", fp); break; +		case OUT:    fputs ("OUT   ", fp); break; +		case IN:     fputs ("IN    ", fp); break; +		case BEGIN:  fputs ("BEGIN ", fp); break; +		case END:    fputs ("END   ", fp); break; +		case SET:    fputs ("SET   ", fp); break; +		case EAT:    fputs ("EAT   ", fp); break; +		case INCACC: fputs ("INCACC", fp); break; +		case DECACC: fputs ("DECACC", fp); break; +		} +		fprintf (fp, " %zu [%d]\n", in[i].arg, in[i].offset); +		if (in[i].cmd == BEGIN) +			indent++; +	} +	fclose (fp); +} +#else +#define debug_dump(...) +#endif + +int +main (int argc, char *argv[]) +{ +	(void) argc; +	(void) argv; + +	struct str program; +	str_init (&program); + +	int c; +	while ((c = fgetc (stdin)) != EOF) +		str_append_c (&program, c); +	if (ferror (stdin)) +		exit_fatal ("can't read program\n"); +	if (!(input = fopen ("/dev/tty", "rb"))) +		exit_fatal ("can't open terminal for reading\n"); + +// - - Decode and group  - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +	struct instruction *parsed = xcalloc (sizeof *parsed, program.len); +	size_t parsed_len = 0; + +	for (size_t i = 0; i < program.len; i++) +	{ +		enum command cmd; +		switch (program.str[i]) +		{ +		case '>': cmd = RIGHT; break; +		case '<': cmd = LEFT;  break; +		case '+': cmd = INC;   break; +		case '-': cmd = DEC;   break; +		case '.': cmd = OUT;   break; +		case ',': cmd = IN;    break; +		case '[': cmd = BEGIN; break; +		case ']': cmd = END;   break; +		default:  continue; +		} + +		// The most basic optimization is to group identical commands together +		if (!parsed_len || !grouped[cmd] || parsed[parsed_len - 1].cmd != cmd) +			parsed_len++; + +		parsed[parsed_len - 1].cmd = cmd; +		parsed[parsed_len - 1].arg++; +	} + +// - - Optimization passes - - - - - - - - - - - - - - - - - - - - - - - - - - - + +	debug_dump ("bf-no-opt.txt", parsed, parsed_len); + +	size_t in = 0, out = 0; +	for (; in < parsed_len; in++, out++) +	{ +		if (in + 2 < parsed_len +		 && parsed[in    ].cmd == BEGIN +		 && parsed[in + 1].cmd == DEC && parsed[in + 1].arg == 1 +		 && parsed[in + 2].cmd == END) +		{ +			parsed[out] = INSTRUCTION (SET, 0, 0); +			in += 2; +		} +		else if (out && parsed[out - 1].cmd == SET && parsed[in].cmd == INC) +			parsed[--out].arg += parsed[in].arg; +		else if (out != in) +			parsed[out] = parsed[in]; +	} +	parsed_len = out; + +	debug_dump ("bf-pre-offsets.txt", parsed, parsed_len); + +	// Add offsets to INC/DEC/SET stuck between LEFT/RIGHT +	// and compress the LEFT/RIGHT sequences +	for (in = 0, out = 0; in < parsed_len; in++, out++) +	{ +		ssize_t dir = 0; +		if (parsed[in].cmd == RIGHT) +			dir = parsed[in].arg; +		else if (parsed[in].cmd == LEFT) +			dir = -(ssize_t) parsed[in].arg; +		else +		{ +			parsed[out] = parsed[in]; +			continue; +		} + +		while (in + 2 < parsed_len) +		{ +			// An immediate offset has its limits +			if (dir < INT8_MIN || dir > INT8_MAX) +				break; + +			ssize_t diff; +			if (parsed[in + 2].cmd == RIGHT) +				diff = parsed[in + 2].arg; +			else if (parsed[in + 2].cmd == LEFT) +				diff = -(ssize_t) parsed[in + 2].arg; +			else +				break; + +			int cmd = parsed[in + 1].cmd; +			if (cmd != INC && cmd != DEC && cmd != SET) +				break; + +			parsed[out] = parsed[in + 1]; +			parsed[out].offset = dir; + +			dir += diff; +			out += 1; +			in += 2; +		} + +		for (; in + 1 < parsed_len; in++) +		{ +			if (parsed[in + 1].cmd == RIGHT) +				dir += parsed[in + 1].arg; +			else if (parsed[in + 1].cmd == LEFT) +				dir -= (ssize_t) parsed[in + 1].arg; +			else +				break; +		} + +		if (!dir) +			out--; +		else if (dir > 0) +			parsed[out] = INSTRUCTION (RIGHT, 0, dir); +		else +			parsed[out] = INSTRUCTION (LEFT, 0, -dir); +	} +	parsed_len = out; + +	debug_dump ("bf-pre-incdec-unloop.txt", parsed, parsed_len); + +	// Try to eliminate loops that eat a cell and add/subtract its value +	// to/from some other cell +	for (in = 0, out = 0; in < parsed_len; in++, out++) +	{ +		parsed[out] = parsed[in]; +		if (parsed[in].cmd != BEGIN) +			continue; + +		bool ok = false; +		size_t count = 0; +		for (size_t k = in + 1; k < parsed_len; k++) +		{ +			if (parsed[k].cmd == END) +			{ +				ok = true; +				break; +			} +			if (parsed[k].cmd != INC +			 && parsed[k].cmd != DEC) +				break; +			count++; +		} +		if (!ok) +			continue; + +		// Stable sort operations by their offsets, put [0] first +		bool sorted; +		do +		{ +			sorted = true; +			for (size_t k = 1; k < count; k++) +			{ +				if (parsed[in + k].offset == 0) +					continue; +				if (parsed[in + k + 1].offset != 0 +				 && parsed[in + k].offset <= parsed[in + k + 1].offset) +					continue; + +				struct instruction tmp = parsed[in + k + 1]; +				parsed[in + k + 1] = parsed[in + k]; +				parsed[in + k] = tmp; +				sorted = false; +			} +		} +		while (!sorted); + +		// Abort the optimization on duplicate offsets (complication with [0]) +		for (size_t k = 1; k < count; k++) +			if (parsed[in + k].offset == parsed[in + k + 1].offset) +				ok = false; +		// XXX: can't make the code longer either +		for (size_t k = 1; k <= count; k++) +			if (parsed[in + k].arg != 1) +				ok = false; +		if (!ok +		 || parsed[in + 1].cmd != DEC +		 || parsed[in + 1].offset != 0) +			continue; + +		int min_safe_left_offset = 0; +		if (in > 1 && parsed[in - 1].cmd == RIGHT) +			min_safe_left_offset = -parsed[in - 1].arg; + +		bool cond_needed_for_safety = false; +		for (size_t k = 0; k < count; k++) +			if (parsed[in + k + 1].offset < min_safe_left_offset) +			{ +				cond_needed_for_safety = true; +				break; +			} + +		in++; +		if (cond_needed_for_safety) +			out++; + +		parsed[out] = INSTRUCTION (EAT, 0, 0); +		for (size_t k = 1; k < count; k++) +			parsed[out + k] = INSTRUCTION (parsed[in + k].cmd == INC +				? INCACC : DECACC, parsed[in + k].offset, 0); + +		in += count; +		out += count; + +		if (cond_needed_for_safety) +			parsed[out] = INSTRUCTION (END, 0, 0); +		else +			out--; +	} +	parsed_len = out; + +	debug_dump ("bf-optimized.txt", parsed, parsed_len); + +// - - Loop pairing  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +	size_t nesting = 0; +	size_t *stack = xcalloc (sizeof *stack, parsed_len); +	for (size_t i = 0; i < parsed_len; i++) +	{ +		switch (parsed[i].cmd) +		{ +		case BEGIN: +			stack[nesting++] = i; +			break; +		case END: +			assert (nesting > 0); + +			--nesting; +			parsed[stack[nesting]].arg = i + 1; + +			// Looping can be disabled by optimizations +			if (parsed[i].arg) +				parsed[i].arg = stack[nesting] + 1; +		default: +			break; +		} +	} +	free (stack); +	assert (nesting == 0); + +	debug_dump ("bf-final.txt", parsed, parsed_len); + +// - - JIT - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +	// Functions preserve the registers rbx, rsp, rbp, r12, r13, r14, and r15; +	// while rax, rdi, rsi, rdx, rcx, r8, r9, r10, r11 are scratch registers. + +	str_init (&program); +	size_t *offsets = xcalloc (sizeof *offsets, parsed_len + 1); +	uint8_t *arith  = xcalloc (sizeof *arith,   parsed_len); + +#define CODE(x) { char t[] = x; str_append_data (&program, t, sizeof t - 1); } +#define WORD(x) { size_t t = (size_t)(x); str_append_data (&program, &t, 8); } +#define DWRD(x) { size_t t = (size_t)(x); str_append_data (&program, &t, 4); } + +	CODE ("\x48\x89\xF8")                     // mov rax, rdi +	CODE ("\x30\xDB")                         // xor bl, bl + +	for (size_t i = 0; i < parsed_len; i++) +	{ +		offsets[i] = program.len; + +		size_t arg = parsed[i].arg; +		assert (arg <= UINT32_MAX); + +		int offset = parsed[i].offset; +		assert (offset <= INT8_MAX && offset >= INT8_MIN); + +		// Don't save what we've just loaded +		if (parsed[i].cmd == LEFT || parsed[i].cmd == RIGHT) +			if (i < 2 || i + 1 >= parsed_len +			 || (parsed[i - 2].cmd != LEFT && parsed[i - 2].cmd != RIGHT) +			 || parsed[i - 1].cmd != BEGIN +			 || parsed[i + 1].cmd != END) +				CODE ("\x88\x18")             // mov [rax], bl + +		switch (parsed[i].cmd) +		{ +		case RIGHT: +			// add rax, "arg" -- optimistic, no boundary checking +			if (arg > INT8_MAX) +				{ CODE ("\x48\x05")     DWRD (arg) } +			else +				{ CODE ("\x48\x83\xC0") str_append_c (&program, arg); } +			break; +		case LEFT: +			// sub rax, "arg" -- optimistic, no boundary checking +			if (arg > INT8_MAX) +				{ CODE ("\x48\x2D")     DWRD (arg) } +			else +				{ CODE ("\x48\x83\xE8") str_append_c (&program, arg); } +			break; + +		case EAT: +			CODE ("\x41\x88\xDC")             // mov r12b, bl +			CODE ("\x30\xDB")                 // xor bl, bl +			arith[i] = 1; +			break; +		case INCACC: +			if (offset) +			{ +				CODE ("\x44\x00\x60")         // add [rax+"offset"], r12b +				str_append_c (&program, offset); +			} +			else +			{ +				CODE ("\x44\x00\xE3")         // add bl, r12b +				arith[i] = 1; +			} +			break; +		case DECACC: +			if (offset) +			{ +				CODE ("\x44\x28\x60")         // sub [rax+"offset"], r12b +				str_append_c (&program, offset); +			} +			else +			{ +				CODE ("\x44\x28\xE3")         // sub bl, r12b +				arith[i] = 1; +			} +			break; + +		case INC: +			if (offset) +			{ +				CODE ("\x80\x40")             // add byte [rax+"offset"], "arg" +				str_append_c (&program, offset); +			} +			else +			{ +				arith[i] = 1; +				CODE ("\x80\xC3")             // add bl, "arg" +			} +			str_append_c (&program, arg); +			break; +		case DEC: +			if (offset) +			{ +				CODE ("\x80\x68")             // sub byte [rax+"offset"], "arg" +				str_append_c (&program, offset); +			} +			else +			{ +				arith[i] = 1; +				CODE ("\x80\xEB")             // sub bl, "arg" +			} +			str_append_c (&program, arg); +			break; +		case SET: +			if (offset) +			{ +				CODE ("\xC6\x40")             // mov byte [rax+"offset"], "arg" +				str_append_c (&program, offset); +			} +			else +				CODE ("\xB3")                 // mov bl, "arg" +			str_append_c (&program, arg); +			break; + +		case OUT: +			CODE ("\x50\x53")                 // push rax, push rbx +			CODE ("\x48\x0F\xB6\xFB")         // movzx rdi, bl +			CODE ("\x48\xBE") WORD (stdout)   // mov rsi, "stdout" +			CODE ("\x48\xB8") WORD (fputc)    // mov rax, "fputc" +			CODE ("\xFF\xD0")                 // call rax +			CODE ("\x5B\x58")                 // pop rbx, pop rax +			break; +		case IN: +			CODE ("\x50")                     // push rax +			CODE ("\x48\xB8") WORD (cin)      // mov rax, "cin" +			CODE ("\xFF\xD0")                 // call rax +			CODE ("\x88\xC3")                 // mov bl, al +			CODE ("\x58")                     // pop rax +			break; + +		case BEGIN: +			// Don't test the register when the flag has been set already; +			// this doesn't have much of an effect in practice +			if (!i || !arith[i - 1]) +				CODE ("\x84\xDB")             // test bl, bl +			CODE ("\x0F\x84\x00\x00\x00\x00") // jz "offsets[i]" +			break; +		case END: +			// We know that the cell is zero, make this an "if", not a "loop"; +			// this doesn't have much of an effect in practice +			if (!arg) +				break; + +			if (!i || !arith[i - 1]) +				CODE ("\x84\xDB")             // test bl, bl +			CODE ("\x0F\x85\x00\x00\x00\x00") // jnz "offsets[i]" +			break; +		} + +		// No sense in reading it out when we overwrite it immediately; +		// this doesn't have much of an effect in practice +		if (parsed[i].cmd == LEFT || parsed[i].cmd == RIGHT) +			if (i + 1 >= parsed_len +			 || parsed[i + 1].cmd != SET +			 || parsed[i + 1].offset != 0) +				CODE ("\x8A\x18")             // mov bl, [rax] +	} +	// When there is a loop at the end we need to be able to jump past it +	offsets[parsed_len] = program.len; +	str_append_c (&program, '\xC3');          // ret + +	// Now that we know where each instruction is, fill in relative jumps; +	// this must accurately reflect code generators for BEGIN and END +	for (size_t i = 0; i < parsed_len; i++) +	{ +		if ((parsed[i].cmd != BEGIN && parsed[i].cmd != END) +		 || !parsed[i].arg) +			continue; + +		size_t fixup = offsets[i] + 2; +		if (!i || !arith[i - 1]) +			fixup += 2; + +		*(int32_t *)(program.str + fixup) = +			((intptr_t)(offsets[parsed[i].arg]) - (intptr_t)(fixup + 4)); +	} +	free (offsets); +	free (arith); + +#ifdef DEBUG +	FILE *bin = fopen ("bf-jit.bin", "w"); +	fwrite (program.str, program.len, 1, bin); +	fclose (bin); +#endif + +// - - Runtime - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +	// Some systems may have W^X +	void *executable = mmap (NULL, program.len, PROT_READ | PROT_WRITE, +		MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); +	if (!executable) +		exit_fatal ("mmap: %s\n", strerror (errno)); + +	memcpy (executable, program.str, program.len); +	if (mprotect (executable, program.len, PROT_READ | PROT_EXEC)) +		exit_fatal ("mprotect: %s\n", strerror (errno)); + +	// We create crash zones on both ends of the tape for some minimum safety +	long pagesz = sysconf (_SC_PAGESIZE); +	assert (pagesz > 0); + +	const size_t tape_len = (1 << 20) + 2 * pagesz; +	char *tape = mmap (NULL, tape_len, PROT_READ | PROT_WRITE, +		MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); +	if (!tape) +		exit_fatal ("mmap: %s\n", strerror (errno)); + +	memset (tape, 0, tape_len); +	if (mprotect (tape,                     pagesz, PROT_NONE) +	 || mprotect (tape + tape_len - pagesz, pagesz, PROT_NONE)) +		exit_fatal ("mprotect: %s\n", strerror (errno)); + +	((void (*) (char *)) executable)(tape + pagesz); +	return 0; +} diff --git a/interpreters/bf-jit-unsafe.c b/interpreters/bf-jit-unsafe.c new file mode 100644 index 0000000..63fbd7e --- /dev/null +++ b/interpreters/bf-jit-unsafe.c @@ -0,0 +1,495 @@ +// This is an exercise in futility more than anything else +#define _GNU_SOURCE + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <stdbool.h> +#include <assert.h> +#include <errno.h> + +#if (defined __x86_64__ || defined __amd64__) && defined __unix__ +	#include <unistd.h> +	#include <sys/mman.h> +#else +	#error Platform not supported +#endif + +#define exit_fatal(...)                                                        \ +	do {                                                                       \ +		fprintf (stderr, "fatal: " __VA_ARGS__);                               \ +		exit (EXIT_FAILURE);                                                   \ +	} while (0) + +// --- Safe memory management -------------------------------------------------- + +static void * +xcalloc (size_t m, size_t n) +{ +	void *p = calloc (m, n); +	if (!p) +		exit_fatal ("calloc: %s\n", strerror (errno)); +	return p; +} + +static void * +xrealloc (void *o, size_t n) +{ +	void *p = realloc (o, n); +	if (!p && n) +		exit_fatal ("realloc: %s\n", strerror (errno)); +	return p; +} + +// --- Dynamically allocated strings ------------------------------------------- + +struct str +{ +	char *str;                          ///< String data, null terminated +	size_t alloc;                       ///< How many bytes are allocated +	size_t len;                         ///< How long the string actually is +}; + +static void +str_init (struct str *self) +{ +	self->len = 0; +	self->str = xcalloc (1, (self->alloc = 16)); +} + +static void +str_ensure_space (struct str *self, size_t n) +{ +	// We allocate at least one more byte for the terminating null character +	size_t new_alloc = self->alloc; +	while (new_alloc <= self->len + n) +		new_alloc <<= 1; +	if (new_alloc != self->alloc) +		self->str = xrealloc (self->str, (self->alloc = new_alloc)); +} + +static void +str_append_data (struct str *self, const void *data, size_t n) +{ +	str_ensure_space (self, n); +	memcpy (self->str + self->len, data, n); +	self->str[self->len += n] = '\0'; +} + +static void +str_append_c (struct str *self, char c) +{ +	str_append_data (self, &c, 1); +} + +// --- Application ------------------------------------------------------------- + +enum command { RIGHT, LEFT, INC, DEC, SET, IN, OUT, BEGIN, END, +	EAT, INCACC, DECACC }; +bool grouped[] = { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 }; +struct instruction { enum command cmd; size_t arg; }; +#define INSTRUCTION(c, a) (struct instruction) { (c), (a) } + +// - - Callbacks - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +FILE *input;                            ///< User input + +static int +cin (void) +{ +	int c = fgetc (input); +	assert (c != EOF); +	return c; +} + +// - - Main  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#ifdef DEBUG +static void +debug_dump (const char *filename, struct instruction *in, size_t len) +{ +	FILE *fp = fopen (filename, "w"); +	long indent = 0; +	for (size_t i = 0; i < len; i++) +	{ +		if (in[i].cmd == END) +			indent--; +		for (long k = 0; k < indent; k++) +			fprintf (fp, "  "); + +		switch (in[i].cmd) +		{ +		case RIGHT:  fprintf (fp, "RIGHT  %zu\n", in[i].arg); break; +		case LEFT:   fprintf (fp, "LEFT   %zu\n", in[i].arg); break; +		case INC:    fprintf (fp, "INC    %zu\n", in[i].arg); break; +		case DEC:    fprintf (fp, "DEC    %zu\n", in[i].arg); break; +		case OUT:    fprintf (fp, "OUT    %zu\n", in[i].arg); break; +		case IN:     fprintf (fp, "IN     %zu\n", in[i].arg); break; +		case BEGIN:  fprintf (fp, "BEGIN  %zu\n", in[i].arg); break; +		case END:    fprintf (fp, "END    %zu\n", in[i].arg); break; +		case SET:    fprintf (fp, "SET    %zu\n", in[i].arg); break; +		case EAT:    fprintf (fp, "EAT    %zu\n", in[i].arg); break; +		case INCACC: fprintf (fp, "INCACC %zu\n", in[i].arg); break; +		case DECACC: fprintf (fp, "DECACC %zu\n", in[i].arg); break; +		} +		if (in[i].cmd == BEGIN) +			indent++; +	} +	fclose (fp); +} +#else +#define debug_dump(...) +#endif + +int +main (int argc, char *argv[]) +{ +	(void) argc; +	(void) argv; + +	struct str program; +	str_init (&program); + +	int c; +	while ((c = fgetc (stdin)) != EOF) +		str_append_c (&program, c); +	if (ferror (stdin)) +		exit_fatal ("can't read program\n"); +	if (!(input = fopen ("/dev/tty", "rb"))) +		exit_fatal ("can't open terminal for reading\n"); + +// - - Decode and group  - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +	struct instruction *parsed = xcalloc (sizeof *parsed, program.len); +	size_t parsed_len = 0; + +	for (size_t i = 0; i < program.len; i++) +	{ +		enum command cmd; +		switch (program.str[i]) +		{ +		case '>': cmd = RIGHT; break; +		case '<': cmd = LEFT;  break; +		case '+': cmd = INC;   break; +		case '-': cmd = DEC;   break; +		case '.': cmd = OUT;   break; +		case ',': cmd = IN;    break; +		case '[': cmd = BEGIN; break; +		case ']': cmd = END;   break; +		default:  continue; +		} + +		// The most basic optimization is to group identical commands together +		if (!parsed_len || !grouped[cmd] || parsed[parsed_len - 1].cmd != cmd) +			parsed_len++; + +		parsed[parsed_len - 1].cmd = cmd; +		parsed[parsed_len - 1].arg++; +	} + +// - - Optimization passes - - - - - - - - - - - - - - - - - - - - - - - - - - - + +	debug_dump ("bf-no-opt.txt", parsed, parsed_len); + +	size_t in = 0, out = 0; +	for (; in < parsed_len; in++, out++) +	{ +		// This shows up in mandelbrot.bf a lot but actually helps hanoi.bf +		if (in + 5 < parsed_len +		 && parsed[in].cmd == BEGIN && parsed[in + 5].cmd == END +		 && parsed[in + 1].cmd == DEC && parsed[in + 1].arg == 1 + +		 && parsed[in + 2].cmd == LEFT && parsed[in + 4].cmd == RIGHT +		 && parsed[in + 2].arg == parsed[in + 4].arg + +		 && (parsed[in + 3].cmd == INC || parsed[in + 3].cmd == DEC) +		 && parsed[in + 3].arg == 1) +		{ +			// This mustn't make the move when the cell is zero already +			parsed[out] = parsed[in]; +			parsed[out + 1] = INSTRUCTION (EAT, 0); +			parsed[out + 2] = parsed[in + 2]; +			parsed[out + 3] = INSTRUCTION +				(parsed[in + 3].cmd == INC ? INCACC : DECACC, 0); +			parsed[out + 4] = parsed[in + 4]; +			// This disables the looping further in the code; +			// this doesn't have much of an effect in practice +			parsed[out + 5] = INSTRUCTION (END, 0); +			in += 5; +			out += 5; +		} +		// The simpler case that cannot crash and thus can avoid the loop +		else if (in + 5 < parsed_len +		 && parsed[in].cmd == BEGIN && parsed[in + 5].cmd == END +		 && parsed[in + 1].cmd == DEC && parsed[in + 1].arg == 1 + +		 && parsed[in + 2].cmd == RIGHT && parsed[in + 4].cmd == LEFT +		 && parsed[in + 2].arg == parsed[in + 4].arg + +		 && (parsed[in + 3].cmd == INC || parsed[in + 3].cmd == DEC) +		 && parsed[in + 3].arg == 1) +		{ +			parsed[out] = INSTRUCTION (EAT, 0); +			parsed[out + 1] = parsed[in + 2]; +			parsed[out + 2] = INSTRUCTION +				(parsed[in + 3].cmd == INC ? INCACC : DECACC, 0); +			parsed[out + 3] = parsed[in + 4]; +			in += 5; +			out += 3; +		} +		else if (in + 2 < parsed_len +		 && parsed[in    ].cmd == BEGIN +		 && parsed[in + 1].cmd == DEC && parsed[in + 1].arg == 1 +		 && parsed[in + 2].cmd == END) +		{ +			parsed[out] = INSTRUCTION (SET, 0); +			in += 2; +		} +		else if (out && parsed[out - 1].cmd == SET && parsed[in].cmd == INC) +			parsed[--out].arg += parsed[in].arg; +		else if (out != in) +			parsed[out] = parsed[in]; +	} +	parsed_len = out; + +	for (in = 0, out = 0; in < parsed_len; in++, out++) +	{ +		ssize_t dir = 0; +		if (parsed[in].cmd == RIGHT) +			dir = parsed[in].arg; +		else if (parsed[in].cmd == LEFT) +			dir = -(ssize_t) parsed[in].arg; +		else +		{ +			parsed[out] = parsed[in]; +			continue; +		} + +		for (; in + 1 < parsed_len; in++) +		{ +			if (parsed[in + 1].cmd == RIGHT) +				dir += parsed[in + 1].arg; +			else if (parsed[in + 1].cmd == LEFT) +				dir -= (ssize_t) parsed[in + 1].arg; +			else +				break; +		} + +		if (!dir) +			out--; +		else if (dir > 0) +			parsed[out] = INSTRUCTION (RIGHT, dir); +		else +			parsed[out] = INSTRUCTION (LEFT, -dir); +	} +	parsed_len = out; + +	debug_dump ("bf-optimized.txt", parsed, parsed_len); + +// - - Loop pairing  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +	size_t nesting = 0; +	size_t *stack = xcalloc (sizeof *stack, parsed_len); +	for (size_t i = 0; i < parsed_len; i++) +	{ +		switch (parsed[i].cmd) +		{ +		case BEGIN: +			stack[nesting++] = i; +			break; +		case END: +			assert (nesting > 0); + +			--nesting; +			parsed[stack[nesting]].arg = i + 1; + +			// Looping can be disabled by optimizations +			if (parsed[i].arg) +				parsed[i].arg = stack[nesting] + 1; +		default: +			break; +		} +	} +	free (stack); +	assert (nesting == 0); + +	debug_dump ("bf-final.txt", parsed, parsed_len); + +// - - JIT - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +	// Functions preserve the registers rbx, rsp, rbp, r12, r13, r14, and r15; +	// while rax, rdi, rsi, rdx, rcx, r8, r9, r10, r11 are scratch registers. + +	str_init (&program); +	size_t *offsets = xcalloc (sizeof *offsets, parsed_len + 1); +	uint8_t *arith  = xcalloc (sizeof *arith,   parsed_len); + +#define CODE(x) { char t[] = x; str_append_data (&program, t, sizeof t - 1); } +#define WORD(x) { size_t t = (size_t)(x); str_append_data (&program, &t, 8); } +#define DWRD(x) { size_t t = (size_t)(x); str_append_data (&program, &t, 4); } + +	CODE ("\x48\x89\xF8")                     // mov rax, rdi +	CODE ("\x30\xDB")                         // xor bl, bl + +	for (size_t i = 0; i < parsed_len; i++) +	{ +		offsets[i] = program.len; + +		size_t arg = parsed[i].arg; +		assert (arg <= UINT32_MAX); + +		// Don't save what we've just loaded +		if (parsed[i].cmd == LEFT || parsed[i].cmd == RIGHT) +			if (i < 2 || i + 1 >= parsed_len +			 || (parsed[i - 2].cmd != LEFT && parsed[i - 2].cmd != RIGHT) +			 || parsed[i - 1].cmd != BEGIN +			 || parsed[i + 1].cmd != END) +				CODE ("\x88\x18")             // mov [rax], bl + +		switch (parsed[i].cmd) +		{ +		case RIGHT: +			// add rax, "arg" -- optimistic, no boundary checking +			if (arg > INT8_MAX) +				{ CODE ("\x48\x05") DWRD (arg) } +			else +				{ CODE ("\x48\x83\xC0") str_append_c (&program, arg); } +			break; +		case LEFT: +			// sub rax, "arg" -- optimistic, no boundary checking +			if (arg > INT8_MAX) +				{ CODE ("\x48\x2D") DWRD (arg) } +			else +				{ CODE ("\x48\x83\xE8") str_append_c (&program, arg); } +			break; + +		case EAT: +			CODE ("\x41\x88\xDC")             // mov r12b, bl +			CODE ("\x30\xDB")                 // xor bl, bl +			arith[i] = 1; +			break; +		case INCACC: +			CODE ("\x44\x00\xE3")             // add bl, r12b +			arith[i] = 1; +			break; +		case DECACC: +			CODE ("\x44\x28\xE3")             // sub bl, r12b +			arith[i] = 1; +			break; + +		case INC: +			CODE ("\x80\xC3")                 // add bl, "arg" +			str_append_c (&program, arg); +			arith[i] = 1; +			break; +		case DEC: +			CODE ("\x80\xEB")                 // sub bl, "arg" +			str_append_c (&program, arg); +			arith[i] = 1; +			break; +		case SET: +			CODE ("\xB3")                     // mov bl, "arg" +			str_append_c (&program, arg); +			break; + +		case OUT: +			CODE ("\x50\x53")                 // push rax, push rbx +			CODE ("\x48\x0F\xB6\xFB")         // movzx rdi, bl +			CODE ("\x48\xBE") WORD (stdout)   // mov rsi, "stdout" +			CODE ("\x48\xB8") WORD (fputc)    // mov rax, "fputc" +			CODE ("\xFF\xD0")                 // call rax +			CODE ("\x5B\x58")                 // pop rbx, pop rax +			break; +		case IN: +			CODE ("\x50")                     // push rax +			CODE ("\x48\xB8") WORD (cin)      // mov rax, "cin" +			CODE ("\xFF\xD0")                 // call rax +			CODE ("\x88\xC3")                 // mov bl, al +			CODE ("\x58")                     // pop rax +			break; + +		case BEGIN: +			// Don't test the register when the flag has been set already; +			// this doesn't have much of an effect in practice +			if (!i || !arith[i - 1]) +				CODE ("\x84\xDB")             // test bl, bl +			CODE ("\x0F\x84\x00\x00\x00\x00") // jz "offsets[i]" +			break; +		case END: +			// We know that the cell is zero, make this an "if", not a "loop"; +			// this doesn't have much of an effect in practice +			if (!arg) +				break; + +			if (!i || !arith[i - 1]) +				CODE ("\x84\xDB")             // test bl, bl +			CODE ("\x0F\x85\x00\x00\x00\x00") // jnz "offsets[i]" +			break; +		} + +		// No sense in reading it out when we overwrite it immediately; +		// this doesn't have much of an effect in practice +		if (parsed[i].cmd == LEFT || parsed[i].cmd == RIGHT) +			if (i + 1 >= parsed_len +			 || parsed[i + 1].cmd != SET) +				CODE ("\x8A\x18")             // mov bl, [rax] +	} +	// When there is a loop at the end we need to be able to jump past it +	offsets[parsed_len] = program.len; +	str_append_c (&program, '\xC3');          // ret + +	// Now that we know where each instruction is, fill in relative jumps; +	// this must accurately reflect code generators for BEGIN and END +	for (size_t i = 0; i < parsed_len; i++) +	{ +		if ((parsed[i].cmd != BEGIN && parsed[i].cmd != END) +		 || !parsed[i].arg) +			continue; + +		size_t fixup = offsets[i] + 2; +		if (!i || !arith[i - 1]) +			fixup += 2; + +		*(int32_t *)(program.str + fixup) = +			((intptr_t)(offsets[parsed[i].arg]) - (intptr_t)(fixup + 4)); +	} +	free (offsets); +	free (arith); + +#ifdef DEBUG +	FILE *bin = fopen ("bf-jit.bin", "w"); +	fwrite (program.str, program.len, 1, bin); +	fclose (bin); +#endif + +// - - Runtime - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +	// Some systems may have W^X +	void *executable = mmap (NULL, program.len, PROT_READ | PROT_WRITE, +		MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); +	if (!executable) +		exit_fatal ("mmap: %s\n", strerror (errno)); + +	memcpy (executable, program.str, program.len); +	if (mprotect (executable, program.len, PROT_READ | PROT_EXEC)) +		exit_fatal ("mprotect: %s\n", strerror (errno)); + +	// We create crash zones on both ends of the tape for some minimum safety +	long pagesz = sysconf (_SC_PAGESIZE); +	assert (pagesz > 0); + +	const size_t tape_len = (1 << 20) + 2 * pagesz; +	char *tape = mmap (NULL, tape_len, PROT_READ | PROT_WRITE, +		MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); +	if (!tape) +		exit_fatal ("mmap: %s\n", strerror (errno)); + +	memset (tape, 0, tape_len); +	if (mprotect (tape,                     pagesz, PROT_NONE) +	 || mprotect (tape + tape_len - pagesz, pagesz, PROT_NONE)) +		exit_fatal ("mprotect: %s\n", strerror (errno)); + +	((void (*) (char *)) executable)(tape + pagesz); +	return 0; +} diff --git a/interpreters/bf-jit.c b/interpreters/bf-jit.c new file mode 100644 index 0000000..0be6d94 --- /dev/null +++ b/interpreters/bf-jit.c @@ -0,0 +1,327 @@ +// This is an exercise in futility more than anything else +#define _GNU_SOURCE + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <stdbool.h> +#include <assert.h> +#include <errno.h> + +#if (defined __x86_64__ || defined __amd64__) && defined __unix__ +	#include <sys/mman.h> +#else +	#error Platform not supported +#endif + +#define exit_fatal(...)                                                        \ +	do {                                                                       \ +		fprintf (stderr, "fatal: " __VA_ARGS__);                               \ +		exit (EXIT_FAILURE);                                                   \ +	} while (0) + +// --- Safe memory management -------------------------------------------------- + +static void * +xcalloc (size_t m, size_t n) +{ +	void *p = calloc (m, n); +	if (!p) +		exit_fatal ("calloc: %s\n", strerror (errno)); +	return p; +} + +static void * +xrealloc (void *o, size_t n) +{ +	void *p = realloc (o, n); +	if (!p && n) +		exit_fatal ("realloc: %s\n", strerror (errno)); +	return p; +} + +// --- Dynamically allocated strings ------------------------------------------- + +struct str +{ +	char *str;                          ///< String data, null terminated +	size_t alloc;                       ///< How many bytes are allocated +	size_t len;                         ///< How long the string actually is +}; + +static void +str_init (struct str *self) +{ +	self->len = 0; +	self->str = xcalloc (1, (self->alloc = 16)); +} + +static void +str_ensure_space (struct str *self, size_t n) +{ +	// We allocate at least one more byte for the terminating null character +	size_t new_alloc = self->alloc; +	while (new_alloc <= self->len + n) +		new_alloc <<= 1; +	if (new_alloc != self->alloc) +		self->str = xrealloc (self->str, (self->alloc = new_alloc)); +} + +static void +str_append_data (struct str *self, const void *data, size_t n) +{ +	str_ensure_space (self, n); +	memcpy (self->str + self->len, data, n); +	self->str[self->len += n] = '\0'; +} + +static void +str_append_c (struct str *self, char c) +{ +	str_append_data (self, &c, 1); +} + +// --- Application ------------------------------------------------------------- + +struct str data;                        ///< Data tape +volatile size_t dataptr;                ///< Current location on the tape +FILE *input;                            ///< User input + +enum command { RIGHT, LEFT, INC, DEC, SET, IN, OUT, BEGIN, END }; +bool grouped[] = { 1, 1, 1, 1, 1, 0, 0, 0, 0 }; +struct instruction { enum command cmd; size_t arg; }; + +// - - Callbacks - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +// Some things I just really don't want to write in assembly even though it +// is effectively a big performance hit, eliminating the advantage of JIT + +static void +right (size_t arg) +{ +	assert (SIZE_MAX - dataptr > arg); +	dataptr += arg; + +	while (dataptr >= data.len) +		str_append_c (&data, 0); +} + +static void +left (size_t arg) +{ +	assert (dataptr >= arg); +	dataptr -= arg; +} + +static int +cin (void) +{ +	int c = fgetc (input); +	assert (c != EOF); +	return c; +} + +// - - Main  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +int +main (int argc, char *argv[]) +{ +	(void) argc; +	(void) argv; + +	struct str program; +	str_init (&program); + +	int c; +	while ((c = fgetc (stdin)) != EOF) +		str_append_c (&program, c); +	if (ferror (stdin)) +		exit_fatal ("can't read program\n"); +	if (!(input = fopen ("/dev/tty", "rb"))) +		exit_fatal ("can't open terminal for reading\n"); + +// - - Decode and group  - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +	struct instruction *parsed = xcalloc (sizeof *parsed, program.len); +	size_t parsed_len = 0; + +	for (size_t i = 0; i < program.len; i++) +	{ +		enum command cmd; +		switch (program.str[i]) +		{ +		case '>': cmd = RIGHT; break; +		case '<': cmd = LEFT;  break; +		case '+': cmd = INC;   break; +		case '-': cmd = DEC;   break; +		case '.': cmd = OUT;   break; +		case ',': cmd = IN;    break; +		case '[': cmd = BEGIN; break; +		case ']': cmd = END;   break; +		default:  continue; +		} + +		if (!parsed_len || !grouped[cmd] || parsed[parsed_len - 1].cmd != cmd) +			parsed_len++; + +		parsed[parsed_len - 1].cmd = cmd; +		parsed[parsed_len - 1].arg++; +	} + +// - - Simple optimization pass  - - - - - - - - - - - - - - - - - - - - - - - - + +	size_t in = 0, out = 0; +	for (; in < parsed_len; in++, out++) +	{ +		if (in + 2 < parsed_len +		 && parsed[in    ].cmd == BEGIN +		 && parsed[in + 1].cmd == DEC && parsed[in + 1].arg == 1 +		 && parsed[in + 2].cmd == END) +		{ +			parsed[out].cmd = SET; +			parsed[out].arg = 0; +			in += 2; +		} +		else if (out && parsed[out - 1].cmd == SET && parsed[in].cmd == INC) +			parsed[--out].arg += parsed[in].arg; +		else if (out != in) +			parsed[out] = parsed[in]; +	} + +	parsed_len = out; + +// - - Loop pairing  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +	size_t nesting = 0; +	size_t *stack = xcalloc (sizeof *stack, parsed_len); +	for (size_t i = 0; i < parsed_len; i++) +	{ +		switch (parsed[i].cmd) +		{ +		case BEGIN: +			stack[nesting++] = i; +			break; +		case END: +			assert (nesting > 0); + +			--nesting; +			parsed[stack[nesting]].arg = i + 1; +			parsed[i].arg = stack[nesting] + 1; +		default: +			break; +		} +	} +	free (stack); +	assert (nesting == 0); + +// - - JIT - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +	// Functions preserve the registers rbx, rsp, rbp, r12, r13, r14, and r15; +	// while rax, rdi, rsi, rdx, rcx, r8, r9, r10, r11 are scratch registers. + +	str_init (&program); +	size_t *offsets = xcalloc (sizeof *offsets, parsed_len + 1); + +#define CODE(x) { char t[] = x; str_append_data (&program, t, sizeof t - 1); } +#define WORD(x) { size_t t = (size_t)(x); str_append_data (&program, &t, 8); } + +	CODE ("\x49\xBD") WORD (&dataptr)         // mov r13, qword "&dataptr" +	CODE ("\x49\xBF") WORD (&data.str)        // mov r15, qword "&data.str" +	CODE ("\x4D\x8B\x37")                     // mov r14, qword [r15] +	CODE ("\x30\xDB")                         // xor bl, bl + +	for (size_t i = 0; i < parsed_len; i++) +	{ +		offsets[i] = program.len; + +		size_t arg = parsed[i].arg; +		switch (parsed[i].cmd) +		{ +		case RIGHT: +			CODE ("\x41\x88\x1E")             // mov [r14], bl +			CODE ("\x48\xBF") WORD (arg)      // mov rdi, "arg" +			CODE ("\x48\xB8") WORD (right)    // mov rax, "right" +			CODE ("\xFF\xD0")                 // call rax + +			// The data could get reallocated, so reload the address +			CODE ("\x4D\x8B\x37")             // mov r14, qword [r15] +			CODE ("\x4D\x03\x75\x00")         // add r14, [r13] +			CODE ("\x41\x8A\x1E")             // mov bl, [r14] +			break; +		case LEFT: +			CODE ("\x41\x88\x1E")             // mov [r14], bl +			CODE ("\x48\xBF") WORD (arg)      // mov rdi, "arg" +			CODE ("\x49\x29\xFE")             // sub r14, rdi -- optimistic +			CODE ("\x48\xB8") WORD (left)     // mov rax, "left" +			CODE ("\xFF\xD0")                 // call rax +			CODE ("\x41\x8A\x1E")             // mov bl, [r14] +			break; + +		case INC: +			CODE ("\x80\xC3")                 // add bl, "arg" +			str_append_c (&program, arg); +			break; +		case DEC: +			CODE ("\x80\xEB")                 // sub bl, "arg" +			str_append_c (&program, arg); +			break; +		case SET: +			CODE ("\xB3")                     // mov bl, "arg" +			str_append_c (&program, arg); +			break; + +		case OUT: +			CODE ("\x48\x0F\xB6\xFB")         // movzx rdi, bl +			CODE ("\x48\xBE") WORD (stdout)   // mov rsi, "stdout" +			CODE ("\x48\xB8") WORD (fputc)    // mov rax, "fputc" +			CODE ("\xFF\xD0")                 // call rax +			break; +		case IN: +			CODE ("\x48\xB8") WORD (cin)      // mov rax, "cin" +			CODE ("\xFF\xD0")                 // call rax +			CODE ("\x88\xC3")                 // mov bl, al +			break; + +		case BEGIN: +			CODE ("\x84\xDB")                 // test bl, bl +			CODE ("\x0F\x84\x00\x00\x00\x00") // jz "offsets[i]" +			break; +		case END: +			CODE ("\x84\xDB")                 // test bl, bl +			CODE ("\x0F\x85\x00\x00\x00\x00") // jnz "offsets[i]" +			break; +		} +	} +	// When there is a loop at the end we need to be able to jump past it +	offsets[parsed_len] = program.len; +	str_append_c (&program, '\xC3');          // ret + +	// Now that we know where each instruction is, fill in relative jumps +	for (size_t i = 0; i < parsed_len; i++) +	{ +		if (parsed[i].cmd != BEGIN && parsed[i].cmd != END) +			continue; +		size_t fixup = offsets[i] + 4; +		*(int32_t *)(program.str + fixup) = +			((intptr_t)(offsets[parsed[i].arg]) - (intptr_t)(fixup + 4)); +	} +	free (offsets); + +// - - Runtime - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +	// Some systems may have W^X +	void *executable = mmap (NULL, program.len, PROT_READ | PROT_WRITE, +		MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); +	if (!executable) +		exit_fatal ("mmap: %s\n", strerror (errno)); + +	memcpy (executable, program.str, program.len); +	if (mprotect (executable, program.len, PROT_READ | PROT_EXEC)) +		exit_fatal ("mprotect: %s\n", strerror (errno)); + +	str_init (&data); +	str_append_c (&data, 0); +	((void (*) (void)) executable)(); +	return 0; +} diff --git a/interpreters/bf-optimizing.c b/interpreters/bf-optimizing.c new file mode 100644 index 0000000..b37c0f2 --- /dev/null +++ b/interpreters/bf-optimizing.c @@ -0,0 +1,213 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <stdbool.h> +#include <assert.h> +#include <errno.h> + +#define exit_fatal(...)                                                        \ +	do {                                                                       \ +		fprintf (stderr, "fatal: " __VA_ARGS__);                               \ +		exit (EXIT_FAILURE);                                                   \ +	} while (0) + +// --- Safe memory management -------------------------------------------------- + +static void * +xcalloc (size_t m, size_t n) +{ +	void *p = calloc (m, n); +	if (!p) +		exit_fatal ("calloc: %s\n", strerror (errno)); +	return p; +} + +static void * +xrealloc (void *o, size_t n) +{ +	void *p = realloc (o, n); +	if (!p && n) +		exit_fatal ("realloc: %s\n", strerror (errno)); +	return p; +} + +// --- Dynamically allocated strings ------------------------------------------- + +struct str +{ +	char *str;                          ///< String data, null terminated +	size_t alloc;                       ///< How many bytes are allocated +	size_t len;                         ///< How long the string actually is +}; + +static void +str_init (struct str *self) +{ +	self->len = 0; +	self->str = xcalloc (1, (self->alloc = 16)); +} + +static void +str_ensure_space (struct str *self, size_t n) +{ +	// We allocate at least one more byte for the terminating null character +	size_t new_alloc = self->alloc; +	while (new_alloc <= self->len + n) +		new_alloc <<= 1; +	if (new_alloc != self->alloc) +		self->str = xrealloc (self->str, (self->alloc = new_alloc)); +} + +static void +str_append_data (struct str *self, const void *data, size_t n) +{ +	str_ensure_space (self, n); +	memcpy (self->str + self->len, data, n); +	self->str[self->len += n] = '\0'; +} + +static void +str_append_c (struct str *self, char c) +{ +	str_append_data (self, &c, 1); +} + +// --- Main -------------------------------------------------------------------- + +struct str program;                     ///< Raw program +struct str data;                        ///< Data tape + +enum command { RIGHT, LEFT, INC, DEC, SET, IN, OUT, BEGIN, END }; +bool grouped[] = { 1, 1, 1, 1, 1, 0, 0, 0, 0 }; +struct instruction { enum command cmd; size_t arg; }; + +int +main (int argc, char *argv[]) +{ +	(void) argc; str_init (&program); +	(void) argv; str_init (&data); + +	int c; +	while ((c = fgetc (stdin)) != EOF) +		str_append_c (&program, c); +	if (ferror (stdin)) +		exit_fatal ("can't read program\n"); + +	FILE *input = fopen ("/dev/tty", "rb"); +	if (!input) +		exit_fatal ("can't open terminal for reading\n"); + +// - - Decode and group  - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +	struct instruction *parsed = xcalloc (sizeof *parsed, program.len); +	size_t parsed_len = 0; + +	for (size_t i = 0; i < program.len; i++) +	{ +		enum command cmd; +		switch (program.str[i]) +		{ +		case '>': cmd = RIGHT; break; +		case '<': cmd = LEFT;  break; +		case '+': cmd = INC;   break; +		case '-': cmd = DEC;   break; +		case '.': cmd = OUT;   break; +		case ',': cmd = IN;    break; +		case '[': cmd = BEGIN; break; +		case ']': cmd = END;   break; +		default:  continue; +		} + +		if (!parsed_len || !grouped[cmd] || parsed[parsed_len - 1].cmd != cmd) +			parsed_len++; + +		parsed[parsed_len - 1].cmd = cmd; +		parsed[parsed_len - 1].arg++; +	} + +// - - Simple optimization pass  - - - - - - - - - - - - - - - - - - - - - - - - + +	size_t in = 0, out = 0; +	for (; in < parsed_len; in++, out++) +	{ +		if (in + 2 < parsed_len +		 && parsed[in    ].cmd == BEGIN +		 && parsed[in + 1].cmd == DEC && parsed[in + 1].arg == 1 +		 && parsed[in + 2].cmd == END) +		{ +			parsed[out].cmd = SET; +			parsed[out].arg = 0; +			in += 2; +		} +		else if (out && parsed[out - 1].cmd == SET && parsed[in].cmd == INC) +			parsed[--out].arg += parsed[in].arg; +		else if (out != in) +			parsed[out] = parsed[in]; +	} + +	parsed_len = out; + +// - - Loop pairing  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +	size_t nesting = 0; +	size_t *stack = xcalloc (sizeof *stack, parsed_len); +	for (size_t i = 0; i < parsed_len; i++) +	{ +		switch (parsed[i].cmd) +		{ +		case BEGIN: +			stack[nesting++] = i; +			break; +		case END: +			assert (nesting > 0); + +			--nesting; +			parsed[stack[nesting]].arg = i; +			parsed[i].arg = stack[nesting]; +		default: +			break; +		} +	} +	assert (nesting == 0); + +// - - Runtime - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +	size_t dataptr = 0; +	str_append_c (&data, 0); + +	for (size_t i = 0; i < parsed_len; i++) +	{ +		size_t arg = parsed[i].arg; +		switch (parsed[i].cmd) +		{ +		case RIGHT: +			assert (SIZE_MAX - dataptr > arg); +			dataptr += arg; + +			while (dataptr >= data.len) +				str_append_c (&data, 0); +			break; +		case LEFT: +			assert (dataptr >= arg); +			dataptr -= arg; +			break; + +		case INC: data.str[dataptr] += arg; break; +		case DEC: data.str[dataptr] -= arg; break; +		case SET: data.str[dataptr]  = arg; break; + +		case OUT: +			fputc (data.str[dataptr], stdout); +			break; +		case IN: +			data.str[dataptr] = c = fgetc (input); +			assert (c != EOF); +			break; + +		case BEGIN: if (!data.str[dataptr]) i = arg; break; +		case END:   if ( data.str[dataptr]) i = arg; break; +		} +	} +	return 0; +} diff --git a/interpreters/bf.c b/interpreters/bf.c new file mode 100644 index 0000000..606c609 --- /dev/null +++ b/interpreters/bf.c @@ -0,0 +1,160 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <assert.h> +#include <errno.h> + +#define exit_fatal(...)                                                        \ +	do {                                                                       \ +		fprintf (stderr, "fatal: " __VA_ARGS__);                               \ +		exit (EXIT_FAILURE);                                                   \ +	} while (0) + +// --- Safe memory management -------------------------------------------------- + +static void * +xmalloc (size_t n) +{ +	void *p = malloc (n); +	if (!p) +		exit_fatal ("malloc: %s\n", strerror (errno)); +	return p; +} + +static void * +xrealloc (void *o, size_t n) +{ +	void *p = realloc (o, n); +	if (!p && n) +		exit_fatal ("realloc: %s\n", strerror (errno)); +	return p; +} + +// --- Dynamically allocated strings ------------------------------------------- + +struct str +{ +	char *str;                          ///< String data, null terminated +	size_t alloc;                       ///< How many bytes are allocated +	size_t len;                         ///< How long the string actually is +}; + +static void +str_init (struct str *self) +{ +	self->alloc = 16; +	self->len = 0; +	self->str = strcpy (xmalloc (self->alloc), ""); +} + +static void +str_ensure_space (struct str *self, size_t n) +{ +	// We allocate at least one more byte for the terminating null character +	size_t new_alloc = self->alloc; +	while (new_alloc <= self->len + n) +		new_alloc <<= 1; +	if (new_alloc != self->alloc) +		self->str = xrealloc (self->str, (self->alloc = new_alloc)); +} + +static void +str_append_data (struct str *self, const void *data, size_t n) +{ +	str_ensure_space (self, n); +	memcpy (self->str + self->len, data, n); +	self->len += n; +	self->str[self->len] = '\0'; +} + +static void +str_append_c (struct str *self, char c) +{ +	str_append_data (self, &c, 1); +} + +// --- Main -------------------------------------------------------------------- + +int +main (int argc, char *argv[]) +{ +	struct str program; str_init (&program); +	struct str data;    str_init (&data); + +	int c; +	while ((c = fgetc (stdin)) != EOF) +		str_append_c (&program, c); +	if (ferror (stdin)) +		exit_fatal ("can't read program\n"); + +	FILE *input = fopen ("/dev/tty", "rb"); +	if (!input) +		exit_fatal ("can't open terminal for reading\n"); + +	size_t dataptr = 0; +	str_append_c (&data, 0); + +	for (size_t i = 0; i < program.len; i++) +	{ +		switch (program.str[i]) +		{ +			long pairs; +		case '>': +			assert (dataptr != SIZE_MAX); +			dataptr++; +			if (dataptr == data.len) +				str_append_c (&data, 0); +			break; +		case '<': +			assert (dataptr != 0); +			dataptr--; +			break; + +		case '+': data.str[dataptr]++; break; +		case '-': data.str[dataptr]--; break; + +		case '.': +			fputc (data.str[dataptr], stdout); +			break; +		case ',': +			data.str[dataptr] = c = fgetc (input); +			assert (c != EOF); +			break; + +		case '[': +			if (data.str[dataptr]) break; + +			for (pairs = 0; i < program.len; i++) +			{ +				switch (program.str[i]) +				{ +				case '[': pairs++; break; +				case ']': pairs--; break; +				} +				if (!pairs) +					break; +			} +			assert (!pairs); +			break; +		case ']': +			if (!data.str[dataptr]) break; + +			for (pairs = 0; i != SIZE_MAX; i--) +			{ +				switch (program.str[i]) +				{ +				case '[': pairs--; break; +				case ']': pairs++; break; +				} +				if (!pairs) +					break; +			} +			assert (!pairs); +			break; +		default: +			break; +		} +	} +	return 0; +} | 
