From b846a7a0ea77de85c0d84d3653d8db8d3c81b4c9 Mon Sep 17 00:00:00 2001
From: Přemysl Janouch
Date: Sat, 24 Dec 2016 22:27:38 +0100
Subject: Add support for OpenBSD
It just rejects the generic System V ABI and has other syscall numbers.
---
Makefile | 8 +-
README.adoc | 6 +-
bfc-amd64-linux.c | 733 -----------------------------------------------------
bfc-amd64.c | 744 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 752 insertions(+), 739 deletions(-)
delete mode 100644 bfc-amd64-linux.c
create mode 100644 bfc-amd64.c
diff --git a/Makefile b/Makefile
index 5bb5db1..d25311c 100644
--- a/Makefile
+++ b/Makefile
@@ -1,11 +1,13 @@
# All we need is C99 and POSIX, which this should make available
CFLAGS = -std=gnu99
-NAMES = bfc-amd64-linux
+NAMES = bfc-amd64-linux bfc-amd64-openbsd
all: $(NAMES)
-%: %.c
- $(CC) $(CPPFLAGS) $(CFLAGS) $< -o $@
+%-linux: %.c
+ $(CC) $(CPPFLAGS) $(CFLAGS) $< -o $@ -DTARGET_LINUX
+%-openbsd: %.c
+ $(CC) $(CPPFLAGS) $(CFLAGS) $< -o $@ -DTARGET_OPENBSD
clean:
rm -f $(NAMES)
diff --git a/README.adoc b/README.adoc
index e04e8bd..69ebd7e 100644
--- a/README.adoc
+++ b/README.adoc
@@ -1,8 +1,8 @@
bfc
===
-'bfc' is a small, fast, self-contained, optimizing Brainfuck compiler for Linux
-on Intel x86-64.
+'bfc' is a small, fast, self-contained, optimizing Brainfuck compiler for *nix
+on Intel x86-64. Currently supported targets are Linux and OpenBSD.
Also included are several interpreters in various states of sophistication that
document my progress as I was writing this, from the simplest approach to an
@@ -14,7 +14,7 @@ The compiler itself is platform agnostic.
Building
--------
Build dependencies: a C99 compiler +
-Runtime dependencies: Linux
+Runtime dependencies: Linux or OpenBSD
$ git clone https://github.com/pjanouch/bfc.git
$ cd bfc
diff --git a/bfc-amd64-linux.c b/bfc-amd64-linux.c
deleted file mode 100644
index 1579681..0000000
--- a/bfc-amd64-linux.c
+++ /dev/null
@@ -1,733 +0,0 @@
-// This is an exercise in futility more than anything else
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#ifdef __unix__
-#include
-#endif
-
-#define exit_fatal(...) \
- do { \
- fprintf (stderr, "fatal: " __VA_ARGS__); \
- exit (EXIT_FAILURE); \
- } while (0)
-
-// --- Safe memory management --------------------------------------------------
-
-static void *
-xcalloc (size_t m, size_t n)
-{
- void *p = calloc (m, n);
- if (!p)
- exit_fatal ("calloc: %s\n", strerror (errno));
- return p;
-}
-
-static void *
-xrealloc (void *o, size_t n)
-{
- void *p = realloc (o, n);
- if (!p && n)
- exit_fatal ("realloc: %s\n", strerror (errno));
- return p;
-}
-
-// --- Dynamically allocated strings -------------------------------------------
-
-struct str
-{
- char *str; ///< String data, null terminated
- size_t alloc; ///< How many bytes are allocated
- size_t len; ///< How long the string actually is
-};
-
-static void
-str_init (struct str *self)
-{
- self->len = 0;
- self->str = xcalloc (1, (self->alloc = 16));
-}
-
-static void
-str_ensure_space (struct str *self, size_t n)
-{
- // We allocate at least one more byte for the terminating null character
- size_t new_alloc = self->alloc;
- while (new_alloc <= self->len + n)
- new_alloc <<= 1;
- if (new_alloc != self->alloc)
- self->str = xrealloc (self->str, (self->alloc = new_alloc));
-}
-
-static void
-str_append_data (struct str *self, const void *data, size_t n)
-{
- str_ensure_space (self, n);
- memcpy (self->str + self->len, data, n);
- self->str[self->len += n] = '\0';
-}
-
-static void
-str_append_c (struct str *self, char c)
-{
- str_append_data (self, &c, 1);
-}
-
-// --- Application -------------------------------------------------------------
-
-enum command
-{
- RIGHT, LEFT, INC, DEC, IN, OUT, BEGIN, END,
- SET, EAT, INCACC, DECACC
-};
-
-bool grouped[] = { 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
-struct instruction { enum command cmd; int offset; size_t arg; };
-#define INSTRUCTION(c, o, a) (struct instruction) { (c), (o), (a) }
-
-// - - Debugging - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#ifdef DEBUG
-static void
-debug_dump_instruction (FILE *fp, const struct instruction *in)
-{
- const char *name;
- switch (in->cmd)
- {
- case RIGHT: name = "RIGHT "; break;
- case LEFT: name = "LEFT "; break;
- case INC: name = "INC "; break;
- case DEC: name = "DEC "; break;
- case OUT: name = "OUT "; break;
- case IN: name = "IN "; break;
- case BEGIN: name = "BEGIN "; break;
- case END: name = "END "; break;
- case SET: name = "SET "; break;
- case EAT: name = "EAT "; break;
- case INCACC: name = "INCACC"; break;
- case DECACC: name = "DECACC"; break;
- }
- fprintf (fp, "%s %zu", name, in->arg);
- if (in->offset != 0)
- fprintf (fp, " [%d]", in->offset);
- fprintf (fp, "\n");
-}
-
-static void
-debug_dump (const char *filename, struct instruction *in, size_t len)
-{
- FILE *fp = fopen (filename, "w");
- long indent = 0;
- for (size_t i = 0; i < len; i++)
- {
- if (in[i].cmd == END)
- indent--;
- for (long k = 0; k < indent; k++)
- fputs (" ", fp);
- debug_dump_instruction (fp, &in[i]);
- if (in[i].cmd == BEGIN)
- indent++;
- }
- fclose (fp);
-}
-#else
-#define debug_dump(...)
-#endif
-
-// - - Optimization passes - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-static size_t
-optimize_assignment (struct instruction *irb, size_t irb_len)
-{
- size_t in = 0, out = 0;
- for (; in < irb_len; in++, out++)
- {
- if (in + 2 < irb_len
- && irb[in ].cmd == BEGIN
- && irb[in + 1].cmd == DEC && irb[in + 1].arg == 1
- && irb[in + 2].cmd == END)
- {
- irb[out] = INSTRUCTION (SET, 0, 0);
- in += 2;
- }
- else if (out && irb[out - 1].cmd == SET && irb[in].cmd == INC)
- irb[--out].arg += irb[in].arg;
- else if (out != in)
- irb[out] = irb[in];
- }
- return out;
-}
-
-// Add the offset of the LEFT/RIGHT instruction to the accumulator
-static bool
-add_direction_offset (struct instruction *irb, intptr_t *acc)
-{
- if (irb->cmd == RIGHT)
- *acc += irb->arg;
- else if (irb->cmd == LEFT)
- *acc -= (intptr_t) irb->arg;
- else
- return false;
- return true;
-}
-
-// Add offsets to INC/DEC/SET stuck between LEFT/RIGHT
-// and compress the LEFT/RIGHT sequences
-static size_t
-optimize_offseted_inc_dec (struct instruction *irb, size_t irb_len)
-{
- size_t in = 0, out = 0;
- for (in = 0, out = 0; in < irb_len; in++, out++)
- {
- intptr_t dir = 0;
- if (!add_direction_offset (&irb[in], &dir))
- {
- irb[out] = irb[in];
- continue;
- }
-
- while (in + 2 < irb_len)
- {
- // An immediate offset has its limits on x86-64
- if (dir < INT8_MIN || dir > INT8_MAX)
- break;
- intptr_t diff = 0;
- if (!add_direction_offset (&irb[in + 2], &diff))
- break;
- int cmd = irb[in + 1].cmd;
- if (cmd != INC && cmd != DEC && cmd != SET)
- break;
-
- irb[out] = irb[in + 1];
- irb[out].offset = dir;
-
- dir += diff;
- out += 1;
- in += 2;
- }
-
- for (; in + 1 < irb_len; in++)
- if (!add_direction_offset (&irb[in + 1], &dir))
- break;
-
- if (!dir)
- out--;
- else if (dir > 0)
- irb[out] = INSTRUCTION (RIGHT, 0, dir);
- else
- irb[out] = INSTRUCTION (LEFT, 0, -dir);
- }
- return out;
-}
-
-// Try to eliminate loops that eat a cell and add/subtract its value
-// to/from some other cell
-static size_t
-optimize_inc_dec_loops (struct instruction *irb, size_t irb_len)
-{
- size_t in = 0, out = 0;
- for (in = 0, out = 0; in < irb_len; in++, out++)
- {
- irb[out] = irb[in];
- if (irb[in].cmd != BEGIN)
- continue;
-
- bool ok = false;
- size_t count = 0;
- for (size_t k = in + 1; k < irb_len; k++)
- {
- if (irb[k].cmd == END)
- {
- ok = true;
- break;
- }
- if (irb[k].cmd != INC
- && irb[k].cmd != DEC)
- break;
- count++;
- }
- if (!ok)
- continue;
-
- // Stable sort operations by their offsets, put [0] first
- bool sorted;
- do
- {
- sorted = true;
- for (size_t k = 1; k < count; k++)
- {
- if (irb[in + k].offset == 0)
- continue;
- if (irb[in + k + 1].offset != 0
- && irb[in + k].offset <= irb[in + k + 1].offset)
- continue;
-
- struct instruction tmp = irb[in + k + 1];
- irb[in + k + 1] = irb[in + k];
- irb[in + k] = tmp;
- sorted = false;
- }
- }
- while (!sorted);
-
- // Abort the optimization on duplicate offsets (complication with [0])
- for (size_t k = 1; k < count; k++)
- if (irb[in + k].offset == irb[in + k + 1].offset)
- ok = false;
- // XXX: can't make the code longer either
- for (size_t k = 1; k <= count; k++)
- if (irb[in + k].arg != 1)
- ok = false;
- if (!ok
- || irb[in + 1].cmd != DEC
- || irb[in + 1].offset != 0)
- continue;
-
- int min_safe_left_offset = 0;
- if (in > 1 && irb[in - 1].cmd == RIGHT)
- min_safe_left_offset = -irb[in - 1].arg;
-
- bool cond_needed_for_safety = false;
- for (size_t k = 0; k < count; k++)
- if (irb[in + k + 1].offset < min_safe_left_offset)
- {
- cond_needed_for_safety = true;
- break;
- }
-
- in++;
- if (cond_needed_for_safety)
- out++;
-
- irb[out] = INSTRUCTION (EAT, 0, 0);
- for (size_t k = 1; k < count; k++)
- irb[out + k] = INSTRUCTION (irb[in + k].cmd == INC
- ? INCACC : DECACC, irb[in + k].offset, 0);
-
- in += count;
- out += count;
-
- if (cond_needed_for_safety)
- irb[out] = INSTRUCTION (END, 0, 0);
- else
- out--;
- }
- return out;
-}
-
-// - - Loop pairing - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-static void
-pair_loops (struct instruction *irb, size_t irb_len)
-{
- size_t nesting = 0;
- size_t *stack = xcalloc (sizeof *stack, irb_len);
- for (size_t i = 0; i < irb_len; i++)
- {
- switch (irb[i].cmd)
- {
- case BEGIN:
- stack[nesting++] = i;
- break;
- case END:
- if (nesting <= 0)
- exit_fatal ("unbalanced loops\n");
-
- --nesting;
- irb[stack[nesting]].arg = i + 1;
-
- // Looping can be disabled by optimizations
- if (irb[i].arg)
- irb[i].arg = stack[nesting] + 1;
- default:
- break;
- }
- }
- free (stack);
-
- if (nesting != 0)
- exit_fatal ("unbalanced loops\n");
-}
-
-// - - Main - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-int
-main (int argc, char *argv[])
-{
- if (argc > 3)
- exit_fatal ("usage: %s [INPUT-FILE] [OUTPUT-FILE]\n", argv[0]);
-
- FILE *input_file = stdin;
- if (argc > 1 && !(input_file = fopen (argv[1], "r")))
- exit_fatal ("fopen: %s: %s\n", argv[1], strerror (errno));
-
- const char *output_path = "a.out";
- if (argc > 2)
- output_path = argv[2];
-
- struct str buffer;
- str_init (&buffer);
-
- int c;
- while ((c = fgetc (input_file)) != EOF)
- str_append_c (&buffer, c);
- if (ferror (input_file))
- exit_fatal ("can't read program\n");
- fclose (input_file);
-
-// - - Decode, group and optimize - - - - - - - - - - - - - - - - - - - - - - -
-
- // This is our Intermediate Representation Buffer
- struct instruction *irb = xcalloc (sizeof *irb, buffer.len);
- size_t irb_len = 0;
-
- for (size_t i = 0; i < buffer.len; i++)
- {
- enum command cmd;
- switch (buffer.str[i])
- {
- case '>': cmd = RIGHT; break;
- case '<': cmd = LEFT; break;
- case '+': cmd = INC; break;
- case '-': cmd = DEC; break;
- case '.': cmd = OUT; break;
- case ',': cmd = IN; break;
- case '[': cmd = BEGIN; break;
- case ']': cmd = END; break;
- default: continue;
- }
-
- // The most basic optimization is to group identical commands together
- if (!irb_len || !grouped[cmd] || irb[irb_len - 1].cmd != cmd)
- irb_len++;
-
- irb[irb_len - 1].cmd = cmd;
- irb[irb_len - 1].arg++;
- }
-
- debug_dump ("bf-no-opt.txt", irb, irb_len);
- irb_len = optimize_assignment (irb, irb_len);
- debug_dump ("bf-pre-offsets.txt", irb, irb_len);
- irb_len = optimize_offseted_inc_dec (irb, irb_len);
- debug_dump ("bf-pre-incdec-unloop.txt", irb, irb_len);
- irb_len = optimize_inc_dec_loops (irb, irb_len);
- debug_dump ("bf-optimized.txt", irb, irb_len);
- pair_loops (irb, irb_len);
- debug_dump ("bf-final.txt", irb, irb_len);
-
-// - - Code generation - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
- str_init (&buffer);
- size_t *offsets = xcalloc (sizeof *offsets, irb_len + 1);
- bool *sets_flags = xcalloc (sizeof *sets_flags, irb_len);
-
-#define CODE(x) { char t[] = x; str_append_data (&buffer, t, sizeof t - 1); }
-#define LE(v) (uint8_t[]) { v, v>>8, v>>16, v>>24, v>>32, v>>40, v>>48, v>>56 }
-#define DB(x) { uint64_t v = (x); str_append_data (&buffer, LE (v), 1); }
-#define DW(x) { uint64_t v = (x); str_append_data (&buffer, LE (v), 2); }
-#define DD(x) { uint64_t v = (x); str_append_data (&buffer, LE (v), 4); }
-#define DQ(x) { uint64_t v = (x); str_append_data (&buffer, LE (v), 8); }
-
- enum
- {
- ELF_LOAD_CODE = 0x400000, // where code is loaded (usual)
- ELF_LOAD_DATA = 0x800000 // where the tape is placed
- };
-
- CODE ("\xB8") DD (ELF_LOAD_DATA) // mov rax, "ELF_LOAD_DATA"
- CODE ("\x30\xDB") // xor bl, bl
-
- for (size_t i = 0; i < irb_len; i++)
- {
- offsets[i] = buffer.len;
-
- size_t arg = irb[i].arg;
- assert (arg <= UINT32_MAX);
-
- int offset = irb[i].offset;
- assert (offset <= INT8_MAX && offset >= INT8_MIN);
-
- // Don't save what we've just loaded
- if (irb[i].cmd == LEFT || irb[i].cmd == RIGHT)
- if (i < 2 || i + 1 >= irb_len
- || (irb[i - 2].cmd != LEFT && irb[i - 2].cmd != RIGHT)
- || irb[i - 1].cmd != BEGIN
- || irb[i + 1].cmd != END)
- CODE ("\x88\x18") // mov [rax], bl
-
- switch (irb[i].cmd)
- {
- case RIGHT:
- // add rax, "arg" -- optimistic, no boundary checking
- if (arg > INT8_MAX) { CODE ("\x48\x05") DD (arg) }
- else { CODE ("\x48\x83\xC0") DB (arg) }
- break;
- case LEFT:
- // sub rax, "arg" -- optimistic, no boundary checking
- if (arg > INT8_MAX) { CODE ("\x48\x2D") DD (arg) }
- else { CODE ("\x48\x83\xE8") DB (arg) }
- break;
-
- case EAT:
- // NOTE: the kernel destroys rcx and r11 on syscalls,
- // there must be no OUT or IN between EAT and INCACC/DECACC
- CODE ("\x88\xD9" "\x30\xDB") // mov cl, bl; xor bl, bl
- sets_flags[i] = true;
- break;
- case INCACC:
- if (offset)
- {
- CODE ("\x00\x48") DB (offset) // add [rax+"offset"], cl
- }
- else
- {
- CODE ("\x00\xCB") // add bl, cl
- sets_flags[i] = true;
- }
- break;
- case DECACC:
- if (offset)
- {
- CODE ("\x28\x48") DB (offset) // sub [rax+"offset"], cl
- }
- else
- {
- CODE ("\x28\xCB") // sub bl, cl
- sets_flags[i] = true;
- }
- break;
-
- case INC:
- if (offset)
- {
- CODE ("\x80\x40") DB (offset) // add byte [rax+"offset"], "arg"
- }
- else
- {
- CODE ("\x80\xC3") // add bl, "arg"
- sets_flags[i] = true;
- }
- DB (arg)
- break;
- case DEC:
- if (offset)
- {
- CODE ("\x80\x68") DB (offset) // sub byte [rax+"offset"], "arg"
- }
- else
- {
- CODE ("\x80\xEB") // sub bl, "arg"
- sets_flags[i] = true;
- }
- DB (arg)
- break;
- case SET:
- if (offset)
- {
- CODE ("\xC6\x40") DB (offset) // mov byte [rax+"offset"], "arg"
- }
- else
- CODE ("\xB3") // mov bl, "arg"
- DB (arg)
- break;
-
- case OUT:
- CODE ("\xE8") DD (0) // call "write"
- break;
- case IN:
- CODE ("\xE8") DD (0) // call "read"
- break;
-
- case BEGIN:
- // Don't test the register when the flag has been set already;
- // this doesn't have much of an effect in practice
- if (!i || !sets_flags[i - 1])
- CODE ("\x84\xDB") // test bl, bl
- CODE ("\x0F\x84\x00\x00\x00\x00") // jz "offsets[arg]"
- break;
- case END:
- // We know that the cell is zero, make this an "if", not a "loop";
- // this doesn't have much of an effect in practice
- if (!arg)
- break;
-
- if (!i || !sets_flags[i - 1])
- CODE ("\x84\xDB") // test bl, bl
- CODE ("\x0F\x85\x00\x00\x00\x00") // jnz "offsets[arg]"
- break;
- }
-
- // No sense in reading it out when we overwrite it immediately;
- // this doesn't have much of an effect in practice
- if (irb[i].cmd == LEFT || irb[i].cmd == RIGHT)
- if (i + 1 >= irb_len
- || irb[i + 1].cmd != SET
- || irb[i + 1].offset != 0)
- CODE ("\x8A\x18") // mov bl, [rax]
- }
- // When there is a loop at the end we need to be able to jump past it
- offsets[irb_len] = buffer.len;
-
- // Write an epilog which handles all the OS interfacing
- //
- // System V x86-64 ABI:
- // rax <-> both syscall number and return value
- // args -> rdi, rsi, rdx, r10, r8, r9
- // trashed <- rcx, r11
-
- enum { SYS_READ = 0, SYS_WRITE = 1, SYS_EXIT = 60 };
-
- CODE ("\xB8") DD (SYS_EXIT) // mov eax, 0x3c
- CODE ("\x48\x31\xFF") // xor rdi, rdi
- CODE ("\x0F\x05") // syscall
-
- size_t fatal_offset = buffer.len;
- CODE ("\x48\x89\xF7") // mov rdi, rsi -- use the string in rsi
- CODE ("\x30\xC0") // xor al, al -- look for the nil byte
- CODE ("\x48\x31\xC9") // xor rcx, rcx
- CODE ("\x48\xF7\xD1") // not rcx -- start from -1
- CODE ("\xFC" "\xF2\xAE") // cld; repne scasb -- decrement until found
- CODE ("\x48\xF7\xD1") // not rcx
- CODE ("\x48\x8D\x51\xFF") // lea rdx, [rcx-1] -- save length in rdx
- CODE ("\xB8") DD (SYS_WRITE) // mov eax, "SYS_WRITE"
- CODE ("\xBF") DD (2) // mov edi, "STDERR_FILENO"
- CODE ("\x0F\x05") // syscall
-
- CODE ("\xB8") DD (SYS_EXIT) // mov eax, "SYS_EXIT"
- CODE ("\xBF") DD (1) // mov edi, "EXIT_FAILURE"
- CODE ("\x0F\x05") // syscall
-
- size_t read_offset = buffer.len;
- CODE ("\x50") // push rax -- save tape position
- CODE ("\xB8") DD (SYS_READ) // mov eax, "SYS_READ"
- CODE ("\x48\x89\xC7") // mov rdi, rax -- STDIN_FILENO
- CODE ("\x66\x6A\x00") // push word 0 -- the default value for EOF
- CODE ("\x48\x89\xE6") // mov rsi, rsp -- the char starts at rsp
- CODE ("\xBA") DD (1) // mov edx, 1 -- count
- CODE ("\x0F\x05") // syscall
- CODE ("\x66\x5B") // pop bx
-
- CODE ("\x48\x83\xF8\x00") // cmp rax, 0
- CODE ("\x48\x8D\x35") DD (4) // lea rsi, [rel read_message]
- CODE ("\x7C") // jl "fatal_offset" -- write failure message
- DB ((intptr_t) fatal_offset - (intptr_t) (buffer.len + 1))
- CODE ("\x58") // pop rax -- restore tape position
- CODE ("\xC3") // ret
- CODE ("fatal: read failed\n\0")
-
- size_t write_offset = buffer.len;
- CODE ("\x50") // push rax -- save tape position
- CODE ("\xB8") DD (SYS_WRITE) // mov eax, "SYS_WRITE"
- CODE ("\x48\x89\xC7") // mov rdi, rax -- STDOUT_FILENO
- CODE ("\x66\x53") // push bx
- CODE ("\x48\x89\xE6") // mov rsi, rsp -- the char starts at rsp
- CODE ("\xBA") DD (1) // mov edx, 1 -- count
- CODE ("\x0F\x05") // syscall
- CODE ("\x66\x5B") // pop bx
-
- CODE ("\x48\x83\xF8\x00") // cmp rax, 0
- CODE ("\x48\x8D\x35") DD (4) // lea rsi, [rel write_message]
- CODE ("\x7C") // jl "fatal_offset" -- write failure message
- DB ((intptr_t) fatal_offset - (intptr_t) (buffer.len + 1))
- CODE ("\x58") // pop rax -- restore tape position
- CODE ("\xC3") // ret
- CODE ("fatal: write failed\n\0")
-
- // Now that we know where each instruction is, fill in relative jumps
- for (size_t i = 0; i < irb_len; i++)
- {
- if (!irb[i].arg)
- continue;
-
- // This must accurately reflect the code generators
- intptr_t target, fixup = offsets[i];
- if (irb[i].cmd == BEGIN || irb[i].cmd == END)
- {
- fixup += (i && sets_flags[i - 1]) ? 2 : 4;
- target = offsets[irb[i].arg];
- }
- else if (irb[i].cmd == IN) { fixup++; target = read_offset; }
- else if (irb[i].cmd == OUT) { fixup++; target = write_offset; }
- else continue;
-
- uint64_t v = target - (fixup + 4);
- memcpy (buffer.str + fixup, LE (v), 4);
- }
- free (offsets);
- free (sets_flags);
-
-// - - Output - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
- // Now that we know how long the machine code is, we can write the header.
- // Note that for PIE we would need to depend on the dynamic linker, so no.
- //
- // Recommended reading:
- // http://www.muppetlabs.com/~breadbox/software/tiny/teensy.html
- // man 5 elf
-
- struct str code = buffer;
- str_init (&buffer);
-
- enum
- {
- ELF_HEADER_SIZE = 64, // size of the ELF header
- ELF_PROGRAM_ENTRY_SIZE = 56, // size of a program header
- ELF_SECTION_ENTRY_SIZE = 64, // size of a section header
- ELF_META_SIZE = ELF_HEADER_SIZE + 2 * ELF_PROGRAM_ENTRY_SIZE
- };
-
- // ELF header
- CODE ("\x7F" "ELF\x02\x01\x01") // ELF, 64-bit, little endian, v1
- CODE ("\x00\x00" "\0\0\0\0\0\0\0") // Unix System V ABI, v0, padding
- DW (2) DW (62) DD (1) // executable, x86-64, v1
- DQ (ELF_LOAD_CODE + ELF_META_SIZE) // entry point address
- DQ (ELF_HEADER_SIZE) DQ (0) // program, section header offset
- DD (0) // no processor-specific flags
- DW (ELF_HEADER_SIZE) // ELF header size
- DW (ELF_PROGRAM_ENTRY_SIZE) DW (2) // program hdr tbl entry size, count
- DW (ELF_SECTION_ENTRY_SIZE) DW (0) // section hdr tbl entry size, count
- DW (0) // no section index for strings
-
- // Program header for code
- // The entry point address seems to require alignment, so map start of file
- DD (1) DD (5) // PT_LOAD, PF_R | PF_X
- DQ (0) // offset within the file
- DQ (ELF_LOAD_CODE) // address in virtual memory
- DQ (ELF_LOAD_CODE) // address in physical memory
- DQ (ELF_META_SIZE + code.len) // length within the file
- DQ (ELF_META_SIZE + code.len) // length within memory
- DQ (4096) // segment alignment
-
- // Program header for the tape
- DD (1) DD (6) // PT_LOAD, PF_R | PF_W
- DQ (0) // offset within the file
- DQ (ELF_LOAD_DATA) // address in virtual memory
- DQ (ELF_LOAD_DATA) // address in physical memory
- DQ (0) // length within the file
- DQ (1 << 20) // one megabyte of memory
- DQ (4096) // segment alignment
-
- // The section header table is optional and we don't need it for anything
-
- FILE *output_file;
-#ifdef __unix__
- int output_fd;
- if ((output_fd = open (output_path, O_CREAT | O_WRONLY, 0777)) < 0)
- exit_fatal ("open: %s: %s\n", output_path, strerror (errno));
- if (!(output_file = fdopen (output_fd, "w")))
- exit_fatal ("fdopen: %s\n", strerror (errno));
-#else
- if (!(output_file = fopen (output_path, "w")))
- exit_fatal ("fopen: %s: %s\n", output_path, strerror (errno));
-#endif
-
- fwrite (buffer.str, buffer.len, 1, output_file);
- fwrite (code.str, code.len, 1, output_file);
- fclose (output_file);
- return 0;
-}
diff --git a/bfc-amd64.c b/bfc-amd64.c
new file mode 100644
index 0000000..9681ca1
--- /dev/null
+++ b/bfc-amd64.c
@@ -0,0 +1,744 @@
+// This is an exercise in futility more than anything else
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef __unix__
+#include
+#endif
+
+#define exit_fatal(...) \
+ do { \
+ fprintf (stderr, "fatal: " __VA_ARGS__); \
+ exit (EXIT_FAILURE); \
+ } while (0)
+
+// --- Safe memory management --------------------------------------------------
+
+static void *
+xcalloc (size_t m, size_t n)
+{
+ void *p = calloc (m, n);
+ if (!p)
+ exit_fatal ("calloc: %s\n", strerror (errno));
+ return p;
+}
+
+static void *
+xrealloc (void *o, size_t n)
+{
+ void *p = realloc (o, n);
+ if (!p && n)
+ exit_fatal ("realloc: %s\n", strerror (errno));
+ return p;
+}
+
+// --- Dynamically allocated strings -------------------------------------------
+
+struct str
+{
+ char *str; ///< String data, null terminated
+ size_t alloc; ///< How many bytes are allocated
+ size_t len; ///< How long the string actually is
+};
+
+static void
+str_init (struct str *self)
+{
+ self->len = 0;
+ self->str = xcalloc (1, (self->alloc = 16));
+}
+
+static void
+str_ensure_space (struct str *self, size_t n)
+{
+ // We allocate at least one more byte for the terminating null character
+ size_t new_alloc = self->alloc;
+ while (new_alloc <= self->len + n)
+ new_alloc <<= 1;
+ if (new_alloc != self->alloc)
+ self->str = xrealloc (self->str, (self->alloc = new_alloc));
+}
+
+static void
+str_append_data (struct str *self, const void *data, size_t n)
+{
+ str_ensure_space (self, n);
+ memcpy (self->str + self->len, data, n);
+ self->str[self->len += n] = '\0';
+}
+
+static void
+str_append_c (struct str *self, char c)
+{
+ str_append_data (self, &c, 1);
+}
+
+// --- Application -------------------------------------------------------------
+
+enum command
+{
+ RIGHT, LEFT, INC, DEC, IN, OUT, BEGIN, END,
+ SET, EAT, INCACC, DECACC
+};
+
+bool grouped[] = { 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
+struct instruction { enum command cmd; int offset; size_t arg; };
+#define INSTRUCTION(c, o, a) (struct instruction) { (c), (o), (a) }
+
+// - - Debugging - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#ifdef DEBUG
+static void
+debug_dump_instruction (FILE *fp, const struct instruction *in)
+{
+ const char *name;
+ switch (in->cmd)
+ {
+ case RIGHT: name = "RIGHT "; break;
+ case LEFT: name = "LEFT "; break;
+ case INC: name = "INC "; break;
+ case DEC: name = "DEC "; break;
+ case OUT: name = "OUT "; break;
+ case IN: name = "IN "; break;
+ case BEGIN: name = "BEGIN "; break;
+ case END: name = "END "; break;
+ case SET: name = "SET "; break;
+ case EAT: name = "EAT "; break;
+ case INCACC: name = "INCACC"; break;
+ case DECACC: name = "DECACC"; break;
+ }
+ fprintf (fp, "%s %zu", name, in->arg);
+ if (in->offset != 0)
+ fprintf (fp, " [%d]", in->offset);
+ fprintf (fp, "\n");
+}
+
+static void
+debug_dump (const char *filename, struct instruction *in, size_t len)
+{
+ FILE *fp = fopen (filename, "w");
+ long indent = 0;
+ for (size_t i = 0; i < len; i++)
+ {
+ if (in[i].cmd == END)
+ indent--;
+ for (long k = 0; k < indent; k++)
+ fputs (" ", fp);
+ debug_dump_instruction (fp, &in[i]);
+ if (in[i].cmd == BEGIN)
+ indent++;
+ }
+ fclose (fp);
+}
+#else
+#define debug_dump(...)
+#endif
+
+// - - Optimization passes - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+static size_t
+optimize_assignment (struct instruction *irb, size_t irb_len)
+{
+ size_t in = 0, out = 0;
+ for (; in < irb_len; in++, out++)
+ {
+ if (in + 2 < irb_len
+ && irb[in ].cmd == BEGIN
+ && irb[in + 1].cmd == DEC && irb[in + 1].arg == 1
+ && irb[in + 2].cmd == END)
+ {
+ irb[out] = INSTRUCTION (SET, 0, 0);
+ in += 2;
+ }
+ else if (out && irb[out - 1].cmd == SET && irb[in].cmd == INC)
+ irb[--out].arg += irb[in].arg;
+ else if (out != in)
+ irb[out] = irb[in];
+ }
+ return out;
+}
+
+// Add the offset of the LEFT/RIGHT instruction to the accumulator
+static bool
+add_direction_offset (struct instruction *irb, intptr_t *acc)
+{
+ if (irb->cmd == RIGHT)
+ *acc += irb->arg;
+ else if (irb->cmd == LEFT)
+ *acc -= (intptr_t) irb->arg;
+ else
+ return false;
+ return true;
+}
+
+// Add offsets to INC/DEC/SET stuck between LEFT/RIGHT
+// and compress the LEFT/RIGHT sequences
+static size_t
+optimize_offseted_inc_dec (struct instruction *irb, size_t irb_len)
+{
+ size_t in = 0, out = 0;
+ for (in = 0, out = 0; in < irb_len; in++, out++)
+ {
+ intptr_t dir = 0;
+ if (!add_direction_offset (&irb[in], &dir))
+ {
+ irb[out] = irb[in];
+ continue;
+ }
+
+ while (in + 2 < irb_len)
+ {
+ // An immediate offset has its limits on x86-64
+ if (dir < INT8_MIN || dir > INT8_MAX)
+ break;
+ intptr_t diff = 0;
+ if (!add_direction_offset (&irb[in + 2], &diff))
+ break;
+ int cmd = irb[in + 1].cmd;
+ if (cmd != INC && cmd != DEC && cmd != SET)
+ break;
+
+ irb[out] = irb[in + 1];
+ irb[out].offset = dir;
+
+ dir += diff;
+ out += 1;
+ in += 2;
+ }
+
+ for (; in + 1 < irb_len; in++)
+ if (!add_direction_offset (&irb[in + 1], &dir))
+ break;
+
+ if (!dir)
+ out--;
+ else if (dir > 0)
+ irb[out] = INSTRUCTION (RIGHT, 0, dir);
+ else
+ irb[out] = INSTRUCTION (LEFT, 0, -dir);
+ }
+ return out;
+}
+
+// Try to eliminate loops that eat a cell and add/subtract its value
+// to/from some other cell
+static size_t
+optimize_inc_dec_loops (struct instruction *irb, size_t irb_len)
+{
+ size_t in = 0, out = 0;
+ for (in = 0, out = 0; in < irb_len; in++, out++)
+ {
+ irb[out] = irb[in];
+ if (irb[in].cmd != BEGIN)
+ continue;
+
+ bool ok = false;
+ size_t count = 0;
+ for (size_t k = in + 1; k < irb_len; k++)
+ {
+ if (irb[k].cmd == END)
+ {
+ ok = true;
+ break;
+ }
+ if (irb[k].cmd != INC
+ && irb[k].cmd != DEC)
+ break;
+ count++;
+ }
+ if (!ok)
+ continue;
+
+ // Stable sort operations by their offsets, put [0] first
+ bool sorted;
+ do
+ {
+ sorted = true;
+ for (size_t k = 1; k < count; k++)
+ {
+ if (irb[in + k].offset == 0)
+ continue;
+ if (irb[in + k + 1].offset != 0
+ && irb[in + k].offset <= irb[in + k + 1].offset)
+ continue;
+
+ struct instruction tmp = irb[in + k + 1];
+ irb[in + k + 1] = irb[in + k];
+ irb[in + k] = tmp;
+ sorted = false;
+ }
+ }
+ while (!sorted);
+
+ // Abort the optimization on duplicate offsets (complication with [0])
+ for (size_t k = 1; k < count; k++)
+ if (irb[in + k].offset == irb[in + k + 1].offset)
+ ok = false;
+ // XXX: can't make the code longer either
+ for (size_t k = 1; k <= count; k++)
+ if (irb[in + k].arg != 1)
+ ok = false;
+ if (!ok
+ || irb[in + 1].cmd != DEC
+ || irb[in + 1].offset != 0)
+ continue;
+
+ int min_safe_left_offset = 0;
+ if (in > 1 && irb[in - 1].cmd == RIGHT)
+ min_safe_left_offset = -irb[in - 1].arg;
+
+ bool cond_needed_for_safety = false;
+ for (size_t k = 0; k < count; k++)
+ if (irb[in + k + 1].offset < min_safe_left_offset)
+ {
+ cond_needed_for_safety = true;
+ break;
+ }
+
+ in++;
+ if (cond_needed_for_safety)
+ out++;
+
+ irb[out] = INSTRUCTION (EAT, 0, 0);
+ for (size_t k = 1; k < count; k++)
+ irb[out + k] = INSTRUCTION (irb[in + k].cmd == INC
+ ? INCACC : DECACC, irb[in + k].offset, 0);
+
+ in += count;
+ out += count;
+
+ if (cond_needed_for_safety)
+ irb[out] = INSTRUCTION (END, 0, 0);
+ else
+ out--;
+ }
+ return out;
+}
+
+// - - Loop pairing - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+static void
+pair_loops (struct instruction *irb, size_t irb_len)
+{
+ size_t nesting = 0;
+ size_t *stack = xcalloc (sizeof *stack, irb_len);
+ for (size_t i = 0; i < irb_len; i++)
+ {
+ switch (irb[i].cmd)
+ {
+ case BEGIN:
+ stack[nesting++] = i;
+ break;
+ case END:
+ if (nesting <= 0)
+ exit_fatal ("unbalanced loops\n");
+
+ --nesting;
+ irb[stack[nesting]].arg = i + 1;
+
+ // Looping can be disabled by optimizations
+ if (irb[i].arg)
+ irb[i].arg = stack[nesting] + 1;
+ default:
+ break;
+ }
+ }
+ free (stack);
+
+ if (nesting != 0)
+ exit_fatal ("unbalanced loops\n");
+}
+
+// - - Main - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+int
+main (int argc, char *argv[])
+{
+ if (argc > 3)
+ exit_fatal ("usage: %s [INPUT-FILE] [OUTPUT-FILE]\n", argv[0]);
+
+ FILE *input_file = stdin;
+ if (argc > 1 && !(input_file = fopen (argv[1], "r")))
+ exit_fatal ("fopen: %s: %s\n", argv[1], strerror (errno));
+
+ const char *output_path = "a.out";
+ if (argc > 2)
+ output_path = argv[2];
+
+ struct str buffer;
+ str_init (&buffer);
+
+ int c;
+ while ((c = fgetc (input_file)) != EOF)
+ str_append_c (&buffer, c);
+ if (ferror (input_file))
+ exit_fatal ("can't read program\n");
+ fclose (input_file);
+
+// - - Decode, group and optimize - - - - - - - - - - - - - - - - - - - - - - -
+
+ // This is our Intermediate Representation Buffer
+ struct instruction *irb = xcalloc (sizeof *irb, buffer.len);
+ size_t irb_len = 0;
+
+ for (size_t i = 0; i < buffer.len; i++)
+ {
+ enum command cmd;
+ switch (buffer.str[i])
+ {
+ case '>': cmd = RIGHT; break;
+ case '<': cmd = LEFT; break;
+ case '+': cmd = INC; break;
+ case '-': cmd = DEC; break;
+ case '.': cmd = OUT; break;
+ case ',': cmd = IN; break;
+ case '[': cmd = BEGIN; break;
+ case ']': cmd = END; break;
+ default: continue;
+ }
+
+ // The most basic optimization is to group identical commands together
+ if (!irb_len || !grouped[cmd] || irb[irb_len - 1].cmd != cmd)
+ irb_len++;
+
+ irb[irb_len - 1].cmd = cmd;
+ irb[irb_len - 1].arg++;
+ }
+
+ debug_dump ("bf-no-opt.txt", irb, irb_len);
+ irb_len = optimize_assignment (irb, irb_len);
+ debug_dump ("bf-pre-offsets.txt", irb, irb_len);
+ irb_len = optimize_offseted_inc_dec (irb, irb_len);
+ debug_dump ("bf-pre-incdec-unloop.txt", irb, irb_len);
+ irb_len = optimize_inc_dec_loops (irb, irb_len);
+ debug_dump ("bf-optimized.txt", irb, irb_len);
+ pair_loops (irb, irb_len);
+ debug_dump ("bf-final.txt", irb, irb_len);
+
+// - - Code generation - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+ str_init (&buffer);
+ size_t *offsets = xcalloc (sizeof *offsets, irb_len + 1);
+ bool *sets_flags = xcalloc (sizeof *sets_flags, irb_len);
+
+#define CODE(x) { char t[] = x; str_append_data (&buffer, t, sizeof t - 1); }
+#define LE(v) (uint8_t[]) { v, v>>8, v>>16, v>>24, v>>32, v>>40, v>>48, v>>56 }
+#define DB(x) { uint64_t v = (x); str_append_data (&buffer, LE (v), 1); }
+#define DW(x) { uint64_t v = (x); str_append_data (&buffer, LE (v), 2); }
+#define DD(x) { uint64_t v = (x); str_append_data (&buffer, LE (v), 4); }
+#define DQ(x) { uint64_t v = (x); str_append_data (&buffer, LE (v), 8); }
+
+ enum
+ {
+ ELF_LOAD_CODE = 0x400000, // where code is loaded (usual)
+ ELF_LOAD_DATA = 0x800000 // where the tape is placed
+ };
+
+ CODE ("\xB8") DD (ELF_LOAD_DATA) // mov rax, "ELF_LOAD_DATA"
+ CODE ("\x30\xDB") // xor bl, bl
+
+ for (size_t i = 0; i < irb_len; i++)
+ {
+ offsets[i] = buffer.len;
+
+ size_t arg = irb[i].arg;
+ assert (arg <= UINT32_MAX);
+
+ int offset = irb[i].offset;
+ assert (offset <= INT8_MAX && offset >= INT8_MIN);
+
+ // Don't save what we've just loaded
+ if (irb[i].cmd == LEFT || irb[i].cmd == RIGHT)
+ if (i < 2 || i + 1 >= irb_len
+ || (irb[i - 2].cmd != LEFT && irb[i - 2].cmd != RIGHT)
+ || irb[i - 1].cmd != BEGIN
+ || irb[i + 1].cmd != END)
+ CODE ("\x88\x18") // mov [rax], bl
+
+ switch (irb[i].cmd)
+ {
+ case RIGHT:
+ // add rax, "arg" -- optimistic, no boundary checking
+ if (arg > INT8_MAX) { CODE ("\x48\x05") DD (arg) }
+ else { CODE ("\x48\x83\xC0") DB (arg) }
+ break;
+ case LEFT:
+ // sub rax, "arg" -- optimistic, no boundary checking
+ if (arg > INT8_MAX) { CODE ("\x48\x2D") DD (arg) }
+ else { CODE ("\x48\x83\xE8") DB (arg) }
+ break;
+
+ case EAT:
+ // NOTE: the kernel destroys rcx and r11 on syscalls,
+ // there must be no OUT or IN between EAT and INCACC/DECACC
+ CODE ("\x88\xD9" "\x30\xDB") // mov cl, bl; xor bl, bl
+ sets_flags[i] = true;
+ break;
+ case INCACC:
+ if (offset)
+ {
+ CODE ("\x00\x48") DB (offset) // add [rax+"offset"], cl
+ }
+ else
+ {
+ CODE ("\x00\xCB") // add bl, cl
+ sets_flags[i] = true;
+ }
+ break;
+ case DECACC:
+ if (offset)
+ {
+ CODE ("\x28\x48") DB (offset) // sub [rax+"offset"], cl
+ }
+ else
+ {
+ CODE ("\x28\xCB") // sub bl, cl
+ sets_flags[i] = true;
+ }
+ break;
+
+ case INC:
+ if (offset)
+ {
+ CODE ("\x80\x40") DB (offset) // add byte [rax+"offset"], "arg"
+ }
+ else
+ {
+ CODE ("\x80\xC3") // add bl, "arg"
+ sets_flags[i] = true;
+ }
+ DB (arg)
+ break;
+ case DEC:
+ if (offset)
+ {
+ CODE ("\x80\x68") DB (offset) // sub byte [rax+"offset"], "arg"
+ }
+ else
+ {
+ CODE ("\x80\xEB") // sub bl, "arg"
+ sets_flags[i] = true;
+ }
+ DB (arg)
+ break;
+ case SET:
+ if (offset)
+ {
+ CODE ("\xC6\x40") DB (offset) // mov byte [rax+"offset"], "arg"
+ }
+ else
+ CODE ("\xB3") // mov bl, "arg"
+ DB (arg)
+ break;
+
+ case OUT:
+ CODE ("\xE8") DD (0) // call "write"
+ break;
+ case IN:
+ CODE ("\xE8") DD (0) // call "read"
+ break;
+
+ case BEGIN:
+ // Don't test the register when the flag has been set already;
+ // this doesn't have much of an effect in practice
+ if (!i || !sets_flags[i - 1])
+ CODE ("\x84\xDB") // test bl, bl
+ CODE ("\x0F\x84\x00\x00\x00\x00") // jz "offsets[arg]"
+ break;
+ case END:
+ // We know that the cell is zero, make this an "if", not a "loop";
+ // this doesn't have much of an effect in practice
+ if (!arg)
+ break;
+
+ if (!i || !sets_flags[i - 1])
+ CODE ("\x84\xDB") // test bl, bl
+ CODE ("\x0F\x85\x00\x00\x00\x00") // jnz "offsets[arg]"
+ break;
+ }
+
+ // No sense in reading it out when we overwrite it immediately;
+ // this doesn't have much of an effect in practice
+ if (irb[i].cmd == LEFT || irb[i].cmd == RIGHT)
+ if (i + 1 >= irb_len
+ || irb[i + 1].cmd != SET
+ || irb[i + 1].offset != 0)
+ CODE ("\x8A\x18") // mov bl, [rax]
+ }
+ // When there is a loop at the end we need to be able to jump past it
+ offsets[irb_len] = buffer.len;
+
+ // Write an epilog which handles all the OS interfacing
+ //
+ // System V x86-64 ABI:
+ // rax <-> both syscall number and return value
+ // args -> rdi, rsi, rdx, r10, r8, r9
+ // trashed <- rcx, r11
+
+#ifdef TARGET_OPENBSD
+ enum { SYS_READ = 3, SYS_WRITE = 4, SYS_EXIT = 1 };
+#elif defined TARGET_LINUX
+ enum { SYS_READ = 0, SYS_WRITE = 1, SYS_EXIT = 60 };
+#else
+#error Target not supported
+#endif
+
+ CODE ("\xB8") DD (SYS_EXIT) // mov eax, 0x3c
+ CODE ("\x48\x31\xFF") // xor rdi, rdi
+ CODE ("\x0F\x05") // syscall
+
+ size_t fatal_offset = buffer.len;
+ CODE ("\x48\x89\xF7") // mov rdi, rsi -- use the string in rsi
+ CODE ("\x30\xC0") // xor al, al -- look for the nil byte
+ CODE ("\x48\x31\xC9") // xor rcx, rcx
+ CODE ("\x48\xF7\xD1") // not rcx -- start from -1
+ CODE ("\xFC" "\xF2\xAE") // cld; repne scasb -- decrement until found
+ CODE ("\x48\xF7\xD1") // not rcx
+ CODE ("\x48\x8D\x51\xFF") // lea rdx, [rcx-1] -- save length in rdx
+ CODE ("\xB8") DD (SYS_WRITE) // mov eax, "SYS_WRITE"
+ CODE ("\xBF") DD (2) // mov edi, "STDERR_FILENO"
+ CODE ("\x0F\x05") // syscall
+
+ CODE ("\xB8") DD (SYS_EXIT) // mov eax, "SYS_EXIT"
+ CODE ("\xBF") DD (1) // mov edi, "EXIT_FAILURE"
+ CODE ("\x0F\x05") // syscall
+
+ size_t read_offset = buffer.len;
+ CODE ("\x50") // push rax -- save tape position
+ CODE ("\xB8") DD (SYS_READ) // mov eax, "SYS_READ"
+ CODE ("\xBF") DD (0) // mov edi, "STDIN_FILENO"
+ CODE ("\x66\x6A\x00") // push word 0 -- the default value for EOF
+ CODE ("\x48\x89\xE6") // mov rsi, rsp -- the char starts at rsp
+ CODE ("\xBA") DD (1) // mov edx, 1 -- count
+ CODE ("\x0F\x05") // syscall
+ CODE ("\x66\x5B") // pop bx
+
+ CODE ("\x48\x83\xF8\x00") // cmp rax, 0
+ CODE ("\x48\x8D\x35") DD (4) // lea rsi, [rel read_message]
+ CODE ("\x7C") // jl "fatal_offset" -- write failure message
+ DB ((intptr_t) fatal_offset - (intptr_t) (buffer.len + 1))
+ CODE ("\x58") // pop rax -- restore tape position
+ CODE ("\xC3") // ret
+ CODE ("fatal: read failed\n\0")
+
+ size_t write_offset = buffer.len;
+ CODE ("\x50") // push rax -- save tape position
+ CODE ("\xB8") DD (SYS_WRITE) // mov eax, "SYS_WRITE"
+ CODE ("\xBF") DD (1) // mov edi, "STDOUT_FILENO"
+ CODE ("\x66\x53") // push bx
+ CODE ("\x48\x89\xE6") // mov rsi, rsp -- the char starts at rsp
+ CODE ("\xBA") DD (1) // mov edx, 1 -- count
+ CODE ("\x0F\x05") // syscall
+ CODE ("\x66\x5B") // pop bx
+
+ CODE ("\x48\x83\xF8\x00") // cmp rax, 0
+ CODE ("\x48\x8D\x35") DD (4) // lea rsi, [rel write_message]
+ CODE ("\x7C") // jl "fatal_offset" -- write failure message
+ DB ((intptr_t) fatal_offset - (intptr_t) (buffer.len + 1))
+ CODE ("\x58") // pop rax -- restore tape position
+ CODE ("\xC3") // ret
+ CODE ("fatal: write failed\n\0")
+
+ // Now that we know where each instruction is, fill in relative jumps
+ for (size_t i = 0; i < irb_len; i++)
+ {
+ if (!irb[i].arg)
+ continue;
+
+ // This must accurately reflect the code generators
+ intptr_t target, fixup = offsets[i];
+ if (irb[i].cmd == BEGIN || irb[i].cmd == END)
+ {
+ fixup += (i && sets_flags[i - 1]) ? 2 : 4;
+ target = offsets[irb[i].arg];
+ }
+ else if (irb[i].cmd == IN) { fixup++; target = read_offset; }
+ else if (irb[i].cmd == OUT) { fixup++; target = write_offset; }
+ else continue;
+
+ uint64_t v = target - (fixup + 4);
+ memcpy (buffer.str + fixup, LE (v), 4);
+ }
+ free (offsets);
+ free (sets_flags);
+
+// - - Output - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+ // Now that we know how long the machine code is, we can write the header.
+ // Note that for PIE we would need to depend on the dynamic linker, so no.
+ //
+ // Recommended reading:
+ // http://www.muppetlabs.com/~breadbox/software/tiny/teensy.html
+ // man 5 elf
+
+ struct str code = buffer;
+ str_init (&buffer);
+
+ enum
+ {
+ ELF_HEADER_SIZE = 64, // size of the ELF header
+ ELF_PROGRAM_ENTRY_SIZE = 56, // size of a program header
+ ELF_SECTION_ENTRY_SIZE = 64, // size of a section header
+ ELF_META_SIZE = ELF_HEADER_SIZE + 2 * ELF_PROGRAM_ENTRY_SIZE
+ };
+
+ // ELF header
+ CODE ("\x7F" "ELF\x02\x01\x01") // ELF, 64-bit, little endian, v1
+#ifdef TARGET_OPENBSD
+ // OpenBSD either requires its ABI or a PT_NOTE with "OpenBSD" in it
+ CODE ("\x0C\x00" "\0\0\0\0\0\0\0") // OpenBSD ABI, v0, padding
+#else
+ CODE ("\x00\x00" "\0\0\0\0\0\0\0") // Unix System V ABI, v0, padding
+#endif
+ DW (2) DW (62) DD (1) // executable, x86-64, v1
+ DQ (ELF_LOAD_CODE + ELF_META_SIZE) // entry point address
+ DQ (ELF_HEADER_SIZE) DQ (0) // program, section header offset
+ DD (0) // no processor-specific flags
+ DW (ELF_HEADER_SIZE) // ELF header size
+ DW (ELF_PROGRAM_ENTRY_SIZE) DW (2) // program hdr tbl entry size, count
+ DW (ELF_SECTION_ENTRY_SIZE) DW (0) // section hdr tbl entry size, count
+ DW (0) // no section index for strings
+
+ // Program header for code
+ // The entry point address seems to require alignment, so map start of file
+ DD (1) DD (5) // PT_LOAD, PF_R | PF_X
+ DQ (0) // offset within the file
+ DQ (ELF_LOAD_CODE) // address in virtual memory
+ DQ (ELF_LOAD_CODE) // address in physical memory
+ DQ (ELF_META_SIZE + code.len) // length within the file
+ DQ (ELF_META_SIZE + code.len) // length within memory
+ DQ (4096) // segment alignment
+
+ // Program header for the tape
+ DD (1) DD (6) // PT_LOAD, PF_R | PF_W
+ DQ (0) // offset within the file
+ DQ (ELF_LOAD_DATA) // address in virtual memory
+ DQ (ELF_LOAD_DATA) // address in physical memory
+ DQ (0) // length within the file
+ DQ (1 << 20) // one megabyte of memory
+ DQ (4096) // segment alignment
+
+ // The section header table is optional and we don't need it for anything
+
+ FILE *output_file;
+#ifdef __unix__
+ int output_fd;
+ if ((output_fd = open (output_path, O_CREAT | O_WRONLY, 0777)) < 0)
+ exit_fatal ("open: %s: %s\n", output_path, strerror (errno));
+ if (!(output_file = fdopen (output_fd, "w")))
+ exit_fatal ("fdopen: %s\n", strerror (errno));
+#else
+ if (!(output_file = fopen (output_path, "w")))
+ exit_fatal ("fopen: %s: %s\n", output_path, strerror (errno));
+#endif
+
+ fwrite (buffer.str, buffer.len, 1, output_file);
+ fwrite (code.str, code.len, 1, output_file);
+ fclose (output_file);
+ return 0;
+}
--
cgit v1.2.3-70-g09d2