Skip to content

Instantly share code, notes, and snippets.

@theMackabu
Created February 10, 2026 23:07
Show Gist options
  • Select an option

  • Save theMackabu/9f27a8670d380405c4e468b073552bb9 to your computer and use it in GitHub Desktop.

Select an option

Save theMackabu/9f27a8670d380405c4e468b073552bb9 to your computer and use it in GitHub Desktop.
bnuy
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Bytecode instruction set executed by run_vm().
// The enumerator order is significant: run_vm() indexes its dispatch table
// and print_bytecode() indexes its mnemonic table with these values.
typedef enum {
OP_CONST, // push the operand as a constant value
OP_LOAD, // push a copy of the stack slot at (frame pointer + operand)
OP_ADD, // pop two values and push their sum
OP_CALL, // call the function at index `operand` in vm->functions
OP_RETURN, // pop the result, drop the callee frame, resume the caller
OP_EXTERN, // invoke the host function at index `operand` in vm->externs
OP_HALT // stop execution
} Opcode;
// Token kinds produced by next_token().
// TOK_EOF is 0, so a zero-initialised Token is an EOF token; next_token()
// also yields this kind for characters it does not recognise.
typedef enum {
TOK_EOF, TOK_FN, TOK_RETURN, TOK_IDENT, TOK_NUMBER,
TOK_LPAREN, TOK_RPAREN, TOK_LBRACE, TOK_RBRACE,
TOK_COMMA, TOK_SEMICOLON, TOK_PLUS, TOK_DOT
} TokenType;
// A single lexical token.
typedef struct {
TokenType type; // kind of token
char* value; // heap-allocated text for identifiers and keywords, NULL otherwise
int num_value; // numeric value, set for TOK_NUMBER tokens
} Token;
// Lexer state: a cursor over the source text plus one token of lookahead.
typedef struct {
const char* src; // NUL-terminated source text; stored by pointer, not copied
int pos; // index of the next unread character in src
Token current; // most recently scanned token
} Lexer;
// A runtime value held on the VM stack; the language only has integers.
typedef struct {
int value;
} Value;
// One bytecode instruction: an opcode and a single integer operand.
// Instructions that need no operand are emitted with operand 0.
typedef struct {
Opcode op;
int operand;
} Instruction;
// A user-defined function recorded by parse_function().
typedef struct {
char* name; // heap-allocated function name
int addr; // index in vm->code of the function's first instruction
int params; // number of declared parameters
char** param_names; // heap-allocated array of heap-allocated parameter names
} Function;
// Forward declaration so ExternFn can refer to the VM before it is defined.
typedef struct VM VM;
// Signature of a host ("extern") function. It receives the VM and exchanges
// arguments and results through the value stack; the two built-ins in this
// file each push exactly one result value.
typedef void (*ExternFn)(VM* vm);
// Registration record for a host function callable from scripts.
typedef struct {
char* name; // qualified name matched by find_extern(), e.g. "std.io.println"
ExternFn fn; // host implementation
int params; // number of arguments the function expects
} ExternFunc;
// Complete compiler/interpreter state: emitted code, runtime stacks and the
// tables of user-defined and host functions.
struct VM {
Instruction* code; // emitted bytecode
int code_size; // number of instructions emitted so far
int entry_point; // index of the first top-level instruction
Value* stack; // value stack
int sp; // index of the next free value-stack slot
int* call_stack; // saved (return pc, frame pointer) pairs, two ints per call
int csp; // index of the next free call-stack slot
Function* functions; // user-defined functions
int func_count; // number of entries in functions
ExternFunc* externs; // registered host functions
int extern_count; // number of entries in externs
void (*run)(struct VM* vm); // hook with run_vm()'s signature; no visible code calls through it
};
// Growable, NUL-terminated string buffer used to assemble qualified names.
typedef struct {
char* data; // heap buffer holding the string
size_t len; // string length, excluding the terminating NUL
size_t cap; // allocated size of data in bytes
} StringBuilder;
// Forward declarations: the extern_* host functions call push()/pop() before
// their definitions, and parse_call() and parse_expr() call each other.
static void push(VM* vm, Value v);
static Value pop(VM* vm);
static void parse_expr(Lexer* lex, VM* vm, Function* current_func);
// Advance lex->pos past any whitespace, stopping at the first
// non-whitespace character or at the terminating NUL.
static void skip_whitespace(Lexer* lex) {
    // <ctype.h> functions require a value representable as unsigned char;
    // passing a plain char that happens to be negative is undefined.
    // isspace('\0') is false, so the loop also stops at the end of input.
    while (isspace((unsigned char)lex->src[lex->pos])) {
        lex->pos++;
    }
}
// Scan and return the next token from lex->src, advancing lex->pos.
//
// - Identifiers and keywords: a letter followed by letters, digits or '_'.
//   The text is returned in a freshly allocated tok.value that the caller
//   owns (advance() frees it).
// - Numbers: a run of decimal digits; the value saturates at INT_MAX
//   instead of overflowing.
// - Punctuation: one of ( ) { } , ; + .
// - End of input, or any unrecognised character: TOK_EOF. An unrecognised
//   character is still consumed.
//
// Terminates the process if the identifier buffer cannot be allocated.
Token next_token(Lexer* lex) {
    skip_whitespace(lex);
    Token tok = {0};

    // Classify via unsigned char: <ctype.h> is undefined for negative values.
    unsigned char c = (unsigned char)lex->src[lex->pos];
    if (c == '\0') {
        tok.type = TOK_EOF;
        return tok;
    }

    if (isalpha(c)) {
        int start = lex->pos;
        while (isalnum((unsigned char)lex->src[lex->pos]) || lex->src[lex->pos] == '_') {
            lex->pos++;
        }
        size_t len = (size_t)(lex->pos - start);
        tok.value = malloc(len + 1);
        if (tok.value == NULL) {
            fprintf(stderr, "next_token: out of memory\n");
            exit(EXIT_FAILURE);
        }
        // The length is known, so memcpy plus an explicit terminator is
        // clearer than strncpy.
        memcpy(tok.value, &lex->src[start], len);
        tok.value[len] = '\0';

        if (strcmp(tok.value, "fn") == 0) {
            tok.type = TOK_FN;
        } else if (strcmp(tok.value, "return") == 0) {
            tok.type = TOK_RETURN;
        } else {
            tok.type = TOK_IDENT;
        }
        return tok;
    }

    if (isdigit(c)) {
        // Accumulate while scanning; atoi() has undefined behaviour when the
        // literal does not fit in an int, so saturate instead.
        int value = 0;
        while (isdigit((unsigned char)lex->src[lex->pos])) {
            int digit = lex->src[lex->pos] - '0';
            if (value > (INT_MAX - digit) / 10) {
                value = INT_MAX;
            } else {
                value = value * 10 + digit;
            }
            lex->pos++;
        }
        tok.type = TOK_NUMBER;
        tok.num_value = value;
        return tok;
    }

    lex->pos++;
    switch (c) {
    case '(': tok.type = TOK_LPAREN; break;
    case ')': tok.type = TOK_RPAREN; break;
    case '{': tok.type = TOK_LBRACE; break;
    case '}': tok.type = TOK_RBRACE; break;
    case ',': tok.type = TOK_COMMA; break;
    case ';': tok.type = TOK_SEMICOLON; break;
    case '+': tok.type = TOK_PLUS; break;
    case '.': tok.type = TOK_DOT; break;
    default:
        // No dedicated error token exists; an unknown character is reported
        // as TOK_EOF, which is what the zero-initialised token already was.
        tok.type = TOK_EOF;
        break;
    }
    return tok;
}
// Allocate a lexer over `src` and prime it with the first token.
//
// `src` is stored by pointer, not copied, so it must stay valid for as long
// as the lexer is used. The caller frees the returned lexer and its
// current.value. Returns NULL if the lexer cannot be allocated.
Lexer* lexer_create(const char* src) {
    Lexer* lex = malloc(sizeof *lex);
    if (lex == NULL) {
        return NULL;
    }
    lex->src = src;
    lex->pos = 0;
    lex->current = next_token(lex);
    return lex;
}
// Discard the current token (releasing its text, if any) and scan the next
// one into lex->current.
void advance(Lexer* lex) {
    // free(NULL) is a no-op, so tokens without text need no special case.
    free(lex->current.value);
    lex->current = next_token(lex);
}
// Initialise `sb` as an empty string with a 64-byte buffer.
// Terminates the process if the buffer cannot be allocated, since the
// function has no way to report failure to its caller.
void sb_init(StringBuilder* sb) {
    sb->cap = 64;
    sb->len = 0;
    sb->data = malloc(sb->cap);
    if (sb->data == NULL) {
        fprintf(stderr, "sb_init: out of memory\n");
        exit(EXIT_FAILURE);
    }
    sb->data[0] = '\0';
}
// Append the NUL-terminated string `str` to `sb`, growing the buffer when
// needed. The result stays NUL-terminated.
// Terminates the process if the buffer cannot be grown.
void sb_append(StringBuilder* sb, const char* str) {
    size_t str_len = strlen(str);
    size_t needed = sb->len + str_len + 1;

    if (needed > sb->cap) {
        // Grow at least geometrically so repeated appends stay cheap.
        size_t new_cap = sb->cap * 2;
        if (new_cap < needed) {
            new_cap = needed;
        }
        // Keep the old pointer until realloc succeeds; assigning the result
        // straight to sb->data would lose the buffer on failure.
        char* grown = realloc(sb->data, new_cap);
        if (grown == NULL) {
            fprintf(stderr, "sb_append: out of memory\n");
            exit(EXIT_FAILURE);
        }
        sb->data = grown;
        sb->cap = new_cap;
    }

    // Copy str_len + 1 bytes so the terminator comes along.
    memcpy(sb->data + sb->len, str, str_len + 1);
    sb->len += str_len;
}
// Parse a dotted name such as `std.io.println` starting at the current
// token and return it as a newly allocated string owned by the caller.
// Returns NULL, consuming nothing, when the current token is not an
// identifier. A '.' that is not followed by an identifier is consumed and
// ends the name.
char* parse_qualified_name(Lexer* lex) {
    if (lex->current.type != TOK_IDENT) {
        return NULL;
    }

    StringBuilder qualified;
    sb_init(&qualified);
    sb_append(&qualified, lex->current.value);
    advance(lex);

    for (;;) {
        if (lex->current.type != TOK_DOT) {
            break;
        }
        advance(lex);  // step over '.'
        if (lex->current.type != TOK_IDENT) {
            break;
        }
        sb_append(&qualified, ".");
        sb_append(&qualified, lex->current.value);
        advance(lex);
    }

    return qualified.data;
}
// Host function `std.io.println`: pops one value, prints it followed by a
// newline, then pushes 0 as the call's result.
void extern_println(VM* vm) {
    Value arg = pop(vm);
    printf("%d\n", arg.value);
    push(vm, (Value){0});
}
// Host function `bunny.squeak`: takes no arguments, prints "squeak" and
// pushes 0 as the call's result.
void extern_bunny(VM* vm) {
    fputs("squeak\n", stdout);
    push(vm, (Value){0});
}
VM* vm_create(int code_cap, int stack_cap) {
VM* vm = malloc(sizeof(VM));
vm->code = malloc(sizeof(Instruction) * code_cap);
vm->code_size = 0;
vm->entry_point = 0;
vm->stack = malloc(sizeof(Value) * stack_cap);
vm->sp = 0;
vm->call_stack = malloc(sizeof(int) * 64);
vm->csp = 0;
vm->functions = malloc(sizeof(Function) * 16);
vm->func_count = 0;
vm->externs = malloc(sizeof(ExternFunc) * 16);
vm->extern_count = 0;
vm->externs[vm->extern_count++] = (ExternFunc){
"std.io.println", extern_println, 1
};
vm->externs[vm->extern_count++] = (ExternFunc){
"bunny.squeak", extern_bunny, 0
};
return vm;
}
// Append one instruction to the VM's code buffer.
// No capacity check is performed: the buffer size chosen in vm_create() is
// not recorded in the VM, so the caller must not emit past it.
void emit(VM* vm, Opcode op, int operand) {
    vm->code[vm->code_size++] = (Instruction){ .op = op, .operand = operand };
}
// Return the frame offset of parameter `name` in `func`, or -1 if `func` is
// NULL (top-level code has no parameters) or has no such parameter.
//
// Arguments are pushed left to right and OP_CALL sets the frame pointer to
// sp - params, so the i-th declared parameter lives at offset i. The former
// `params - 1 - i` mapping bound the first parameter to the last argument,
// which only went unnoticed because addition is commutative.
int find_param(Function* func, const char* name) {
    if (func == NULL || name == NULL) {
        return -1;
    }
    for (int i = 0; i < func->params; i++) {
        if (strcmp(func->param_names[i], name) == 0) {
            return i;
        }
    }
    return -1;
}
// Look up a user-defined function by name.
// Returns the index of the first match in vm->functions, or -1 if none.
int find_function(VM* vm, const char* name) {
    int idx = 0;
    while (idx < vm->func_count) {
        if (!strcmp(vm->functions[idx].name, name)) {
            return idx;
        }
        idx++;
    }
    return -1;
}
// Look up a registered host function by its qualified name.
// Returns the index of the first match in vm->externs, or -1 if none.
int find_extern(VM* vm, const char* name) {
    const ExternFunc* entry = vm->externs;
    for (int idx = 0; idx < vm->extern_count; idx++, entry++) {
        if (strcmp(entry->name, name) == 0) {
            return idx;
        }
    }
    return -1;
}
// Report a fatal error in a call expression and terminate.
static void parse_call_fail(const char* what, const char* name) {
    fprintf(stderr, "parse error: %s '%s'\n", what, name);
    exit(EXIT_FAILURE);
}

// Parse the argument list of a call to `name` and emit the call.
//
// On entry the current token is '('. Arguments are compiled left to right,
// so they end up on the stack in declaration order. Host functions take
// precedence over user functions of the same name.
//
// Terminates with a diagnostic when the closing ')' is missing, the callee
// is unknown, or the argument count does not match the callee's parameter
// count. Previously an unknown callee emitted OP_CALL with index -1 and a
// wrong argument count corrupted the callee's stack frame.
void parse_call(Lexer* lex, VM* vm, Function* current_func, const char* name) {
    advance(lex);  // consume '('

    int arg_count = 0;
    if (lex->current.type != TOK_RPAREN) {
        parse_expr(lex, vm, current_func);
        arg_count++;
        while (lex->current.type == TOK_COMMA) {
            advance(lex);
            parse_expr(lex, vm, current_func);
            arg_count++;
        }
    }

    if (lex->current.type != TOK_RPAREN) {
        parse_call_fail("expected ')' after arguments in call to", name);
    }
    advance(lex);  // consume ')'

    int extern_id = find_extern(vm, name);
    if (extern_id != -1) {
        if (arg_count != vm->externs[extern_id].params) {
            parse_call_fail("wrong number of arguments in call to", name);
        }
        emit(vm, OP_EXTERN, extern_id);
        return;
    }

    int func_id = find_function(vm, name);
    if (func_id == -1) {
        parse_call_fail("call to undefined function", name);
    }
    if (arg_count != vm->functions[func_id].params) {
        parse_call_fail("wrong number of arguments in call to", name);
    }
    emit(vm, OP_CALL, func_id);
}
// Compile one expression: a number, a parameter reference or a call,
// optionally followed by `+ <expression>` (so '+' associates to the right).
//
// `current_func` is the function being compiled, or NULL for top-level
// code, where no variables exist.
//
// Terminates with a diagnostic when no expression starts at the current
// token or when an identifier is not a parameter of `current_func`.
// Previously such input emitted nothing or an OP_LOAD with offset -1, and a
// NULL `current_func` was dereferenced.
static void parse_expr(Lexer* lex, VM* vm, Function* current_func) {
    if (lex->current.type == TOK_NUMBER) {
        emit(vm, OP_CONST, lex->current.num_value);
        advance(lex);
    } else if (lex->current.type == TOK_IDENT) {
        char* name = parse_qualified_name(lex);
        if (lex->current.type == TOK_LPAREN) {
            parse_call(lex, vm, current_func, name);
        } else {
            int offset = current_func != NULL ? find_param(current_func, name) : -1;
            if (offset < 0) {
                fprintf(stderr, "parse error: unknown variable '%s'\n", name);
                free(name);
                exit(EXIT_FAILURE);
            }
            emit(vm, OP_LOAD, offset);
        }
        free(name);
    } else {
        fprintf(stderr, "parse error: expected an expression\n");
        exit(EXIT_FAILURE);
    }

    if (lex->current.type == TOK_PLUS) {
        advance(lex);
        parse_expr(lex, vm, current_func);
        emit(vm, OP_ADD, 0);
    }
}
// Report a fatal error in a function definition and terminate.
static void parse_function_fail(const char* msg) {
    fprintf(stderr, "parse error in function definition: %s\n", msg);
    exit(EXIT_FAILURE);
}

// Duplicate `text`, terminating the process if memory runs out.
static char* parse_function_dup(const char* text) {
    char* copy = strdup(text);
    if (copy == NULL) {
        parse_function_fail("out of memory");
    }
    return copy;
}

// Parse `fn name(p1, p2, ...) { return expr; ... }` starting at the `fn`
// token, register the function in vm->functions and emit its body.
//
// Only `return` statements generate code; any other token inside the body
// is skipped, mirroring how parse_program() treats unknown top-level tokens.
//
// Terminates with a diagnostic on malformed headers, on too many parameters
// or functions, and when the body is not closed. Previously a body token
// other than `return` or '}' made the loop spin forever, and a missing name
// passed NULL to strdup().
void parse_function(Lexer* lex, VM* vm) {
    // MAX_FUNCTIONS must match the table size allocated in vm_create().
    enum { MAX_PARAMS = 16, MAX_FUNCTIONS = 16 };

    advance(lex);  // consume 'fn'
    if (lex->current.type != TOK_IDENT) {
        parse_function_fail("expected a function name after 'fn'");
    }
    if (vm->func_count >= MAX_FUNCTIONS) {
        parse_function_fail("too many functions");
    }
    char* func_name = parse_function_dup(lex->current.value);
    advance(lex);

    if (lex->current.type != TOK_LPAREN) {
        parse_function_fail("expected '(' after the function name");
    }
    advance(lex);

    char** params = malloc(sizeof *params * MAX_PARAMS);
    if (params == NULL) {
        parse_function_fail("out of memory");
    }
    int param_count = 0;
    if (lex->current.type == TOK_IDENT) {
        params[param_count++] = parse_function_dup(lex->current.value);
        advance(lex);
        while (lex->current.type == TOK_COMMA) {
            advance(lex);
            if (lex->current.type != TOK_IDENT) {
                parse_function_fail("expected a parameter name after ','");
            }
            if (param_count >= MAX_PARAMS) {
                parse_function_fail("too many parameters");
            }
            params[param_count++] = parse_function_dup(lex->current.value);
            advance(lex);
        }
    }

    if (lex->current.type != TOK_RPAREN) {
        parse_function_fail("expected ')' after the parameter list");
    }
    advance(lex);
    if (lex->current.type != TOK_LBRACE) {
        parse_function_fail("expected '{' to open the function body");
    }
    advance(lex);

    Function func = {0};
    func.name = func_name;
    func.addr = vm->code_size;
    func.params = param_count;
    func.param_names = params;
    // Register before compiling the body so the function can call itself.
    vm->functions[vm->func_count++] = func;

    while (lex->current.type != TOK_RBRACE) {
        if (lex->current.type == TOK_EOF) {
            // The lexer also reports unrecognised characters as TOK_EOF.
            parse_function_fail("unexpected end of input or unrecognised character in body");
        }
        if (lex->current.type != TOK_RETURN) {
            advance(lex);
            continue;
        }
        advance(lex);  // consume 'return'
        parse_expr(lex, vm, &func);
        emit(vm, OP_RETURN, 0);
        // Only consume a ';' that is really there, so a missing one cannot
        // swallow the closing brace.
        if (lex->current.type == TOK_SEMICOLON) {
            advance(lex);
        }
    }
    advance(lex);  // consume '}'
}
// Compile a whole program: leading `fn` definitions first, then top-level
// call statements, followed by a final OP_HALT.
//
// vm->entry_point is set to the first top-level instruction. At top level
// only `name(...)` calls produce code; an identifier that is not followed
// by '(' and every other token are skipped.
void parse_program(Lexer* lex, VM* vm) {
    while (lex->current.type == TOK_FN) {
        parse_function(lex, vm);
    }
    vm->entry_point = vm->code_size;

    for (;;) {
        TokenType kind = lex->current.type;
        if (kind == TOK_EOF) {
            break;
        }
        if (kind != TOK_IDENT) {
            advance(lex);
            continue;
        }

        char* callee = parse_qualified_name(lex);
        if (lex->current.type == TOK_LPAREN) {
            parse_call(lex, vm, NULL, callee);
            if (lex->current.type == TOK_SEMICOLON) {
                advance(lex);
            }
        }
        free(callee);
    }

    emit(vm, OP_HALT, 0);
}
// Push `v` onto the value stack. No overflow check is performed.
static void push(VM* vm, Value v) {
    vm->stack[vm->sp] = v;
    vm->sp += 1;
}
// Pop and return the top of the value stack. No underflow check is performed.
static Value pop(VM* vm) {
    vm->sp -= 1;
    return vm->stack[vm->sp];
}
// Execute the bytecode in `vm` from vm->entry_point until OP_HALT.
//
// Dispatch uses the GCC/Clang "labels as values" extension: each handler
// ends by jumping straight to the handler of the next instruction. The
// table order must match the Opcode enumeration.
//
// Frame layout: OP_CALL saves (return pc, caller fp) on the call stack and
// points fp at the first argument; OP_RETURN drops everything from fp up and
// pushes the result in its place.
//
// Execution stops with a message on stderr when a call targets an unknown
// function, the call stack would overflow, or a return has no caller.
// Previously these cases read or wrote out of bounds. The value stack is
// still unchecked because its capacity is not recorded in the VM.
void run_vm(VM* vm) {
    // Must match the call-stack size allocated in vm_create().
    enum { CALL_STACK_SLOTS = 64 };

    int pc = vm->entry_point;
    int fp = 0;
    static void* const dispatch_table[] = {
        &&op_const, &&op_load, &&op_add, &&op_call,
        &&op_return, &&op_extern, &&op_halt
    };

#define DISPATCH() goto *dispatch_table[vm->code[pc].op]
// Wrapped in do/while so the macro acts as one statement wherever it is used.
#define NEXT() do { pc++; DISPATCH(); } while (0)

    DISPATCH();

op_const: {
        Value v = {vm->code[pc].operand};
        push(vm, v);
        NEXT();
    }
op_load: {
        Value v = vm->stack[fp + vm->code[pc].operand];
        push(vm, v);
        NEXT();
    }
op_add: {
        Value b = pop(vm);
        Value a = pop(vm);
        Value result = {a.value + b.value};
        push(vm, result);
        NEXT();
    }
op_call: {
        int callee = vm->code[pc].operand;
        if (callee < 0 || callee >= vm->func_count) {
            fprintf(stderr, "runtime error: call to unknown function %d\n", callee);
            return;
        }
        if (vm->csp + 2 > CALL_STACK_SLOTS) {
            fprintf(stderr, "runtime error: call stack overflow\n");
            return;
        }
        const Function* func = &vm->functions[callee];
        vm->call_stack[vm->csp++] = pc + 1;
        vm->call_stack[vm->csp++] = fp;
        fp = vm->sp - func->params;
        pc = func->addr;
        DISPATCH();
    }
op_return: {
        if (vm->csp < 2) {
            fprintf(stderr, "runtime error: return outside of a function call\n");
            return;
        }
        Value ret = pop(vm);
        vm->sp = fp;  // discard the arguments and any temporaries
        fp = vm->call_stack[--vm->csp];
        pc = vm->call_stack[--vm->csp];
        push(vm, ret);
        DISPATCH();
    }
op_extern: {
        ExternFunc ext = vm->externs[vm->code[pc].operand];
        ext.fn(vm);
        NEXT();
    }
op_halt:
    return;

// Keep the helper macros local to this function.
#undef NEXT
#undef DISPATCH
}
// Print a disassembly of the emitted bytecode followed by a hex dump with
// eight instructions per line.
void print_bytecode(VM* vm) {
    // Order must match the Opcode enumeration.
    static const char* const names[] = {
        "CONST", "LOAD", "ADD", "CALL", "RETURN", "EXTERN", "HALT"
    };
    const size_t name_count = sizeof names / sizeof names[0];

    printf("bytecode:\n");
    for (int i = 0; i < vm->code_size; i++) {
        const Instruction* ins = &vm->code[i];
        // Guard the table lookup in case the buffer holds an invalid opcode.
        const char* mnemonic = (size_t)ins->op < name_count ? names[ins->op] : "?";
        // %x expects an unsigned int; convert explicitly rather than passing
        // an enum or a signed int.
        printf("%3d: %-8s %d (0x%02x 0x%02x)\n",
               i, mnemonic, ins->operand,
               (unsigned)ins->op, (unsigned)ins->operand);
    }
    printf("\n");

    printf("hex dump:\n");
    for (int i = 0; i < vm->code_size; i++) {
        printf("%02x %02x ", (unsigned)vm->code[i].op, (unsigned)vm->code[i].operand);
        if ((i + 1) % 8 == 0) printf("\n");
    }
    if (vm->code_size % 8 != 0) printf("\n");
    printf("\n");
}
// Compile and run the built-in demo program.
// With `-d` as the first argument the source and the generated bytecode are
// printed before execution. Returns EXIT_FAILURE if the VM or the lexer
// cannot be created.
int main(int argc, char** argv) {
    const char* source =
        "fn add(a, b) {\n"
        " return a + b;\n"
        "}\n"
        "bunny.squeak();\n"
        "std.io.println(add(5, 10));";
    int debug = argc > 1 && strcmp(argv[1], "-d") == 0;
    if (debug) printf("source:\n%s\n\n", source);

    VM* vm = vm_create(256, 256);
    if (vm == NULL) {
        fprintf(stderr, "failed to create VM\n");
        return EXIT_FAILURE;
    }

    int status = EXIT_FAILURE;
    Lexer* lex = lexer_create(source);
    if (lex == NULL) {
        fprintf(stderr, "failed to create lexer\n");
        goto cleanup;
    }

    parse_program(lex, vm);
    if (debug) print_bytecode(vm);
    run_vm(vm);
    status = EXIT_SUCCESS;

cleanup:
    for (int i = 0; i < vm->func_count; i++) {
        free(vm->functions[i].name);
        for (int j = 0; j < vm->functions[i].params; j++) {
            free(vm->functions[i].param_names[j]);
        }
        free(vm->functions[i].param_names);
    }
    free(vm->functions);
    free(vm->externs);
    if (lex != NULL) {
        free(lex->current.value);  // free(NULL) is a no-op
        free(lex);
    }
    free(vm->code);
    free(vm->stack);
    free(vm->call_stack);
    free(vm);
    return status;
}
/**
 * Bytecode opcodes understood by VM.run(). The numeric values double as
 * indices into the mnemonic table in VM.printBytecode().
 * Frozen so the enum cannot be modified by accident at runtime.
 */
const Opcode = Object.freeze({
  CONST: 0, // push the operand
  LOAD: 1, // push the stack slot at (frame pointer + operand)
  ADD: 2, // pop two values, push their sum
  CALL: 3, // call the function at index `operand`
  RETURN: 4, // return the top of stack to the caller
  EXTERN: 5, // invoke the host function at index `operand`
  HALT: 6 // stop execution
});
/**
 * Token kinds produced by Lexer.nextToken().
 * EOF is 0 and is also what the lexer reports for unrecognised characters.
 * Frozen so the enum cannot be modified by accident at runtime.
 */
const TokenType = Object.freeze({
  EOF: 0,
  FN: 1,
  RETURN: 2,
  IDENT: 3,
  NUMBER: 4,
  LPAREN: 5,
  RPAREN: 6,
  LBRACE: 7,
  RBRACE: 8,
  COMMA: 9,
  SEMICOLON: 10,
  PLUS: 11,
  DOT: 12
});
/**
 * Tokenizer with one token of lookahead, available as `current`.
 * Tokens are plain objects: `{ type }`, plus `value` for identifiers and
 * keywords or `numValue` for numbers.
 */
class Lexer {
  /** @param {string} src source text to tokenize */
  constructor(src) {
    this.src = src;
    this.pos = 0;
    this.current = this.nextToken();
  }

  /** Move `pos` past any run of whitespace. */
  skipWhitespace() {
    const { src } = this;
    let cursor = this.pos;
    while (cursor < src.length && /\s/.test(src[cursor])) {
      cursor += 1;
    }
    this.pos = cursor;
  }

  /**
   * Scan and return the next token. Characters that are not recognised are
   * consumed and reported as EOF.
   */
  nextToken() {
    this.skipWhitespace();
    const { src } = this;
    if (this.pos >= src.length) {
      return { type: TokenType.EOF };
    }

    const first = src[this.pos];
    const begin = this.pos;
    // Consume characters matching `pattern` and return the text consumed.
    const scan = pattern => {
      while (this.pos < src.length && pattern.test(src[this.pos])) {
        this.pos += 1;
      }
      return src.slice(begin, this.pos);
    };

    if (/[a-zA-Z]/.test(first)) {
      const value = scan(/[a-zA-Z0-9_]/);
      switch (value) {
        case 'fn':
          return { type: TokenType.FN, value };
        case 'return':
          return { type: TokenType.RETURN, value };
        default:
          return { type: TokenType.IDENT, value };
      }
    }

    if (/[0-9]/.test(first)) {
      return { type: TokenType.NUMBER, numValue: parseInt(scan(/[0-9]/)) };
    }

    this.pos += 1;
    switch (first) {
      case '(':
        return { type: TokenType.LPAREN };
      case ')':
        return { type: TokenType.RPAREN };
      case '{':
        return { type: TokenType.LBRACE };
      case '}':
        return { type: TokenType.RBRACE };
      case ',':
        return { type: TokenType.COMMA };
      case ';':
        return { type: TokenType.SEMICOLON };
      case '+':
        return { type: TokenType.PLUS };
      case '.':
        return { type: TokenType.DOT };
      default:
        return { type: TokenType.EOF };
    }
  }

  /** Replace `current` with the next token. */
  advance() {
    this.current = this.nextToken();
  }
}
/**
 * Bytecode container and interpreter.
 *
 * `code` holds `{ op, operand }` instructions, `functions` the user-defined
 * functions and `externs` the host functions callable from scripts. While
 * `run()` executes, console.log is redirected so that printed lines are
 * collected in `output`.
 */
class VM {
  constructor() {
    this.code = [];
    this.entryPoint = 0;
    this.stack = [];
    this.callStack = [];
    this.functions = [];
    this.externs = [
      {
        name: 'std.io.println',
        fn: vm => {
          console.log(vm.pop());
          vm.push(0);
        },
        params: 1
      },
      {
        name: 'bunny.squeak',
        fn: vm => {
          console.log('squeak');
          vm.push(0);
        },
        params: 0
      }
    ];
    this.output = [];
  }

  /** Append an instruction; `operand` defaults to 0. */
  emit(op, operand = 0) {
    this.code.push({ op, operand });
  }

  /** Push a value onto the value stack. */
  push(v) {
    this.stack.push(v);
  }

  /** Pop and return the top of the value stack. */
  pop() {
    return this.stack.pop();
  }

  /**
   * Frame offset of parameter `name` in `func`, or -1 when `func` is
   * null/undefined or has no such parameter.
   *
   * Arguments are pushed left to right and CALL sets the frame pointer to
   * `stack.length - params`, so the i-th parameter lives at offset i. The
   * former `params - 1 - idx` mapping bound the first parameter to the last
   * argument.
   */
  findParam(func, name) {
    if (!func) return -1;
    return func.paramNames.indexOf(name);
  }

  /** Index of the user function called `name`, or -1. */
  findFunction(name) {
    return this.functions.findIndex(f => f.name === name);
  }

  /** Index of the host function called `name`, or -1. */
  findExtern(name) {
    return this.externs.findIndex(e => e.name === name);
  }

  /**
   * Execute from `entryPoint` until HALT or the end of the code.
   * Lines printed by host functions are captured in `this.output`.
   * console.log is restored even when execution throws, and an unknown
   * opcode raises an Error instead of looping forever.
   */
  run() {
    let pc = this.entryPoint;
    let fp = 0;
    this.output = [];
    const origLog = console.log;
    console.log = (...args) => this.output.push(args.join(' '));
    try {
      while (pc < this.code.length) {
        const instr = this.code[pc];
        switch (instr.op) {
          case Opcode.CONST:
            this.push(instr.operand);
            pc++;
            break;
          case Opcode.LOAD:
            this.push(this.stack[fp + instr.operand]);
            pc++;
            break;
          case Opcode.ADD: {
            let b = this.pop(),
              a = this.pop();
            this.push(a + b);
            pc++;
            break;
          }
          case Opcode.CALL: {
            let func = this.functions[instr.operand];
            this.callStack.push(pc + 1, fp);
            fp = this.stack.length - func.params;
            pc = func.addr;
            break;
          }
          case Opcode.RETURN: {
            let ret = this.pop();
            this.stack.length = fp; // discard the arguments and any temporaries
            fp = this.callStack.pop();
            pc = this.callStack.pop();
            this.push(ret);
            break;
          }
          case Opcode.EXTERN:
            this.externs[instr.operand].fn(this);
            pc++;
            break;
          case Opcode.HALT:
            return;
          default:
            throw new Error(`unknown opcode ${instr.op} at pc ${pc}`);
        }
      }
    } finally {
      // Always undo the redirection, including on HALT and on errors.
      console.log = origLog;
    }
  }

  /** Return a human-readable listing of the emitted bytecode. */
  printBytecode() {
    const names = ['CONST', 'LOAD', 'ADD', 'CALL', 'RETURN', 'EXTERN', 'HALT'];
    let out = 'bytecode:\n';
    this.code.forEach((instr, i) => {
      out += `${String(i).padStart(3)}: ${names[instr.op].padEnd(8)} ${instr.operand}\n`;
    });
    return out;
  }
}
/**
 * Parse a dotted name such as `std.io.println` starting at the current token.
 * Returns the joined name, or null (consuming nothing) when the current
 * token is not an identifier. A '.' that is not followed by an identifier
 * is consumed and ends the name.
 */
function parseQualifiedName(lex) {
  if (lex.current.type !== TokenType.IDENT) {
    return null;
  }
  const segments = [lex.current.value];
  lex.advance();
  for (;;) {
    if (lex.current.type !== TokenType.DOT) break;
    lex.advance();
    if (lex.current.type !== TokenType.IDENT) break;
    segments.push(lex.current.value);
    lex.advance();
  }
  return segments.join('.');
}
/**
 * Compile one expression: a number, a parameter reference or a call,
 * optionally followed by `+ <expression>` (so '+' associates to the right).
 *
 * `currentFunc` is the function being compiled, or null at top level.
 *
 * @throws {SyntaxError} when no expression starts at the current token or
 *   an identifier is not a parameter of `currentFunc`. Previously such input
 *   emitted nothing, or a LOAD with offset -1.
 */
function parseExpr(lex, vm, currentFunc) {
  if (lex.current.type === TokenType.NUMBER) {
    vm.emit(Opcode.CONST, lex.current.numValue);
    lex.advance();
  } else if (lex.current.type === TokenType.IDENT) {
    let name = parseQualifiedName(lex);
    if (lex.current.type === TokenType.LPAREN) {
      parseCall(lex, vm, currentFunc, name);
    } else {
      let offset = vm.findParam(currentFunc, name);
      if (offset < 0) {
        throw new SyntaxError(`unknown variable '${name}'`);
      }
      vm.emit(Opcode.LOAD, offset);
    }
  } else {
    throw new SyntaxError('expected an expression');
  }
  if (lex.current.type === TokenType.PLUS) {
    lex.advance();
    parseExpr(lex, vm, currentFunc);
    vm.emit(Opcode.ADD);
  }
}
/**
 * Parse the argument list of a call to `name` and emit the call.
 * On entry the current token is '('. Arguments are compiled left to right;
 * host functions take precedence over user functions of the same name.
 *
 * @throws {SyntaxError} when ')' is missing, the callee is unknown, or the
 *   argument count differs from the callee's parameter count. Previously an
 *   unknown callee emitted CALL with index -1, which crashed at run time.
 */
function parseCall(lex, vm, currentFunc, name) {
  lex.advance(); // skip (
  let argCount = 0;
  if (lex.current.type !== TokenType.RPAREN) {
    parseExpr(lex, vm, currentFunc);
    argCount++;
    while (lex.current.type === TokenType.COMMA) {
      lex.advance();
      parseExpr(lex, vm, currentFunc);
      argCount++;
    }
  }
  if (lex.current.type !== TokenType.RPAREN) {
    throw new SyntaxError(`expected ')' after arguments in call to '${name}'`);
  }
  lex.advance(); // skip )

  const checkArity = expected => {
    if (argCount !== expected) {
      throw new SyntaxError(
        `'${name}' expects ${expected} argument(s) but got ${argCount}`
      );
    }
  };

  let externId = vm.findExtern(name);
  if (externId !== -1) {
    checkArity(vm.externs[externId].params);
    vm.emit(Opcode.EXTERN, externId);
    return;
  }

  let funcId = vm.findFunction(name);
  if (funcId === -1) {
    throw new SyntaxError(`call to undefined function '${name}'`);
  }
  checkArity(vm.functions[funcId].params);
  vm.emit(Opcode.CALL, funcId);
}
/**
 * Parse `fn name(p1, p2, ...) { return expr; ... }` starting at the `fn`
 * token, register the function in `vm.functions` and emit its body.
 *
 * Only `return` statements generate code; any other token inside the body
 * is skipped, mirroring how parseProgram() treats unknown top-level tokens.
 *
 * @throws {SyntaxError} on a malformed header or an unterminated body.
 *   Previously a body token other than `return` or '}' made the loop spin
 *   forever.
 */
function parseFunction(lex, vm) {
  // Require the current token to be of `type`, consume it and return it.
  const expect = (type, what) => {
    if (lex.current.type !== type) {
      throw new SyntaxError(`expected ${what} in function definition`);
    }
    const tok = lex.current;
    lex.advance();
    return tok;
  };

  lex.advance(); // skip 'fn'
  const funcName = expect(TokenType.IDENT, 'a function name').value;
  expect(TokenType.LPAREN, "'('");

  const paramNames = [];
  if (lex.current.type === TokenType.IDENT) {
    paramNames.push(lex.current.value);
    lex.advance();
    while (lex.current.type === TokenType.COMMA) {
      lex.advance();
      paramNames.push(expect(TokenType.IDENT, 'a parameter name').value);
    }
  }
  expect(TokenType.RPAREN, "')'");
  expect(TokenType.LBRACE, "'{'");

  const func = {
    name: funcName,
    addr: vm.code.length,
    params: paramNames.length,
    paramNames
  };
  // Register before compiling the body so the function can call itself.
  vm.functions.push(func);

  while (lex.current.type !== TokenType.RBRACE) {
    if (lex.current.type === TokenType.EOF) {
      // The lexer also reports unrecognised characters as EOF.
      throw new SyntaxError(
        `unexpected end of input or unrecognised character in body of '${funcName}'`
      );
    }
    if (lex.current.type !== TokenType.RETURN) {
      lex.advance();
      continue;
    }
    lex.advance(); // skip 'return'
    parseExpr(lex, vm, func);
    vm.emit(Opcode.RETURN);
    // Only consume a ';' that is really there, so a missing one cannot
    // swallow the closing brace.
    if (lex.current.type === TokenType.SEMICOLON) lex.advance();
  }
  lex.advance(); // skip }
}
/**
 * Compile a whole program: leading `fn` definitions first, then top-level
 * call statements, followed by a final HALT.
 *
 * `vm.entryPoint` is set to the first top-level instruction. At top level
 * only `name(...)` calls produce code; an identifier that is not followed
 * by '(' and every other token are skipped.
 */
function parseProgram(lex, vm) {
  while (lex.current.type === TokenType.FN) parseFunction(lex, vm);

  vm.entryPoint = vm.code.length;

  for (;;) {
    const kind = lex.current.type;
    if (kind === TokenType.EOF) break;
    if (kind !== TokenType.IDENT) {
      lex.advance();
      continue;
    }
    const callee = parseQualifiedName(lex);
    if (lex.current.type !== TokenType.LPAREN) continue;
    parseCall(lex, vm, null, callee);
    if (lex.current.type === TokenType.SEMICOLON) {
      lex.advance();
    }
  }

  vm.emit(Opcode.HALT);
}
/**
 * Compile `source` into a fresh VM that is ready for `run()`.
 * @param {string} source program text
 * @returns {VM} the VM holding the generated bytecode
 */
function compile(source) {
  const machine = new VM();
  parseProgram(new Lexer(source), machine);
  return machine;
}
// Demo: compile the sample program, show its bytecode, then run it and
// replay the output that the VM captured while console.log was redirected.
const source = `fn add(a, b) {
return a + b;
}
bunny.squeak();
std.io.println(add(5, 10));`;

console.log('Source:');
console.log(source);
console.log('\n' + '='.repeat(40) + '\n');

const vm = compile(source);
console.log(vm.printBytecode());

console.log('Output:');
vm.run();
for (const line of vm.output) {
  console.log(line);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment