Compare commits

..

9 Commits

Author SHA1 Message Date
hiro
3f6ef19cca feat: add build script 2025-12-24 12:40:07 -05:00
hiro
f2e188cf1f wip: add parser stub, cleanup scratch comments in tokenizer 2025-12-22 21:41:23 -05:00
hiro
1e7ea750f6 wip: implement tokenizer in c++, ignore binaries 2025-12-22 18:39:09 -05:00
hiro
fb1059683f wip: work on cpp tokenizer implementation 2025-12-21 18:20:47 -05:00
hiro
d0714f8664 split out tokenizer class 2025-11-18 16:24:59 -06:00
68ea89ece6 base c++ work 2025-11-18 09:12:59 -06:00
23e7faf186 remove c stub 2025-11-17 16:22:20 -06:00
13ce1a72c1 chore: gitignore a.out 2025-03-26 00:05:18 -05:00
acbdf1de5f first pass transpiler implementation - pb2c.rb
- Modified version of Gary Bernhardt's example compiler from scratch
- Transpiles absolute barebones PowerBasic to C

Compile with `./pb2c.rb hello.bas | gcc -xc -`
2025-03-26 00:04:16 -05:00
8 changed files with 356 additions and 0 deletions

4
.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
a.out
npb
hello

13
build.sh Executable file
View File

@ -0,0 +1,13 @@
#!/usr/bin/env -S bash
# Build script for neopb.
# Usage: ./build.sh [-d]
#   -d   debug build: adds -DDEBUG and reports MODE as "debug".
DEFINES=""
MODE="release"
while getopts "d" flag; do
    case "$flag" in
        d)
            DEFINES="$DEFINES -DDEBUG"
            MODE="debug"
            ;;
    esac
done
echo building neopb as "$MODE"...
# $DEFINES stays unquoted on purpose so multiple flags word-split into
# separate g++ arguments.
g++ -o npb $DEFINES neopb.cpp tokenizer.cpp

3
hello.bas Normal file
View File

@ -0,0 +1,3 @@
Function PBMain() as Long
PRINT "Hello, world!"
End Function

41
neopb.cpp Normal file
View File

@ -0,0 +1,41 @@
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include "tokenizer.hpp"
// NOTE(review): removed the free-function prototype
// `std::vector<PBToken> tokenize(std::string)` that used to sit here —
// it is never defined or called anywhere (lexing goes through
// Tokenizer::tokenize()), so it was dead code and a latent link error.

// Slurp the entire contents of `fname` into a string.
// An unopenable/missing file yields an empty string: the istreambuf
// iterator pair simply reads zero characters from a failed stream.
std::string readFileContents(std::string fname) {
    std::ifstream ifs(fname);
    // Extra parentheses around the first argument avoid the most vexing parse.
    std::string contents((std::istreambuf_iterator<char>(ifs)),
                         (std::istreambuf_iterator<char>()));
    return contents;
}
int main(int argc, char* argv[]) {
#ifdef DEBUG
for (int n = 0; n < argc; n++) {
std::cout << "arg" << n << ": " << argv[n] << std::endl;
}
#endif
std::string infile = argc > 1 ? argv[1] : "";
std::string code = "";
if(infile.length() > 0) {
code = readFileContents(infile);
}
Tokenizer tokenizer = Tokenizer(code);
#ifdef DEBUG
std::cout << "code: " << tokenizer.dump() << std::endl;
#endif
std::vector<PBToken> tokens = tokenizer.tokenize();
for(int i = 0; i < tokens.size(); i++) {
std::cout << tokens[i].value << std::endl;
}
return 0;
}

22
parser.hpp Normal file
View File

@ -0,0 +1,22 @@
#pragma once
// Include guard kept alongside #pragma once for maximum portability.
#ifndef PARSER_HPP
#define PARSER_HPP
#include <vector>
// NOTE(review): requires tokenizer.hpp (PBToken) to be included first,
// as the original did implicitly.

// Parser stub (wip): will turn the token stream produced by
// Tokenizer::tokenize() into an AST.
class Parser {
    std::vector<PBToken> tokens;  // remaining tokens to consume
public:
    // Fixed: the original declared `Parser(std::vector);` and
    // `std::vector parse();` — std::vector without a template argument
    // is ill-formed and would not compile once this header is included.
    explicit Parser(std::vector<PBToken>);
    // Placeholder return type until AST node types exist.
    std::vector<PBToken> parse();
};
#endif

168
pb2c.rb Executable file
View File

@ -0,0 +1,168 @@
#!/usr/bin/ruby
# Regex-driven lexer: turns PowerBasic source text into a flat list of
# Token structs. Patterns are tried strictly in TOKEN_TYPES order.
class Tokenizer
  TOKEN_TYPES = [
    [:preproc, /#[a-zA-Z]+\b/i],
    [:function, /\bfunction\b/i],
    [:sub, /\bsub\b/i],
    [:end, /\bend\b/i],
    [:as, /\bas\b/i],
    [:typename, /\blong\b/i],
    [:identifier, /\b[a-zA-Z]+\b/],
    [:integer, /\b[0-9]+\b/],
    [:string, /".*"/],
    [:oparen, /\(/],
    [:cparen, /\)/],
    [:comma, /,/],
    [:quote, /'/],
  ]

  def initialize(code)
    @code = code
  end

  # Lex the whole input. If a token can't be matched, dump what we had
  # so far and re-raise so the failure point is visible.
  def tokenize
    collected = []
    begin
      while !@code.empty?
        collected << tokenize_one_token
        @code = @code.strip
      end
    rescue RuntimeError
      puts collected.join("\n")
      raise
    end
    collected
  end

  # Match exactly one token at the front of @code, consume its text,
  # and return it; raises when nothing matches.
  def tokenize_one_token
    TOKEN_TYPES.each do |type, pattern|
      anchored = /\A(#{pattern})/
      match = anchored.match(@code)
      next unless match
      text = match[1]
      @code = @code[text.length..-1]
      return Token.new(type, text)
    end
    raise RuntimeError.new(
      "Couldn't match token on #{@code.inspect}")
  end
end
Token = Struct.new(:type, :value)
# Recursive-descent parser (wip): consumes the Token list produced by
# Tokenizer and builds AST nodes (FunctionNode / CallNode / StringNode).
class Parser
# tokens: array of Token structs; consumed destructively via shift.
def initialize(tokens)
@tokens = tokens
end
# Entry point; a program is currently exactly one function definition.
def parse
parse_function
end
# Stub: preprocessor directives are tokenized but not parsed yet.
def parse_preproc
end
# FUNCTION name(args) AS type <expr> END FUNCTION  ->  FunctionNode
def parse_function
consume(:function)
name = consume(:identifier).value
arg_names = parse_arg_names
consume(:as)
rtype = consume(:typename).value
body = parse_expr
consume(:end)
consume(:function)
FunctionNode.new(name, rtype, arg_names, body)
end
# Parenthesized, comma-separated identifier list; may be empty.
def parse_arg_names
arg_names = []
consume(:oparen)
if peek(:identifier)
arg_names << consume(:identifier).value
while peek(:comma)
consume(:comma)
arg_names << consume(:identifier).value
end
end
consume(:cparen)
arg_names
end
# Dispatch on the next token(s) to pick an expression form.
# NOTE(review): parse_integer, parse_string, parse_call and parse_var_ref
# are not defined anywhere in this file — only the parse_stmt branch
# (identifier followed by a string, e.g. PRINT "...") currently works.
def parse_expr
if peek(:integer)
parse_integer
elsif peek(:string)
parse_string
elsif peek(:identifier) && peek(:oparen, 1)
parse_call
elsif peek(:identifier) && peek(:string, 1)
parse_stmt
else
parse_var_ref
end
end
# Statement-style call like `PRINT "Hello"` -> CallNode(name, raw string).
def parse_stmt
name = consume(:identifier).value
arg_exprs = consume(:string).value
CallNode.new(name, arg_exprs)
end
# True when the token `offset` positions ahead has the expected type.
# NOTE(review): fetch raises IndexError past end of input — confirm intended.
def peek(expected_type, offset=0)
@tokens.fetch(offset).type == expected_type
end
# Pop the next token, raising unless it has the expected type.
def consume(expected_type)
token = @tokens.shift
if token.type == expected_type
token
else
raise RuntimeError.new(
"Expected token type #{expected_type.inspect} but got #{token.type.inspect}")
end
end
end
# AST nodes produced by Parser and consumed by Generator.
FunctionNode = Struct.new(:name, :type, :arg_names, :body) # function def; body is one expr node
StringNode = Struct.new(:value) # string literal; value keeps its quotes
CallNode = Struct.new(:name, :arg_exprs) # call; arg_exprs is the raw string argument
# Emits C source text for an AST node (recursing through function bodies).
class Generator
  def generate(node)
    case node
    when FunctionNode
      # The body becomes the function's single return expression.
      args = node.arg_names.join(',')
      body = generate(node.body)
      format("%s %s(%s) { return %s ; }", node.type.downcase, node.name, args, body)
    when CallNode
      format("%s(%s)", node.name, node.arg_exprs)
    when StringNode
      node.value
    else
      raise RuntimeError.new("Unexpected node type: #{node.class}")
    end
  end
end
# Driver: PowerBasic source file (ARGV[0]) -> tokens -> AST -> C on stdout.
#tokens = Tokenizer.new(File.read("hello.bas")).tokenize
tokens = Tokenizer.new(File.read(ARGV[0])).tokenize
#puts "Tokens:\n"
#puts tokens.join("\n")
tree = Parser.new(tokens).parse
#puts "\nAST:\n"
#puts tree
# Minimal C runtime preamble: PRINT maps directly onto printf.
RUNTIME = "#include <stdio.h>\n#define PRINT(a) printf(a)\n"
# C entry point that delegates to the transpiled PBMain.
CMAIN = "int main(void) { PBMain(); return 0; }"
generated = Generator.new.generate(tree)
#puts "\nGenerated:\n"
#puts generated
#puts "\nGenerated with preamble/postamble:\n"
puts [RUNTIME, generated, CMAIN].join("\n")

63
tokenizer.cpp Normal file
View File

@ -0,0 +1,63 @@
#include "tokenizer.hpp"
#include <algorithm>
#include <cctype>
#include <iostream>
// Strip leading whitespace from s in place and return a reference to it.
static inline std::string &ltrim(std::string &s) {
    // Cast through unsigned char: calling std::isspace with a plain char
    // that happens to be negative is undefined behavior.
    s.erase(s.begin(), std::find_if(s.begin(), s.end(),
                                    [](unsigned char c) { return !std::isspace(c); }));
    return s;
}
// Build the (name, regex) table — one entry per PBTokenType, in enum
// order, each pattern anchored at the start of input — then store the
// source text to be lexed.
Tokenizer::Tokenizer(std::string code) {
    const auto icase = std::regex_constants::icase;
    tokentypes = {
        { "PREPROC",    std::regex("^(#[a-zA-Z]+\\b)") },
        { "FUNCTION",   std::regex("^(\\bfunction\\b)", icase) },
        { "SUB",        std::regex("^(\\bsub\\b)", icase) },
        { "END",        std::regex("^(\\bend\\b)", icase) },
        { "AS",         std::regex("^(\\bas\\b)", icase) },
        { "TYPE",       std::regex("^(\\blong\\b)", icase) },
        { "IDENTIFIER", std::regex("^(\\b[a-zA-Z]+\\b)") },
        { "INTEGER",    std::regex("^(\\b[0-9]+\\b)") },
        { "STRING",     std::regex("^(\".*\")") },
        { "OPAREN",     std::regex("^(\\()") },
        { "CPAREN",     std::regex("^(\\))") },
        { "COMMA",      std::regex("^(,)") },
        { "QUOTE",      std::regex("^(')") },
        { "EQUALS",     std::regex("^(=)") },
    };
    this->code = code;
}
std::vector<PBToken> Tokenizer::tokenize() {
std::vector<PBToken> tokens;
code = ltrim(code);
while(code.size() > 0) {
tokens.push_back(tokenize_one_token());
code = ltrim(code);
}
return tokens;
}
// Try each token-type regex (in PBTokenType order) against the front of
// `code`; on the first hit, consume the matched text and return the token.
// Throws std::runtime_error naming the offending word when nothing matches.
PBToken Tokenizer::tokenize_one_token() {
PBToken tk;
// tt doubles as the PBTokenType value: tokentypes was built in enum order.
for(int tt = 0; tt < TOKEN_TYPE_COUNT; tt++) {
#ifdef DEBUG
std::cout << "attempting to match " << std::get<0>(tokentypes[tt]) << std::endl;
#endif
std::regex re = std::get<1>(tokentypes[tt]);
std::smatch sm;
if(std::regex_search(code, sm, re)) {
tk.type = (PBTokenType) tt;
// Patterns are anchored with ^(...), so sm[0] (the full match) is the
// leading text to consume.
tk.value = sm[0];
#ifdef DEBUG
std::cout << "match found" << std::endl;
std::cout << "tk.type: " << std::get<0>(tokentypes[tk.type]) << std::endl << "tk.value: " << tk.value << std::endl;
#endif
// Drop the consumed text from the front of the buffer.
code = code.substr(tk.value.length());
return tk;
}
}
// Report only up to the next whitespace so the message stays short.
throw std::runtime_error("Couldn't match token: " + code.substr(0, code.find_first_of(" \t\r\n")));
}
std::string Tokenizer::dump() {
return this->code;
}

42
tokenizer.hpp Normal file
View File

@ -0,0 +1,42 @@
#pragma once
// Include guard kept alongside #pragma once for maximum portability.
#ifndef TOKENIZER_HPP
#define TOKENIZER_HPP
#include <regex>
#include <string>  // PBToken::value / Tokenizer::code — was only available transitively
#include <unordered_map>
#include <utility>
#include <vector>

// Token kinds, in the exact order the tokenizer tries their regexes
// (the match loop uses the numeric value as the table index).
// TOKEN_TYPE_COUNT is a sentinel loop bound, not a real token.
typedef enum {
    PREPROC,
    FUNCTION,
    SUB,
    END,
    AS,
    TYPE,
    IDENTIFIER,
    INTEGER,
    STRING,
    OPAREN,
    CPAREN,
    COMMA,
    QUOTE,
    EQUALS,
    TOKEN_TYPE_COUNT
} PBTokenType;

// One lexed token: its kind plus the exact matched source text.
typedef struct {
    PBTokenType type;
    std::string value;
} PBToken;

// Regex-driven lexer for the PowerBasic subset.
class Tokenizer {
    // (name, start-anchored regex) pairs in PBTokenType order.
    std::vector<std::pair<std::string, std::regex>> tokentypes;
    // Remaining unconsumed source text.
    std::string code;
public:
    Tokenizer(std::string);
    std::vector<PBToken> tokenize();
    PBToken tokenize_one_token();
    std::string dump();
};
#endif