Compare commits
5 Commits
| Author | SHA1 | Date |
|---|---|---|
| | d0714f8664 | |
| | 68ea89ece6 | |
| | 23e7faf186 | |
| | 13ce1a72c1 | |
| | acbdf1de5f | |
2 .gitignore vendored Normal file
@@ -0,0 +1,2 @@
a.out
3 hello.bas Normal file
@@ -0,0 +1,3 @@
Function PBMain() as Long
    PRINT "Hello, world!"
End Function
50 neopb.cpp Normal file
@@ -0,0 +1,50 @@
#include <iostream>
#include <regex>
#include <string>
#include <unordered_map>
#include <vector>

#include "tokenizer.hpp"

std::vector<PBToken> tokenize(std::string code);

int main(int argc, char* argv[]) {
    //for (int n = 0; n < argc; n++) {
    //    std::cout << "arg" << n << ": " << argv[n] << std::endl;
    //}

    return 0;
}

std::vector<PBToken> tokenize(std::string code) {
    // Classifies a single whitespace-delimited fragment into a PBToken.
    const auto tokenize_one = [](const std::string& fragment) -> PBToken {
        //const std::unordered_map<PBTokenType, std::regex> tokentypes = std::unordered_map();
        // Raw string literals keep \b as a regex word boundary rather than
        // a C++ backspace escape.
        const std::regex re_func(R"(\bfunction\b)", std::regex_constants::icase);
        const std::regex re_sub(R"(\bsub\b)", std::regex_constants::icase);
        const std::regex re_end(R"(\bend\b)", std::regex_constants::icase);
        const std::regex re_as(R"(\bas\b)", std::regex_constants::icase);
        const std::regex re_type(R"(\blong\b)", std::regex_constants::icase);
        const std::regex re_identifier(R"(\b[a-zA-Z]+\b)");
        const std::regex re_integer(R"(\b[0-9]+\b)");
        const std::regex re_string(R"(".*")");
        const std::regex re_oparen(R"(\()");
        const std::regex re_cparen(R"(\))");
        const std::regex re_comma(",");
        const std::regex re_quote("'");
        const std::regex re_equals("=");

        PBTokenType tt = SUB;  // TODO: pick the type by matching the regexes above

        // std::string has no trim(); strip surrounding whitespace by hand.
        const auto first = fragment.find_first_not_of(" \t\r\n");
        const auto last = fragment.find_last_not_of(" \t\r\n");
        std::string val = (first == std::string::npos)
            ? std::string()
            : fragment.substr(first, last - first + 1);

        return { .type = tt, .value = val };
    };

    std::vector<PBToken> tokens;  // note: "tokens()" here would declare a function, not a vector
    while (code.length() > 0) {
        const auto split = code.find(' ');
        const std::string fragment =
            (split != std::string::npos) ? code.substr(0, split) : code;
        tokens.push_back(tokenize_one(fragment));
        // Consume the fragment plus separator so the loop terminates.
        code = (split != std::string::npos) ? code.substr(split + 1) : std::string();
    }
    return tokens;
}
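Aside: the commented-out `tokentypes` map and the hard-coded `PBTokenType tt = SUB;` mark the unfinished part of the port. One way to finish it is the same table-driven dispatch the Ruby version below uses. This is a sketch only: the `classify` name and the ordered `std::vector` table are assumptions, not part of the commit. An ordered container matters here, since the keyword patterns have to be tried before `IDENTIFIER`.

#include <regex>
#include <string>
#include <utility>
#include <vector>

#include "tokenizer.hpp"

// Sketch (assumption): classify one fragment by trying each (type, regex)
// pair in order; first match wins, so keywords outrank IDENTIFIER.
PBToken classify(const std::string& fragment) {
    static const std::vector<std::pair<PBTokenType, std::regex>> table = {
        {FUNCTION,   std::regex(R"(\bfunction\b)", std::regex_constants::icase)},
        {SUB,        std::regex(R"(\bsub\b)",      std::regex_constants::icase)},
        {END,        std::regex(R"(\bend\b)",      std::regex_constants::icase)},
        {AS,         std::regex(R"(\bas\b)",       std::regex_constants::icase)},
        {TYPE,       std::regex(R"(\blong\b)",     std::regex_constants::icase)},
        {IDENTIFIER, std::regex(R"(\b[a-zA-Z]+\b)")},
        {INTEGER,    std::regex(R"(\b[0-9]+\b)")},
        {STRING,     std::regex(R"(".*")")},
        {OPAREN,     std::regex(R"(\()")},
        {CPAREN,     std::regex(R"(\))")},
        {COMMA,      std::regex(",")},
        {QUOTE,      std::regex("'")},
        {EQUALS,     std::regex("=")},
    };
    for (const auto& [type, re] : table) {
        if (std::regex_search(fragment, re)) {
            return PBToken{type, fragment};
        }
    }
    return PBToken{TOKEN_TYPE_COUNT, fragment};  // no match: caller decides how to fail
}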
162 pb2c.rb Executable file
@@ -0,0 +1,162 @@
#!/usr/bin/ruby

class Tokenizer
  TOKEN_TYPES = [
    [:function, /\bfunction\b/i],
    [:sub, /\bsub\b/i],
    [:end, /\bend\b/i],
    [:as, /\bas\b/i],
    [:typename, /\blong\b/i],
    [:identifier, /\b[a-zA-Z]+\b/],
    [:integer, /\b[0-9]+\b/],
    [:string, /".*"/],
    [:oparen, /\(/],
    [:cparen, /\)/],
    [:comma, /,/],
    [:quote, /'/],
  ]

  def initialize(code)
    @code = code
  end

  def tokenize
    tokens = []
    begin
      until @code.empty?
        tokens << tokenize_one_token
        @code = @code.strip
      end
    rescue RuntimeError => e
      puts tokens.join("\n")
      raise
    end
    tokens
  end

  def tokenize_one_token
    TOKEN_TYPES.each do |type, re|
      re = /\A(#{re})/
      if @code =~ re
        value = $1
        @code = @code[value.length..-1]
        return Token.new(type, value)
      end
    end
    raise RuntimeError.new(
      "Couldn't match token on #{@code.inspect}")
  end
end

Token = Struct.new(:type, :value)

class Parser
  def initialize(tokens)
    @tokens = tokens
  end

  def parse
    parse_function
  end

  def parse_function
    consume(:function)
    name = consume(:identifier).value
    arg_names = parse_arg_names
    consume(:as)
    rtype = consume(:typename).value
    body = parse_expr
    consume(:end)
    consume(:function)
    FunctionNode.new(name, rtype, arg_names, body)
  end

  def parse_arg_names
    arg_names = []
    consume(:oparen)
    if peek(:identifier)
      arg_names << consume(:identifier).value
      while peek(:comma)
        consume(:comma)
        arg_names << consume(:identifier).value
      end
    end
    consume(:cparen)
    arg_names
  end

  def parse_expr
    if peek(:integer)
      parse_integer
    elsif peek(:string)
      parse_string
    elsif peek(:identifier) && peek(:oparen, 1)
      parse_call
    elsif peek(:identifier) && peek(:string, 1)
      parse_stmt
    else
      parse_var_ref
    end
  end

  def parse_stmt
    name = consume(:identifier).value
    arg_exprs = consume(:string).value
    CallNode.new(name, arg_exprs)
  end

  def peek(expected_type, offset=0)
    @tokens.fetch(offset).type == expected_type
  end

  def consume(expected_type)
    token = @tokens.shift
    if token.type == expected_type
      token
    else
      raise RuntimeError.new(
        "Expected token type #{expected_type.inspect} but got #{token.type.inspect}")
    end
  end
end

FunctionNode = Struct.new(:name, :type, :arg_names, :body)
StringNode = Struct.new(:value)
CallNode = Struct.new(:name, :arg_exprs)

class Generator
  def generate(node)
    case node
    when FunctionNode
      "%s %s(%s) { return %s ; }" % [
        node.type.downcase,
        node.name,
        node.arg_names.join(','),
        generate(node.body),
      ]
    when CallNode
      "%s(%s)" % [
        node.name,
        node.arg_exprs
      ]
    when StringNode
      node.value
    else
      raise RuntimeError.new("Unexpected node type: #{node.class}")
    end
  end
end

tokens = Tokenizer.new(File.read("hello.bas")).tokenize
#puts "Tokens:\n"
#puts tokens.join("\n")
tree = Parser.new(tokens).parse
#puts "\nAST:\n"
#puts tree
RUNTIME = "#include <stdio.h>\n#define PRINT(a) printf(a)\n"
CMAIN = "int main(void) { PBMain(); return 0; }"
generated = Generator.new.generate(tree)
#puts "\nGenerated:\n"
#puts generated
#puts "\nGenerated with preamble/postamble:\n"
puts [RUNTIME, generated, CMAIN].join("\n")
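For reference, `ruby pb2c.rb` reads `hello.bas`, runs tokenize → parse → generate, and prints the assembled C program. Tracing the `Generator` format strings plus `RUNTIME` and `CMAIN` by hand, the output should be:

#include <stdio.h>
#define PRINT(a) printf(a)

long PBMain() { return PRINT("Hello, world!") ; }
int main(void) { PBMain(); return 0; }

This is derived from the source above rather than captured output; the stray space before the semicolon comes straight from the `{ return %s ; }` template.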
0 tokenizer.cpp Normal file
38 tokenizer.hpp Normal file
@@ -0,0 +1,38 @@
#pragma once
#ifndef TOKENIZER_HPP
#define TOKENIZER_HPP

#include <regex>
#include <string>         // PBToken::value
#include <unordered_map>
#include <vector>         // Tokenizer::tokenize return type

typedef enum {
    FUNCTION,
    SUB,
    END,
    AS,
    TYPE,
    IDENTIFIER,
    INTEGER,
    STRING,
    OPAREN,
    CPAREN,
    COMMA,
    QUOTE,
    EQUALS,
    TOKEN_TYPE_COUNT
} PBTokenType;

typedef struct {
    PBTokenType type;
    std::string value;
} PBToken;

class Tokenizer {
    std::unordered_map<PBTokenType, std::regex> tokentypes;
    std::string code;
public:
    Tokenizer(std::string);
    std::vector<PBToken> tokenize();
    PBToken tokenize_one_token();
};

#endif
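tokenizer.cpp is committed empty. As a sketch of where it could go, here is a rough transliteration of the Ruby `Tokenizer` into the class declared above. Only the signatures come from `tokenizer.hpp`; the member bodies, including populating `tokentypes`, are assumptions:

#include <regex>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#include "tokenizer.hpp"

Tokenizer::Tokenizer(std::string code) : code(std::move(code)) {
    // Assumption: tokentypes would be populated here with one regex per
    // PBTokenType, like the Ruby TOKEN_TYPES table.
}

std::vector<PBToken> Tokenizer::tokenize() {
    std::vector<PBToken> tokens;
    while (!code.empty()) {
        tokens.push_back(tokenize_one_token());
        // Mirror of Ruby's @code.strip: drop leading whitespace before the next match.
        const auto next = code.find_first_not_of(" \t\r\n");
        code = (next == std::string::npos) ? std::string() : code.substr(next);
    }
    return tokens;
}

PBToken Tokenizer::tokenize_one_token() {
    // match_continuous anchors the search at the start of code, the
    // analogue of Ruby's /\A(#{re})/. Caveat: unordered_map iteration
    // order is unspecified, so a real version needs an ordered container
    // to keep keyword patterns ahead of IDENTIFIER.
    for (const auto& [type, re] : tokentypes) {
        std::smatch m;
        if (std::regex_search(code, m, re, std::regex_constants::match_continuous)) {
            PBToken token{type, m.str(0)};
            code = code.substr(m.length(0));
            return token;
        }
    }
    throw std::runtime_error("Couldn't match token on: " + code);
}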