diff --git a/neopb.cpp b/neopb.cpp
index 617e8a9..38fb0d7 100644
--- a/neopb.cpp
+++ b/neopb.cpp
@@ -1,3 +1,4 @@
+#include <fstream>
 #include <iostream>
 #include <regex>
 #include "tokenizer.hpp"
@@ -8,43 +9,60 @@
 std::vector<PBToken> tokenize(std::string code);
 
+std::string readFileContents(std::string fname) {
+  std::ifstream ifs(fname);
+  std::string contents( (std::istreambuf_iterator<char>(ifs)), (std::istreambuf_iterator<char>()) );
+  return contents;
+}
+
 int main(int argc, char* argv[]) {
-  //for (int n = 0; n < argc; n++) {
-  //  std::cout << "arg" << n << ": " << argv[n] << std::endl;
-  //}
+  for (int n = 0; n < argc; n++) {
+    std::cout << "arg" << n << ": " << argv[n] << std::endl;
+  }
+  std::string infile = argc > 1 ? argv[1] : "";
+  std::string code = "";
+  if(infile.length() > 0) {
+    code = readFileContents(infile);
+  }
+  Tokenizer tokenizer = Tokenizer(code);
+  std::cout << "code: " << tokenizer.dump() << std::endl;
+  std::vector<PBToken> tokens = tokenizer.tokenize();
+  for(int i = 0; i < tokens.size(); i++) {
+    std::cout << tokens[i].value << std::endl;
+  }
   return 0;
 }
 
-std::vector<PBToken> tokenize(std::string code) {
-  const PBToken tokenize_one = [](std::string fragment) {
-    //const std::unordered_map<std::string, PBTokenType> tokentypes = std::unordered_map<std::string, PBTokenType>();
-    const std::regex re_func("\bfunction\b", std::regex_constants::icase);
-    const std::regex re_sub( "\bsub\b", std::regex_constants::icase);
-    const std::regex re_end( "\bend\b", std::regex_constants::icase);
-    const std::regex re_as("\bas\b", std::regex_constants::icase);
-    const std::regex re_type("\blong\b", std::regex_constants::icase);
-    const std::regex re_identifier("\b[a-zA-Z]+\b");
-    const std::regex re_integer("\b[0-9]+\b");
-    const std::regex re_string("\".*\"");
-    const std::regex re_oparen("\(");
-    const std::regex re_cparen("\)");
-    const std::regex re_comma(",");
-    const std::regex re_quote("'");
-    const std::regex re_equals("=");
-
-    PBTokenType tt = SUB;
-    std::string val = fragment.trim();
-
-
-
-    return { .type = tt, .value = val };
-  };
-  std::vector<PBToken> tokens();
-  while(code.length() > 0) {
-    int split = code.find(' ');
-    std::string fragment = split > 0 ? code.substr(0, split) : code;
-    tokens.push_back(fragment);
-  }
-  return tokens;
-}
+//std::vector<PBToken> tokenize(std::string code) {
+//  const PBToken tokenize_one = [](std::string fragment) {
+//    //const std::unordered_map<std::string, PBTokenType> tokentypes = std::unordered_map<std::string, PBTokenType>();
+//    const std::regex re_func("\bfunction\b", std::regex_constants::icase);
+//    const std::regex re_sub( "\bsub\b", std::regex_constants::icase);
+//    const std::regex re_end( "\bend\b", std::regex_constants::icase);
+//    const std::regex re_as("\bas\b", std::regex_constants::icase);
+//    const std::regex re_type("\blong\b", std::regex_constants::icase);
+//    const std::regex re_identifier("\b[a-zA-Z]+\b");
+//    const std::regex re_integer("\b[0-9]+\b");
+//    const std::regex re_string("\".*\"");
+//    const std::regex re_oparen("\(");
+//    const std::regex re_cparen("\)");
+//    const std::regex re_comma(",");
+//    const std::regex re_quote("'");
+//    const std::regex re_equals("=");
+//
+//    PBTokenType tt = SUB;
+//    std::string val = fragment.trim();
+//
+//
+//
+//    return { .type = tt, .value = val };
+//  };
+//  std::vector<PBToken> tokens();
+//  while(code.length() > 0) {
+//    int split = code.find(' ');
+//    std::string fragment = split > 0 ? code.substr(0, split) : code;
+//    tokens.push_back(fragment);
+//  }
+//  return tokens;
+//}
diff --git a/pb2c.rb b/pb2c.rb
index c9d43b1..feb0b24 100755
--- a/pb2c.rb
+++ b/pb2c.rb
@@ -2,6 +2,7 @@
 
 class Tokenizer
   TOKEN_TYPES = [
+    [:preproc, /#[a-zA-Z]+\b/i],
     [:function, /\bfunction\b/i],
     [:sub, /\bsub\b/i],
     [:end, /\bend\b/i],
@@ -25,6 +26,7 @@
     until @code.empty?
       tokens << tokenize_one_token
      @code = @code.strip
+      #puts tokens.join("\n")
    end
  rescue RuntimeError => e
    puts tokens.join("\n")
@@ -58,6 +60,9 @@ class Parser
    parse_function
  end

+  def parse_preproc
+  end
+
  def parse_function
    consume(:function)
    name = consume(:identifier).value
@@ -146,7 +151,8 @@ class Generator
  end
end

-tokens = Tokenizer.new(File.read("hello.bas")).tokenize
+#tokens = Tokenizer.new(File.read("hello.bas")).tokenize
+tokens = Tokenizer.new(File.read(ARGV[0])).tokenize
 #puts "Tokens:\n"
 #puts tokens.join("\n")
 tree = Parser.new(tokens).parse
diff --git a/tokenizer.cpp b/tokenizer.cpp
index e69de29..8a3938d 100644
--- a/tokenizer.cpp
+++ b/tokenizer.cpp
@@ -0,0 +1,19 @@
+#include "tokenizer.hpp"
+
+Tokenizer::Tokenizer(std::string code) {
+  this->code = code;
+}
+
+std::vector<PBToken> Tokenizer::tokenize() {
+  std::vector<PBToken> tokens;
+  tokens.push_back(tokenize_one_token());
+  return tokens;
+}
+
+PBToken Tokenizer::tokenize_one_token() {
+  return { .type = FUNCTION, .value = "Function" };
+}
+
+std::string Tokenizer::dump() {
+  return this->code;
+}
diff --git a/tokenizer.hpp b/tokenizer.hpp
index 55bddf2..a144155 100644
--- a/tokenizer.hpp
+++ b/tokenizer.hpp
@@ -34,5 +34,6 @@ public:
   Tokenizer(std::string);
   std::vector<PBToken> tokenize();
   PBToken tokenize_one_token();
+  std::string dump();
 };
 #endif
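
A note on the stubbed C++ tokenizer: Tokenizer::tokenize_one_token currently returns a hard-coded Function token, while the Ruby version drives everything off the ordered TOKEN_TYPES regex table. Below is a minimal, standalone sketch (not part of the patch) of what the same table-driven approach could look like in C++. The enum values and struct shape are assumed stand-ins for whatever tokenizer.hpp actually declares, and std::regex_constants::match_continuous is one way to anchor each pattern at the front of the remaining input, mimicking how the Ruby tokenizer consumes @code from the left.

// Standalone sketch: PBTokenType/PBToken are redeclared here so the example
// compiles on its own; in the real tree they come from tokenizer.hpp.
#include <iostream>
#include <regex>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

enum PBTokenType { PREPROC, FUNCTION, SUB, END, AS, TYPE,
                   IDENTIFIER, INTEGER, OPAREN, CPAREN };

struct PBToken {
  PBTokenType type;
  std::string value;
};

// Ordered like Ruby's TOKEN_TYPES: keywords before the generic identifier
// rule, so "function" is not swallowed as an identifier. Note the doubled
// backslashes: in a C++ string literal "\b" is a backspace character, so the
// word-boundary assertion must be written "\\b".
static const std::vector<std::pair<PBTokenType, std::regex>> TOKEN_TYPES = {
  { PREPROC,    std::regex("#[a-zA-Z]+\\b", std::regex_constants::icase) },
  { FUNCTION,   std::regex("function\\b",   std::regex_constants::icase) },
  { SUB,        std::regex("sub\\b",        std::regex_constants::icase) },
  { END,        std::regex("end\\b",        std::regex_constants::icase) },
  { AS,         std::regex("as\\b",         std::regex_constants::icase) },
  { TYPE,       std::regex("long\\b",       std::regex_constants::icase) },
  { IDENTIFIER, std::regex("[a-zA-Z]+\\b") },
  { INTEGER,    std::regex("[0-9]+\\b") },
  { OPAREN,     std::regex("\\(") },
  { CPAREN,     std::regex("\\)") },
};

// Consume one token from the front of `code`, mirroring the Ruby
// tokenize_one_token: try each pattern anchored at the current position
// (match_continuous), take the first hit, and cut it off the input.
PBToken tokenizeOneToken(std::string& code) {
  std::smatch m;
  for (const auto& entry : TOKEN_TYPES) {
    if (std::regex_search(code, m, entry.second,
                          std::regex_constants::match_continuous)) {
      PBToken token{ entry.first, m.str() };
      code.erase(0, token.value.length());  // drop the matched text
      return token;
    }
  }
  // Same failure behavior as the Ruby version's RuntimeError.
  throw std::runtime_error("couldn't match token on: " + code);
}

int main() {
  std::string code = "Function hello() As Long";
  while (!code.empty()) {
    std::cout << tokenizeOneToken(code).value << std::endl;
    code.erase(0, code.find_first_not_of(" \t\r\n"));  // like @code.strip
  }
  return 0;
}

One detail worth keeping in mind when reviving the retired C++ tokenize(): its patterns were written as "\bfunction\b", and in a C++ string literal \b is a backspace character rather than a word boundary, so those regexes would never have matched keywords; the boundary has to be escaped as "\\b" (or the pattern written as a raw string literal).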