wip: work on cpp tokenizer implementation
This commit is contained in:
parent
d0714f8664
commit
fb1059683f
88
neopb.cpp
88
neopb.cpp
@ -1,3 +1,4 @@
|
|||||||
|
#include <fstream>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <regex>
|
#include <regex>
|
||||||
#include <string>
|
#include <string>
|
||||||
@ -8,43 +9,60 @@
|
|||||||
|
|
||||||
std::vector<PBToken> tokenize(std::string code);
|
std::vector<PBToken> tokenize(std::string code);
|
||||||
|
|
||||||
|
// Reads the whole file named `fname` and returns its contents as a string.
//
// Returns an empty string when the file cannot be opened. A diagnostic is
// written to std::cerr in that case, so a mistyped path is not silently
// treated as an empty source file.
std::string readFileContents(const std::string& fname) {
    std::ifstream ifs(fname);
    if (!ifs) {
        std::cerr << "error: cannot open '" << fname << "'" << std::endl;
        return {};
    }

    // Slurp everything from the current position up to end-of-stream.
    return std::string(std::istreambuf_iterator<char>(ifs),
                       std::istreambuf_iterator<char>());
}
|
||||||
|
|
||||||
int main(int argc, char* argv[]) {
|
int main(int argc, char* argv[]) {
|
||||||
//for (int n = 0; n < argc; n++) {
|
for (int n = 0; n < argc; n++) {
|
||||||
// std::cout << "arg" << n << ": " << argv[n] << std::endl;
|
std::cout << "arg" << n << ": " << argv[n] << std::endl;
|
||||||
//}
|
}
|
||||||
|
std::string infile = argc > 1 ? argv[1] : "";
|
||||||
|
std::string code = "";
|
||||||
|
if(infile.length() > 0) {
|
||||||
|
code = readFileContents(infile);
|
||||||
|
}
|
||||||
|
Tokenizer tokenizer = Tokenizer(code);
|
||||||
|
std::cout << "code: " << tokenizer.dump() << std::endl;
|
||||||
|
std::vector<PBToken> tokens = tokenizer.tokenize();
|
||||||
|
for(int i = 0; i < tokens.size(); i++) {
|
||||||
|
std::cout << tokens[i].value << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<PBToken> tokenize(std::string code) {
|
//std::vector<PBToken> tokenize(std::string code) {
|
||||||
const PBToken tokenize_one = [](std::string fragment) {
|
// const PBToken tokenize_one = [](std::string fragment) {
|
||||||
//const std::unordered_map<PBTokenType, std::regex> tokentypes = std::unordered_map();
|
// //const std::unordered_map<PBTokenType, std::regex> tokentypes = std::unordered_map();
|
||||||
const std::regex re_func("\bfunction\b", std::regex_constants::icase);
|
// const std::regex re_func("\bfunction\b", std::regex_constants::icase);
|
||||||
const std::regex re_sub( "\bsub\b", std::regex_constants::icase);
|
// const std::regex re_sub( "\bsub\b", std::regex_constants::icase);
|
||||||
const std::regex re_end( "\bend\b", std::regex_constants::icase);
|
// const std::regex re_end( "\bend\b", std::regex_constants::icase);
|
||||||
const std::regex re_as("\bas\b", std::regex_constants::icase);
|
// const std::regex re_as("\bas\b", std::regex_constants::icase);
|
||||||
const std::regex re_type("\blong\b", std::regex_constants::icase);
|
// const std::regex re_type("\blong\b", std::regex_constants::icase);
|
||||||
const std::regex re_identifier("\b[a-zA-Z]+\b");
|
// const std::regex re_identifier("\b[a-zA-Z]+\b");
|
||||||
const std::regex re_integer("\b[0-9]+\b");
|
// const std::regex re_integer("\b[0-9]+\b");
|
||||||
const std::regex re_string("\".*\"");
|
// const std::regex re_string("\".*\"");
|
||||||
const std::regex re_oparen("\(");
|
// const std::regex re_oparen("\(");
|
||||||
const std::regex re_cparen("\)");
|
// const std::regex re_cparen("\)");
|
||||||
const std::regex re_comma(",");
|
// const std::regex re_comma(",");
|
||||||
const std::regex re_quote("'");
|
// const std::regex re_quote("'");
|
||||||
const std::regex re_equals("=");
|
// const std::regex re_equals("=");
|
||||||
|
//
|
||||||
PBTokenType tt = SUB;
|
// PBTokenType tt = SUB;
|
||||||
std::string val = fragment.trim();
|
// std::string val = fragment.trim();
|
||||||
|
//
|
||||||
|
//
|
||||||
|
//
|
||||||
return { .type = tt, .value = val };
|
// return { .type = tt, .value = val };
|
||||||
};
|
// };
|
||||||
std::vector<PBToken> tokens();
|
// std::vector<PBToken> tokens();
|
||||||
while(code.length() > 0) {
|
// while(code.length() > 0) {
|
||||||
int split = code.find(' ');
|
// int split = code.find(' ');
|
||||||
std::string fragment = split > 0 ? code.substr(0, split) : code;
|
// std::string fragment = split > 0 ? code.substr(0, split) : code;
|
||||||
tokens.push_back(fragment);
|
// tokens.push_back(fragment);
|
||||||
}
|
// }
|
||||||
return tokens;
|
// return tokens;
|
||||||
}
|
//}
|
||||||
|
|||||||
8
pb2c.rb
8
pb2c.rb
@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
class Tokenizer
|
class Tokenizer
|
||||||
TOKEN_TYPES = [
|
TOKEN_TYPES = [
|
||||||
|
[:preproc, /#[a-zA-Z]+\b/i],
|
||||||
[:function, /\bfunction\b/i],
|
[:function, /\bfunction\b/i],
|
||||||
[:sub, /\bsub\b/i],
|
[:sub, /\bsub\b/i],
|
||||||
[:end, /\bend\b/i],
|
[:end, /\bend\b/i],
|
||||||
@ -25,6 +26,7 @@ class Tokenizer
|
|||||||
until @code.empty?
|
until @code.empty?
|
||||||
tokens << tokenize_one_token
|
tokens << tokenize_one_token
|
||||||
@code = @code.strip
|
@code = @code.strip
|
||||||
|
#puts tokens.join("\n")
|
||||||
end
|
end
|
||||||
rescue RuntimeError => e
|
rescue RuntimeError => e
|
||||||
puts tokens.join("\n")
|
puts tokens.join("\n")
|
||||||
@ -58,6 +60,9 @@ class Parser
|
|||||||
parse_function
|
parse_function
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Stub with an empty body: calling it does nothing and returns nil.
# NOTE(review): presumably intended to handle a :preproc token — no caller is
# visible in this view; confirm before relying on it.
def parse_preproc
|
||||||
|
end
|
||||||
|
|
||||||
def parse_function
|
def parse_function
|
||||||
consume(:function)
|
consume(:function)
|
||||||
name = consume(:identifier).value
|
name = consume(:identifier).value
|
||||||
@ -146,7 +151,8 @@ class Generator
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
tokens = Tokenizer.new(File.read("hello.bas")).tokenize
|
#tokens = Tokenizer.new(File.read("hello.bas")).tokenize
|
||||||
|
tokens = Tokenizer.new(File.read(ARGV[0])).tokenize
|
||||||
#puts "Tokens:\n"
|
#puts "Tokens:\n"
|
||||||
#puts tokens.join("\n")
|
#puts tokens.join("\n")
|
||||||
tree = Parser.new(tokens).parse
|
tree = Parser.new(tokens).parse
|
||||||
|
|||||||
@ -0,0 +1,19 @@
|
|||||||
|
#include "tokenizer.hpp"

#include <utility>
|
||||||
|
|
||||||
|
// Constructs a tokenizer holding the given source text.
//
// `code` is taken by value and moved into the member, so the member is
// initialized directly instead of being default-constructed and then
// copy-assigned.
Tokenizer::Tokenizer(std::string code) : code(std::move(code)) {}
|
||||||
|
|
||||||
|
std::vector<PBToken> Tokenizer::tokenize() {
|
||||||
|
std::vector<PBToken> tokens;
|
||||||
|
tokens.push_back(tokenize_one_token());
|
||||||
|
return tokens;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns one token. For now the result is fixed: a token of type FUNCTION
// whose value is "Function"; the held source text is not consulted.
PBToken Tokenizer::tokenize_one_token() {
    PBToken token{};
    token.type = FUNCTION;
    token.value = "Function";
    return token;
}
|
||||||
|
|
||||||
|
std::string Tokenizer::dump() {
|
||||||
|
return this->code;
|
||||||
|
}
|
||||||
@ -34,5 +34,6 @@ public:
|
|||||||
Tokenizer(std::string);
|
Tokenizer(std::string);
|
||||||
std::vector<PBToken> tokenize();
|
std::vector<PBToken> tokenize();
|
||||||
PBToken tokenize_one_token();
|
PBToken tokenize_one_token();
|
||||||
|
std::string dump();
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user