#include #include #include #include #include typedef enum { FUNCTION, SUB, END, AS, TYPE, IDENTIFIER, INTEGER, STRING, OPAREN, CPAREN, COMMA, QUOTE, EQUALS, TOKEN_TYPE_COUNT } PBTokenType; typedef struct { PBTokenType type; std::string value; } PBToken; std::vector tokenize(std::string code); int main(int argc, char* argv[]) { //for (int n = 0; n < argc; n++) { // std::cout << "arg" << n << ": " << argv[n] << std::endl; //} return 0; } std::vector tokenize(std::string code) { const PBToken tokenize_one = [](std::string fragment) { const std::regex re_func("\bfunction\b", std::regex_constants::icase); const std::regex re_sub( "\bsub\b", std::regex_constants::icase); const std::regex re_end( "\bend\b", std::regex_constants::icase); const std::regex re_as("\bas\b", std::regex_constants::icase); const std::regex re_type("\blong\b", std::regex_constants::icase); const std::regex re_identifier("\b[a-zA-Z]+\b"); const std::regex re_integer("\b[0-9]+\b"); const std::regex re_string("\".*\""); const std::regex re_oparen("\("); const std::regex re_cparen("\)"); const std::regex re_comma(","); const std::regex re_quote("'"); const std::regex re_equals("="); PBTokenType tt = SUB; std::string val = fragment.trim(); return { .type = tt, .value = val }; }; std::vector tokens(); while(code.length() > 0) { int split = code.find(' '); std::string fragment = split > 0 ? code.substr(0, split) : code; tokens.push_back(tokenize_one(fragment)); } return tokens; }