// C++ source (70 lines, 1.8 KiB)
#include <iostream>
#include <regex>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>
|
|
|
|
// Kinds of token the tokenizer distinguishes.
//
// The enumerators are unscoped and take consecutive values starting at 0,
// so TOKEN_TYPE_COUNT (which must stay last) equals the number of real
// token kinds declared before it.
enum PBTokenType {
    FUNCTION,         // keyword `function` (case-insensitive)
    SUB,              // keyword `sub` (case-insensitive)
    END,              // keyword `end` (case-insensitive)
    AS,               // keyword `as` (case-insensitive)
    TYPE,             // type name; currently only `long`
    IDENTIFIER,       // run of ASCII letters
    INTEGER,          // run of decimal digits
    STRING,           // double-quoted text
    OPAREN,           // `(`
    CPAREN,           // `)`
    COMMA,            // `,`
    QUOTE,            // `'`
    EQUALS,           // `=`
    TOKEN_TYPE_COUNT  // not a token kind: number of enumerators above
};
|
|
|
|
typedef struct {
|
|
PBTokenType type;
|
|
std::string value;
|
|
} PBToken;
|
|
|
|
// Forward declaration: turns source text into a list of tokens.
// The definition follows main().
std::vector<PBToken> tokenize(std::string code);
|
|
|
|
// Program entry point.
//
// Currently a stub: the command-line arguments are not used and the process
// exits with status 0.
int main(int argc, char* argv[]) {
    // The arguments are only referenced by the disabled debug loop below;
    // mark them used so the build stays free of unused-parameter warnings.
    (void)argc;
    (void)argv;

    // Debug aid (disabled): echo every command-line argument.
    //for (int n = 0; n < argc; n++) {
    //    std::cout << "arg" << n << ": " << argv[n] << std::endl;
    //}

    return 0;
}
|
|
|
|
std::vector<PBToken> tokenize(std::string code) {
|
|
const PBToken tokenize_one = [](std::string fragment) {
|
|
const std::regex re_func("\bfunction\b", std::regex_constants::icase);
|
|
const std::regex re_sub( "\bsub\b", std::regex_constants::icase);
|
|
const std::regex re_end( "\bend\b", std::regex_constants::icase);
|
|
const std::regex re_as("\bas\b", std::regex_constants::icase);
|
|
const std::regex re_type("\blong\b", std::regex_constants::icase);
|
|
const std::regex re_identifier("\b[a-zA-Z]+\b");
|
|
const std::regex re_integer("\b[0-9]+\b");
|
|
const std::regex re_string("\".*\"");
|
|
const std::regex re_oparen("\(");
|
|
const std::regex re_cparen("\)");
|
|
const std::regex re_comma(",");
|
|
const std::regex re_quote("'");
|
|
const std::regex re_equals("=");
|
|
|
|
PBTokenType tt = SUB;
|
|
std::string val = fragment.trim();
|
|
|
|
|
|
|
|
return { .type = tt, .value = val };
|
|
};
|
|
std::vector<PBToken> tokens();
|
|
while(code.length() > 0) {
|
|
int split = code.find(' ');
|
|
std::string fragment = split > 0 ? code.substr(0, split) : code;
|
|
tokens.push_back(tokenize_one(fragment));
|
|
}
|
|
return tokens;
|
|
}
|