neopb/neopb.cpp
2025-11-18 09:12:59 -06:00

70 lines
1.8 KiB
C++

#include <iostream>
#include <regex>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>
/// Kinds of lexical token. Enumerators are numbered consecutively from 0,
/// so the trailing TOKEN_TYPE_COUNT equals the number of real token kinds.
enum PBTokenType {
    // Keyword-like kinds.
    FUNCTION,
    SUB,
    END,
    AS,
    TYPE,

    // Names and literals.
    IDENTIFIER,
    INTEGER,
    STRING,

    // Punctuation.
    OPAREN,
    CPAREN,
    COMMA,
    QUOTE,
    EQUALS,

    // Sentinel: not a token kind. Must remain the last enumerator.
    TOKEN_TYPE_COUNT
};
typedef struct {
PBTokenType type;
std::string value;
} PBToken;
// Forward declaration of the tokenizer; its definition follows main() below.
// Takes the source text by value and returns the tokens as a vector.
std::vector<PBToken> tokenize(std::string code);
// Program entry point. Currently a stub: it ignores its command-line
// arguments, does not call tokenize(), and always returns exit status 0.
int main(int argc, char* argv[]) {
// Disabled debugging aid that would echo every command-line argument:
//for (int n = 0; n < argc; n++) {
// std::cout << "arg" << n << ": " << argv[n] << std::endl;
//}
return 0;
}
std::vector<PBToken> tokenize(std::string code) {
const PBToken tokenize_one = [](std::string fragment) {
const std::regex re_func("\bfunction\b", std::regex_constants::icase);
const std::regex re_sub( "\bsub\b", std::regex_constants::icase);
const std::regex re_end( "\bend\b", std::regex_constants::icase);
const std::regex re_as("\bas\b", std::regex_constants::icase);
const std::regex re_type("\blong\b", std::regex_constants::icase);
const std::regex re_identifier("\b[a-zA-Z]+\b");
const std::regex re_integer("\b[0-9]+\b");
const std::regex re_string("\".*\"");
const std::regex re_oparen("\(");
const std::regex re_cparen("\)");
const std::regex re_comma(",");
const std::regex re_quote("'");
const std::regex re_equals("=");
PBTokenType tt = SUB;
std::string val = fragment.trim();
return { .type = tt, .value = val };
};
std::vector<PBToken> tokens();
while(code.length() > 0) {
int split = code.find(' ');
std::string fragment = split > 0 ? code.substr(0, split) : code;
tokens.push_back(tokenize_one(fragment));
}
return tokens;
}