split out tokenizer class

2025-11-18 16:24:59 -06:00 · 2025-11-18 16:24:59 -06:00 · d0714f8664
commit d0714f8664
parent 68ea89ece6
3 changed files with 41 additions and 22 deletions
--- a/neopb.cpp
+++ b/neopb.cpp
@@ -4,27 +4,7 @@
 #include <unordered_map>
 #include <vector>
-typedef enum {
+#include "tokenizer.hpp"
    FUNCTION,
    SUB,
    END,
    AS,
    TYPE,
    IDENTIFIER,
    INTEGER,
    STRING,
    OPAREN,
    CPAREN,
    COMMA,
    QUOTE,
    EQUALS,
    TOKEN_TYPE_COUNT
 } PBTokenType;
 typedef struct {
    PBTokenType type;
    std::string value;
 } PBToken;
 std::vector<PBToken> tokenize(std::string code);
@@ -38,6 +18,7 @@ int main(int argc, char* argv[]) {
 std::vector<PBToken> tokenize(std::string code) {
    const PBToken tokenize_one = [](std::string fragment) {
        //const std::unordered_map<PBTokenType, std::regex> tokentypes = std::unordered_map();
        const std::regex re_func("\bfunction\b", std::regex_constants::icase);
        const std::regex re_sub( "\bsub\b", std::regex_constants::icase);
        const std::regex re_end( "\bend\b", std::regex_constants::icase);
@@ -63,7 +44,7 @@ std::vector<PBToken> tokenize(std::string code) {
    while(code.length() > 0) {
        int split = code.find(' ');
        std::string fragment = split > 0 ? code.substr(0, split) : code;
-        tokens.push_back(tokenize_one(fragment));
+        tokens.push_back(fragment);
    }
    return tokens;
 }
--- a/tokenizer.cpp
+++ b/tokenizer.cpp
--- a/tokenizer.hpp
+++ b/tokenizer.hpp
@@ -0,0 +1,38 @@
 #pragma once
 #ifndef TOKENIZER_HPP
 #define TOKENIZER_HPP
 #include <regex>
 #include <string>
 #include <unordered_map>
 #include <vector>
 // Token categories recognised by the tokenizer.
 // This is an unscoped enum, so the enumerators are visible in the enclosing
 // scope and convert implicitly to int. Values are consecutive starting at 0.
 enum PBTokenType {
    FUNCTION = 0,
    SUB,
    END,
    AS,
    TYPE,
    IDENTIFIER,
    INTEGER,
    STRING,
    OPAREN,
    CPAREN,
    COMMA,
    QUOTE,
    EQUALS,
    // Must stay last: its value equals the number of enumerators above it.
    TOKEN_TYPE_COUNT
 };
 typedef struct {
    PBTokenType type;
    std::string value;
 } PBToken;
 // Tokenizer: declares the interface for producing PBToken values.
 // Only declarations live here; the member definitions are not part of this
 // header, so the notes below describe the signatures and nothing more.
 class Tokenizer {
 private:
    // Maps each token category to a regular expression.
    // NOTE(review): presumably the pattern used to recognise that category; confirm.
    std::unordered_map<PBTokenType, std::regex> tokentypes;
    // String state held by the tokenizer.
    // NOTE(review): presumably the input passed to the constructor; confirm.
    std::string code;
 public:
    // Constructs a tokenizer from a string taken by value.
    Tokenizer(std::string source);
    // Returns a vector of tokens.
    std::vector<PBToken> tokenize();
    // Returns a single token.
    PBToken tokenize_one_token();
 };
 #endif