diff --git a/neopb.cpp b/neopb.cpp index 152d3a1..617e8a9 100644 --- a/neopb.cpp +++ b/neopb.cpp @@ -4,27 +4,7 @@ #include #include -typedef enum { - FUNCTION, - SUB, - END, - AS, - TYPE, - IDENTIFIER, - INTEGER, - STRING, - OPAREN, - CPAREN, - COMMA, - QUOTE, - EQUALS, - TOKEN_TYPE_COUNT -} PBTokenType; - -typedef struct { - PBTokenType type; - std::string value; -} PBToken; +#include "tokenizer.hpp" std::vector tokenize(std::string code); @@ -38,6 +18,7 @@ int main(int argc, char* argv[]) { std::vector tokenize(std::string code) { const PBToken tokenize_one = [](std::string fragment) { + //const std::unordered_map tokentypes = std::unordered_map(); const std::regex re_func("\bfunction\b", std::regex_constants::icase); const std::regex re_sub( "\bsub\b", std::regex_constants::icase); const std::regex re_end( "\bend\b", std::regex_constants::icase); @@ -63,7 +44,7 @@ std::vector tokenize(std::string code) { while(code.length() > 0) { int split = code.find(' '); std::string fragment = split > 0 ? code.substr(0, split) : code; - tokens.push_back(tokenize_one(fragment)); + tokens.push_back(fragment); } return tokens; } diff --git a/tokenizer.cpp b/tokenizer.cpp new file mode 100644 index 0000000..e69de29 diff --git a/tokenizer.hpp b/tokenizer.hpp new file mode 100644 index 0000000..55bddf2 --- /dev/null +++ b/tokenizer.hpp @@ -0,0 +1,38 @@ +#pragma once +#ifndef TOKENIZER_HPP +#define TOKENIZER_HPP + +#include +#include + +typedef enum { + FUNCTION, + SUB, + END, + AS, + TYPE, + IDENTIFIER, + INTEGER, + STRING, + OPAREN, + CPAREN, + COMMA, + QUOTE, + EQUALS, + TOKEN_TYPE_COUNT +} PBTokenType; + +typedef struct { + PBTokenType type; + std::string value; +} PBToken; + +class Tokenizer { + std::unordered_map tokentypes; + std::string code; +public: + Tokenizer(std::string); + std::vector tokenize(); + PBToken tokenize_one_token(); +}; +#endif