split out tokenizer class
This commit is contained in:
parent
68ea89ece6
commit
d0714f8664
25
neopb.cpp
25
neopb.cpp
@ -4,27 +4,7 @@
|
|||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
typedef enum {
|
#include "tokenizer.hpp"
|
||||||
FUNCTION,
|
|
||||||
SUB,
|
|
||||||
END,
|
|
||||||
AS,
|
|
||||||
TYPE,
|
|
||||||
IDENTIFIER,
|
|
||||||
INTEGER,
|
|
||||||
STRING,
|
|
||||||
OPAREN,
|
|
||||||
CPAREN,
|
|
||||||
COMMA,
|
|
||||||
QUOTE,
|
|
||||||
EQUALS,
|
|
||||||
TOKEN_TYPE_COUNT
|
|
||||||
} PBTokenType;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
PBTokenType type;
|
|
||||||
std::string value;
|
|
||||||
} PBToken;
|
|
||||||
|
|
||||||
std::vector<PBToken> tokenize(std::string code);
|
std::vector<PBToken> tokenize(std::string code);
|
||||||
|
|
||||||
@ -38,6 +18,7 @@ int main(int argc, char* argv[]) {
|
|||||||
|
|
||||||
std::vector<PBToken> tokenize(std::string code) {
|
std::vector<PBToken> tokenize(std::string code) {
|
||||||
const PBToken tokenize_one = [](std::string fragment) {
|
const PBToken tokenize_one = [](std::string fragment) {
|
||||||
|
//const std::unordered_map<PBTokenType, std::regex> tokentypes = std::unordered_map();
|
||||||
const std::regex re_func("\bfunction\b", std::regex_constants::icase);
|
const std::regex re_func("\bfunction\b", std::regex_constants::icase);
|
||||||
const std::regex re_sub( "\bsub\b", std::regex_constants::icase);
|
const std::regex re_sub( "\bsub\b", std::regex_constants::icase);
|
||||||
const std::regex re_end( "\bend\b", std::regex_constants::icase);
|
const std::regex re_end( "\bend\b", std::regex_constants::icase);
|
||||||
@ -63,7 +44,7 @@ std::vector<PBToken> tokenize(std::string code) {
|
|||||||
while(code.length() > 0) {
|
while(code.length() > 0) {
|
||||||
int split = code.find(' ');
|
int split = code.find(' ');
|
||||||
std::string fragment = split > 0 ? code.substr(0, split) : code;
|
std::string fragment = split > 0 ? code.substr(0, split) : code;
|
||||||
tokens.push_back(tokenize_one(fragment));
|
tokens.push_back(fragment);
|
||||||
}
|
}
|
||||||
return tokens;
|
return tokens;
|
||||||
}
|
}
|
||||||
|
|||||||
0
tokenizer.cpp
Normal file
0
tokenizer.cpp
Normal file
38
tokenizer.hpp
Normal file
38
tokenizer.hpp
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
#pragma once
|
||||||
|
#ifndef TOKENIZER_HPP
|
||||||
|
#define TOKENIZER_HPP
|
||||||
|
|
||||||
|
#include <regex>
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
|
// Token categories used to tag each PBToken (see the `type` field of PBToken).
// The enumerators carry no explicit values, so they are numbered 0, 1, 2, ...
// in declaration order.
typedef enum {
|
||||||
|
FUNCTION,
|
||||||
|
SUB,
|
||||||
|
END,
|
||||||
|
AS,
|
||||||
|
TYPE,
|
||||||
|
IDENTIFIER,
|
||||||
|
INTEGER,
|
||||||
|
STRING,
|
||||||
|
OPAREN,
|
||||||
|
CPAREN,
|
||||||
|
COMMA,
|
||||||
|
QUOTE,
|
||||||
|
EQUALS,
|
||||||
|
// Declared last, so its value (13) equals the number of enumerators above.
// Presumably a count sentinel rather than a real token category -- confirm
// against its uses.
TOKEN_TYPE_COUNT
|
||||||
|
} PBTokenType;
|
||||||
|
|
||||||
|
// A single token: a PBTokenType category paired with a string payload.
typedef struct {
|
||||||
|
// Category of this token.
PBTokenType type;
|
||||||
|
// String associated with the token; presumably the matched source text --
// TODO confirm against the tokenizer implementation.
std::string value;
|
||||||
|
} PBToken;
|
||||||
|
|
||||||
|
// Tokenizer class declaration. Only the interface is declared here; no member
// definitions are visible in this header.
// NOTE(review): this header names std::string and std::vector but only
// includes <regex> and <unordered_map>; it relies on transitive includes
// unless <string> and <vector> are added.
class Tokenizer {
|
||||||
|
// Private (class default access): one std::regex per PBTokenType key.
// Presumably the pattern used to recognize that token type -- confirm.
std::unordered_map<PBTokenType, std::regex> tokentypes;
|
||||||
|
// Private: string member; presumably the source text being tokenized --
// confirm against the constructor definition.
std::string code;
|
||||||
|
public:
|
||||||
|
// Constructs a Tokenizer from a std::string taken by value.
// NOTE(review): not marked `explicit`, so std::string converts implicitly
// to Tokenizer.
Tokenizer(std::string);
|
||||||
|
// Returns a vector of PBToken. Takes no arguments, so any input must come
// from object state.
std::vector<PBToken> tokenize();
|
||||||
|
// Returns a single PBToken. Takes no arguments, so any input must come
// from object state.
PBToken tokenize_one_token();
|
||||||
|
};
|
||||||
|
#endif
|
||||||
Loading…
x
Reference in New Issue
Block a user