#pragma once #include #include "token.h" #include "scan.h" #include "lib_begin.h" namespace cgv { namespace utils { /** a line in a text is simply represented as a token */ struct line : public token { /// construct from range line(const char* _b = 0, const char* _e = 0) : token(_b, _e) {} }; /// different types that a typed_token can have enum token_type { PLAIN, URL, FLOAT_VALUE, TIME_VALUE, DATE_VALUE }; /** a typed token also stores the type and value of a parsed token. Although a union of the different typed values should have been used, the values of different type are stored successively in the typed_token because of problems with standard construction. */ struct typed_token : public token { token_type type; typed_token(const token& t, token_type tt = PLAIN) : token(t), type(tt), date_value(0) {} double float_value; utils::time time_value; utils::date date_value; }; /** this function splits a text range into tokens. The function is typically applied to a line. One can specify whitespaces that are used to detect split locations but do not generate any tokens. The separators also mark split locations and generate tokens. If merge_separators is true, successive separators are merged into one token. Finally, one can specify a pair of character lists open_paranthesis and close_paranthesis that are used to overwrite the splitting mechanism and allow to generate tokens including whitespaces and separators. Some examples: applying vector tokens; split_to_tokens(line.begin, line.end, tokens, ".,;:", "'{", "'}", " \t"); to the line Hello world... This is a "split to tokens" test line. {copyright: S. G.} generates the following tokens: Hello world ... This is a split to tokens test line . copyright: S. G. */ extern CGV_API void split_to_tokens( const char* begin, const char* end, std::vector& tokens, const std::string& separators, bool merge_separators = true, const std::string& open_parenthesis = "", const std::string& close_parenthesis = "", const std::string& whitespaces = " \t\n", unsigned int max_nr_tokens = -1); /// text range given as token inline void split_to_tokens( const token& tok, std::vector& tokens, const std::string& separators, bool merge_separators = true, const std::string& open_parenthesis = "", const std::string& close_parenthesis = "", const std::string& whitespaces = " \t\n", unsigned int /*max_nr_tokens*/ = -1) { split_to_tokens( tok.begin,tok.end, tokens, separators,merge_separators,open_parenthesis,close_parenthesis,whitespaces); } /// text range given as string inline void split_to_tokens( const std::string& s, std::vector& tokens, const std::string& separators, bool merge_separators = true, const std::string& open_parenthesis = "", const std::string& close_parenthesis = "", const std::string& whitespaces = " \t\n", unsigned int /*max_nr_tokens*/ = (unsigned int)-1) { split_to_tokens(&s[0],&s[0]+s.size(),tokens,separators,merge_separators,open_parenthesis,close_parenthesis,whitespaces); } /** this function splits a text range at the newline characters into single lines. If truncate_trailing_spaces is true all spaces and tabs at the end of the line are excluded. */ extern CGV_API void split_to_lines(const char* begin, const char* end, std::vector& lines, bool truncate_trailing_spaces = true); /// text range given as token inline void split_to_lines(const token& tok, std::vector& lines, bool truncate_trailing_spaces = true) { split_to_lines(tok.begin,tok.end,lines,truncate_trailing_spaces); } /// text range given as string inline void split_to_lines(const std::string& s, std::vector& lines, bool truncate_trailing_spaces = true) { split_to_lines(&s[0],&s[0]+s.size(),lines,truncate_trailing_spaces); } /** the input range must begin with an open parenthesis. The function finds the matching closing parenthesis and returns a token with the content inside the parentheses. Examples: balanced_find_content( "a+b", => , '(', ')' ) ==> false; balanced_find_content( "(a+b)", => "a+b", '(', ')' ) ==> true; balanced_find_content( "(a+(c*d),b)", => "a+(c*d),b", '(', ')' ) ==> true; */ extern CGV_API bool balanced_find_content( const char* begin, const char* end, token& content, char open_parenthesis, char close_parenthesis); inline bool balanced_find_content( const token& expression, token& content, char open_parenthesis, char close_parenthesis) { return balanced_find_content(expression.begin, expression.end, content,open_parenthesis,close_parenthesis); } inline bool balanced_find_content( const std::string& expression, token& content, char open_parenthesis, char close_parenthesis) { return balanced_find_content(&expression[0], &expression[0]+expression.size(), content,open_parenthesis,close_parenthesis); } } } #include