CGII/framework/include/cgv/utils/advanced_scan.h

#pragma once

#include <vector>

#include "token.h"
#include "scan.h"

#include "lib_begin.h"

namespace cgv {
	namespace utils {

/** A single line of text. It adds no members of its own and is
    represented entirely by its token base class. */
struct line : public token
{
	/// construct from the range delimited by \c _b and \c _e; both pointers default to null
	line(const char* _b = 0, const char* _e = 0)
		: token(_b, _e)
	{
	}
};

/// kinds of content that a typed_token can be tagged with
enum token_type {
	PLAIN       = 0, ///< default tag assigned by the typed_token constructor
	URL         = 1,
	FLOAT_VALUE = 2, ///< presumably paired with typed_token::float_value -- confirm in the scanner
	TIME_VALUE  = 3, ///< presumably paired with typed_token::time_value -- confirm in the scanner
	DATE_VALUE  = 4  ///< presumably paired with typed_token::date_value -- confirm in the scanner
};

/** a typed token also stores the type and value of a parsed token. Although
    a union of the different typed values should have been used, the values
	 of different type are stored successively in the typed_token because of 
	 problems with standard construction. */
struct typed_token : public token
{
	token_type type;
	typed_token(const token& t, token_type tt = PLAIN) : token(t), type(tt), date_value(0) {}
	double float_value;
	utils::time time_value;
	utils::date date_value;
};

/** Split the text range [begin, end) into tokens. The function is typically
    applied to a line.

    \param begin start of the text range
    \param end end of the text range (one past the last character)
    \param tokens container that receives the generated tokens
    \param separators characters that mark split locations and generate tokens
    \param merge_separators if true, successive separators are merged into one token
    \param open_parenthesis list of characters that open a range in which the
           splitting mechanism is overridden, such that tokens including
           whitespaces and separators can be generated
    \param close_parenthesis list of characters that close such a range; it
           forms a pair with \c open_parenthesis
    \param whitespaces characters that are used to detect split locations but
           do not generate any tokens
    \param max_nr_tokens presumably an upper bound on the number of generated
           tokens (confirm in the implementation); defaults to the largest
           unsigned int value

    Example: applying

	 std::vector<token> tokens;
	 split_to_tokens(line.begin, line.end, tokens, ".,;:", true, "\"{", "\"}", " \t");

    (note that \c merge_separators has to be passed before the two
    parenthesis lists) to the line

	 Hello world... This is a "split to tokens" test line. {copyright: S. G.}

    generates the following tokens:

	 Hello
	 world
	 ...
	 This
	 is
	 a
	 split to tokens
	 test
	 line
	 .
	 copyright: S. G.
*/
extern CGV_API void split_to_tokens(
			const char* begin, const char* end,
			std::vector<token>& tokens,
			const std::string& separators, 
			bool merge_separators = true,
			const std::string& open_parenthesis = "", const std::string& close_parenthesis = "", 
			const std::string& whitespaces = " \t\n",
			unsigned int max_nr_tokens = static_cast<unsigned int>(-1));

/** Convenience overload of split_to_tokens that takes the text range from the
    token \c tok. All remaining arguments, including \c max_nr_tokens, are
    forwarded unchanged to the pointer-range version. */
inline void split_to_tokens(
			const token& tok,
			std::vector<token>& tokens,
			const std::string& separators, 
			bool merge_separators = true,
			const std::string& open_parenthesis = "", const std::string& close_parenthesis = "", 
			const std::string& whitespaces = " \t\n",
			unsigned int max_nr_tokens = static_cast<unsigned int>(-1))
{
	// max_nr_tokens has to be passed on explicitly; omitting it would silently
	// fall back to the default of the pointer-range version and ignore the
	// limit requested by the caller
	split_to_tokens(
		tok.begin, tok.end,
		tokens,
		separators, merge_separators,
		open_parenthesis, close_parenthesis,
		whitespaces,
		max_nr_tokens);
}

/// text range given as string
inline void split_to_tokens(
			const std::string& s,
			std::vector<token>& tokens,
			const std::string& separators, 
			bool merge_separators = true,
			const std::string& open_parenthesis = "", const std::string& close_parenthesis = "", 
			const std::string& whitespaces = " \t\n",
			unsigned int /*max_nr_tokens*/ = (unsigned int)-1)
{
	split_to_tokens(&s[0],&s[0]+s.size(),tokens,separators,merge_separators,open_parenthesis,close_parenthesis,whitespaces);
}

/** Split the text range [begin, end) at the newline characters into single lines.
    \param begin start of the text range
    \param end end of the text range (one past the last character)
    \param lines container that receives the detected lines
    \param truncate_trailing_spaces if true, all spaces and tabs at the end of
           a line are excluded from that line */
extern CGV_API void split_to_lines(
			const char* begin, const char* end,
			std::vector<line>& lines,
			bool truncate_trailing_spaces = true);
/// split the text range referenced by the token \c tok into lines; forwards to the pointer-range version
inline void split_to_lines(
			const token& tok,
			std::vector<line>& lines,
			bool truncate_trailing_spaces = true)
{
	split_to_lines(
		tok.begin, tok.end,
		lines,
		truncate_trailing_spaces);
}

/// text range given as string
inline void split_to_lines(const std::string& s, 
											  std::vector<line>& lines, 
											  bool truncate_trailing_spaces = true) { 
	split_to_lines(&s[0],&s[0]+s.size(),lines,truncate_trailing_spaces); 
}

/** Find the content enclosed by a balanced pair of parentheses. The input
    range [begin, end) must begin with an open parenthesis. The function
    finds the matching closing parenthesis and returns a token with the
    content inside the parentheses.

    \param begin start of the input range
    \param end end of the input range (one past the last character)
    \param content receives the text between the matching parentheses
    \param open_parenthesis character that opens a nesting level
    \param close_parenthesis character that closes a nesting level
    \return result as illustrated by the examples below

    Examples:

	 balanced_find_content( "a+b", => , '(', ')' ) ==> false;
	 balanced_find_content( "(a+b)", => "a+b", '(', ')' ) ==> true;
	 balanced_find_content( "(a+(c*d),b)", => "a+(c*d),b", '(', ')' ) ==> true;
*/
extern CGV_API bool balanced_find_content(
			const char* begin, const char* end,
			token& content,
			char open_parenthesis, char close_parenthesis);

/// input range given as the token \c expression; forwards to the pointer-range version and returns its result
inline bool balanced_find_content(
			const token& expression,
			token& content,
			char open_parenthesis, char close_parenthesis)
{
	const bool found = balanced_find_content(
		expression.begin, expression.end,
		content,
		open_parenthesis, close_parenthesis);
	return found;
}
inline bool balanced_find_content(
			const std::string& expression, 
			token& content, 
			char open_parenthesis, char close_parenthesis) {
	return balanced_find_content(&expression[0], &expression[0]+expression.size(),
		content,open_parenthesis,close_parenthesis);
}

	}
}

#include <cgv/config/lib_end.h>
Initial Commit 2018-05-17 13:50:03 +00:00			`#pragma once`

			`#include <vector>`

			`#include "token.h"`
			`#include "scan.h"`

			`#include "lib_begin.h"`

			`namespace cgv {`
			`namespace utils {`

			`/** a line in a text is simply represented as a token */`
			`struct line : public token`
			`{`
			`/// construct from range`
			`line(const char* _b = 0, const char* _e = 0) : token(_b, _e) {}`
			`};`

			`/// different types that a typed_token can have`
			`enum token_type {`
			`PLAIN, URL, FLOAT_VALUE, TIME_VALUE, DATE_VALUE`
			`};`

			`/** a typed token also stores the type and value of a parsed token. Although`
			`a union of the different typed values should have been used, the values`
			`of different type are stored successively in the typed_token because of`
			`problems with standard construction. */`
			`struct typed_token : public token`
			`{`
			`token_type type;`
			`typed_token(const token& t, token_type tt = PLAIN) : token(t), type(tt), date_value(0) {}`
			`double float_value;`
			`utils::time time_value;`
			`utils::date date_value;`
			`};`

			`/** this function splits a text range into tokens. The function is typically applied`
			`to a line. One can specify whitespaces that are used to detect split locations`
			`but do not generate any tokens. The separators also mark split locations and`
			`generate tokens. If merge_separators is true, successive separators are merged`
			`into one token. Finally, one can specify a pair of character lists`
			`open_paranthesis and close_paranthesis that are used to overwrite the`
			`splitting mechanism and allow to generate tokens including whitespaces and`
			`separators. Some examples:`

			`applying`

			`vector<token> tokens;`
			`split_to_tokens(line.begin, line.end, tokens, ".,;:", "'{", "'}", " \t");`

			`to the line`

			`Hello world... This is a "split to tokens" test line. {copyright: S. G.}`

			`generates the following tokens:`

			`Hello`
			`world`
			`...`
			`This`
			`is`
			`a`
			`split to tokens`
			`test`
			`line`
			`.`
			`copyright: S. G.`
			`*/`
			`extern CGV_API void split_to_tokens(`
			`const char* begin, const char* end,`
			`std::vector<token>& tokens,`
			`const std::string& separators,`
			`bool merge_separators = true,`
			`const std::string& open_parenthesis = "", const std::string& close_parenthesis = "",`
			`const std::string& whitespaces = " \t\n",`
			`unsigned int max_nr_tokens = -1);`

			`/// text range given as token`
			`inline void split_to_tokens(`
			`const token& tok,`
			`std::vector<token>& tokens,`
			`const std::string& separators,`
			`bool merge_separators = true,`
			`const std::string& open_parenthesis = "", const std::string& close_parenthesis = "",`
			`const std::string& whitespaces = " \t\n",`
			`unsigned int /max_nr_tokens/ = -1)`
			`{`
			`split_to_tokens(`
			`tok.begin,tok.end,`
			`tokens,`
			`separators,merge_separators,open_parenthesis,close_parenthesis,whitespaces);`
			`}`

			`/// text range given as string`
			`inline void split_to_tokens(`
			`const std::string& s,`
			`std::vector<token>& tokens,`
			`const std::string& separators,`
			`bool merge_separators = true,`
			`const std::string& open_parenthesis = "", const std::string& close_parenthesis = "",`
			`const std::string& whitespaces = " \t\n",`
			`unsigned int /max_nr_tokens/ = (unsigned int)-1)`
			`{`
			`split_to_tokens(&s[0],&s[0]+s.size(),tokens,separators,merge_separators,open_parenthesis,close_parenthesis,whitespaces);`
			`}`

			`/** this function splits a text range at the newline characters into single lines.`
			`If truncate_trailing_spaces is true all spaces and tabs at the end of the`
			`line are excluded. */`
			`extern CGV_API void split_to_lines(const char* begin, const char* end,`
			`std::vector<line>& lines,`
			`bool truncate_trailing_spaces = true);`
			`/// text range given as token`
			`inline void split_to_lines(const token& tok,`
			`std::vector<line>& lines,`
			`bool truncate_trailing_spaces = true) {`
			`split_to_lines(tok.begin,tok.end,lines,truncate_trailing_spaces);`
			`}`

			`/// text range given as string`
			`inline void split_to_lines(const std::string& s,`
			`std::vector<line>& lines,`
			`bool truncate_trailing_spaces = true) {`
			`split_to_lines(&s[0],&s[0]+s.size(),lines,truncate_trailing_spaces);`
			`}`

			`/** the input range must begin with an open parenthesis. The function`
			`finds the matching closing parenthesis and returns a token with the`
			`content inside the parentheses.`

			`Examples:`

			`balanced_find_content( "a+b", => , '(', ')' ) ==> false;`
			`balanced_find_content( "(a+b)", => "a+b", '(', ')' ) ==> true;`
			`balanced_find_content( "(a+(cd),b)", => "a+(cd),b", '(', ')' ) ==> true;`
			`*/`
			`extern CGV_API bool balanced_find_content(`
			`const char* begin, const char* end,`
			`token& content,`
			`char open_parenthesis, char close_parenthesis);`

			`inline bool balanced_find_content(`
			`const token& expression,`
			`token& content,`
			`char open_parenthesis, char close_parenthesis) {`
			`return balanced_find_content(expression.begin, expression.end,`
			`content,open_parenthesis,close_parenthesis);`
			`}`
			`inline bool balanced_find_content(`
			`const std::string& expression,`
			`token& content,`
			`char open_parenthesis, char close_parenthesis) {`
			`return balanced_find_content(&expression[0], &expression[0]+expression.size(),`
			`content,open_parenthesis,close_parenthesis);`
			`}`

			`}`
			`}`

			`#include <cgv/config/lib_end.h>`