StringTokenizingReader module

Module contents

template<typename InputLineIteratorT>
class StringTokenizingReader

Iterate over a range of input strings and tokenize each one.

This is the third of four steps in the pipeline for reading points from a file. The first is to read the file line by line. The second is to filter out those lines that are comments. The third, performed by this class, is to tokenize the lines that survive into individual tokens. The fourth is to use those tokens to populate a point.

Public Types

typedef InputLineIteratorT input_line_iter_type
typedef InputLineIteratorT::value_type string_type
typedef TokenizedStringIterator iterator
typedef TokenizedStringIterator const const_iterator

Public Functions

StringTokenizingReader()

Initialize an empty reader with default delimiters (space, tab).

StringTokenizingReader(input_line_iter_type Start, input_line_iter_type Finish)

Initialize a tokenizer with an input range and default delimiters.

StringTokenizingReader(input_line_iter_type Start, input_line_iter_type Finish, string_type const &Delim)

Initialize a tokenizer with an input range and your own delimiters.

StringTokenizingReader(StringTokenizingReader const &other)

Copy state from another tokenizer.

virtual ~StringTokenizingReader()
void set_field_delimiter(string_type const &delim)

Set the delimiter character to use in tokenization.

The single character in the string you supply will be used as a field delimiter.

string_type field_delimiter() const

Return the delimiter character currently in use.

void set_escape_character(string_type const &escape)

Set the escape character to use in tokenization.

You must supply a string containing either zero or one characters to be used as the escape character. The escape character removes the special meaning of whatever character follows it, usually a newline, separator, or quote character.

string_type escape_character() const

Return the escape character currently in use.

void set_quote_character(string_type const &quote)

Set the quote character to use in tokenization.

The single character in the string you supply (assuming it is not empty) will be used as a quote character. Inside a quoted string (a string that begins and ends with the quote character), field delimiters (e.g. comma) will be ignored. Also, inside a quoted string, embedded quote characters must be escaped.

string_type quote_character() const

Return the quote character currently in use.

StringTokenizingReader &operator=(StringTokenizingReader const &other)
bool operator==(StringTokenizingReader const &other) const
bool operator!=(StringTokenizingReader const &other) const
void set_input_range(input_line_iter_type const &start, input_line_iter_type const &finish)
iterator begin() const
iterator end() const
const_iterator const_begin() const
const_iterator const_end() const

Private Types

typedef boost::escaped_list_separator<typename input_line_iter_type::value_type::value_type> separator_type
typedef boost::tokenizer<separator_type> tokenizer_type
typedef std::pair<typename tokenizer_type::iterator, typename tokenizer_type::iterator> token_iterator_pair

Private Members

input_line_iter_type InputLinesBegin
input_line_iter_type InputLinesEnd
string_type FieldDelimiter
string_type EscapeCharacter
string_type QuoteCharacter
class TokenizedStringIterator : public std::iterator<std::input_iterator_tag, token_iterator_pair, std::ptrdiff_t, const token_iterator_pair *, token_iterator_pair const&>

Public Types

template<>
typedef tokenizer_type::iterator iterator
template<>
typedef token_iterator_pair value_type

Public Functions

template<>
TokenizedStringIterator()
template<>
~TokenizedStringIterator()
template<>
TokenizedStringIterator(input_line_iter_type Begin, input_line_iter_type End, string_type const &Delim, string_type const &Escape, string_type const &Quote)
template<>
TokenizedStringIterator(TokenizedStringIterator const &other)
template<>
TokenizedStringIterator &operator=(TokenizedStringIterator const &other)
template<>
token_iterator_pair const &operator*() const
template<>
token_iterator_pair const *operator->() const
template<>
TokenizedStringIterator &operator++()
template<>
TokenizedStringIterator &operator++(int)
template<>
bool operator==(TokenizedStringIterator const &other) const
template<>
bool operator!=(TokenizedStringIterator const &other) const

Private Functions

template<>
void _tokenize_this_line()

Private Members

template<>
tokenizer_type *Tokenizer
template<>
token_iterator_pair TokenRangeCurrentString
template<>
input_line_iter_type InputLinesBegin
template<>
input_line_iter_type InputLinesEnd
template<>
string_type FieldDelimiter
template<>
string_type EscapeCharacter
template<>
string_type QuoteCharacter