Main Page   Class Hierarchy   Alphabetical List   Compound List   Examples  
tokenizer.h
1/***************************************************************************
2 copyright : (C) 2002-2008 by Stefano Barbato
3 email : stefano@codesink.org
4
5 $Id: tokenizer.h,v 1.18 2008-10-07 11:44:38 tat Exp $
6 ***************************************************************************/
7#ifndef _MIMETIC_TOKENIZER_H_
8#define _MIMETIC_TOKENIZER_H_
9#include <iterator>
10#include <algorithm>
11#include <set>
12#include <string>
13#include <cstring>
14
15namespace mimetic
16{
17
/// Delimiter predicate.
///
/// Unary predicate that answers "is this value a delimiter?" for a
/// configurable set of delimiter values. The generic version stores the
/// delimiters in a std::set.
template<typename value_type>
struct IsDelim
{
    typedef value_type argument_type;
    typedef bool result_type;

    /// Returns true when \p val is one of the registered delimiters.
    bool operator()(const value_type& val) const
    {
        return m_delims.find(val) != m_delims.end();
    }
    /// Registers every element of \p cont as a delimiter.
    /// Delimiters registered earlier are kept (the set is not cleared).
    template<typename Container>
    void setDelimList(const Container& cont)
    {
        m_delims.insert(cont.begin(), cont.end());
    }
    /// Registers every element of [bit, eit) as a delimiter.
    /// Delimiters registered earlier are kept (the set is not cleared).
    template<typename Iterator>
    void setDelimList(Iterator bit, Iterator eit)
    {
        m_delims.insert(bit, eit);
    }
    /// Registers a single delimiter.
    void addDelim(const value_type& value)
    {
        m_delims.insert(value);
    }
    /// Unregisters a single delimiter; no effect if it was not registered.
    void removeDelim(const value_type& value)
    {
        m_delims.erase(value);
    }
private:
    std::set<value_type> m_delims;
};

/// Delimiter predicate specialized for char.
///
/// Uses a 256-entry lookup table indexed by the character's unsigned value
/// instead of a std::set. Unlike the generic version, setDelimList()
/// REPLACES the current delimiter list.
template<>
struct IsDelim<char>
{
    typedef char argument_type;
    typedef bool result_type;

    /// Starts with an empty delimiter list so that operator() is well
    /// defined even when setDelimList() has not been called yet.
    IsDelim()
    {
        std::memset(m_lookup, 0, sizeof(m_lookup));
    }
    /// Replaces the delimiter list with the characters of \p delims.
    void setDelimList(const std::string& delims)
    {
        setDelimList(delims.begin(), delims.end());
    }
    /// Replaces the delimiter list with the characters in [bit, eit).
    template<typename Iterator>
    void setDelimList(Iterator bit, Iterator eit)
    {
        std::memset(m_lookup, 0, sizeof(m_lookup));
        for(; bit != eit; ++bit)
            m_lookup[index(*bit)] = 1;
    }
    /// Registers a single delimiter.
    void addDelim(char value)
    {
        m_lookup[index(value)] = 1;
    }
    /// Unregisters a single delimiter; no effect if it was not registered.
    void removeDelim(char value)
    {
        m_lookup[index(value)] = 0;
    }
    /// Returns true when \p val is one of the registered delimiters.
    bool operator()(unsigned char val) const
    {
        return m_lookup[val] != 0;
    }
private:
    // char may be signed: go through unsigned char so that bytes >= 0x80
    // map to 128..255 rather than to a negative (out of bounds) index.
    static unsigned char index(char c)
    {
        return static_cast<unsigned char>(c);
    }
    char m_lookup[256];
};
78
79
80/// Iterator tokenizer template class
81template<class Iterator,typename value_type>
83{
84public:
85 ItTokenizer(Iterator bit, Iterator eit)
86 : m_bit(bit), m_eit(eit), m_tok_eit(bit)
87 {
88 }
89 void setSource(Iterator bit, Iterator eit)
90 {
91 m_bit = bit;
92 m_eit = eit;
93 m_tok_eit = bit;
94 }
95 template<typename DelimCont>
96 void setDelimList(const DelimCont& cont)
97 {
98 m_delimPred.setDelimList(cont);
99 }
100 template<typename It>
101 void setDelimList(It bit, It eit)
102 {
103 m_delimPred.setDelimList(bit, eit);
104 }
105 template<typename DestCont>
106 bool next(DestCont& dst)
107 {
108 dst.erase(dst.begin(), dst.end());
109 if(m_tok_eit == m_eit)
110 return false;
111 m_tok_eit = std::find_if(m_bit, m_eit, m_delimPred);
112 m_matched = 0; // end of input
113 if(m_tok_eit != m_eit)
114 m_matched = *m_tok_eit; // matched delimiter
115 std::copy(m_bit, m_tok_eit, std::back_inserter<DestCont>(dst));
116 m_bit = (m_tok_eit != m_eit && ++m_tok_eit != m_eit ? m_tok_eit :m_eit);
117 return true;
118 }
119 const value_type& matched() const
120 {
121 return m_matched;
122 }
123 void addDelim(const value_type& value)
124 {
125 m_delimPred.addDelim(value);
126 }
127 void removeDelim(const value_type& value)
128 {
129 m_delimPred.removeDelim(value);
130 }
131private:
132 Iterator m_bit, m_eit, m_tok_eit;
133 IsDelim<value_type> m_delimPred;
134 value_type m_matched;
135};
136
137
138/// char container tokenizer template class
139template<typename Container>
140struct ContTokenizer: public ItTokenizer<typename Container::const_iterator,typename Container::value_type>
141{
142 typedef typename Container::value_type value_type;
143 typedef typename Container::iterator iterator;
144 typedef typename Container::const_iterator const_iterator;
145 // i want to be fast here so i don't want to copy "cont"
146 // so "cont" MUST be in scope for all following calls
147 // to next(...).
148 ContTokenizer(const Container* cont)
149 : ItTokenizer<const_iterator, value_type>(cont->begin(), cont->end())
150 {
151 }
152 template<typename DelimCont>
153 ContTokenizer(const Container* cont, const DelimCont& delims)
154 : ItTokenizer<const_iterator,value_type>(cont->begin(), cont->end())
155 {
156 this->setDelimList(delims);
157 }
158 void setSource(const Container* cont)
159 {
160 ItTokenizer<const_iterator,value_type>::setSource(cont->begin(), cont->end());
161 }
162private:
164 ContTokenizer& operator=(const ContTokenizer&);
165};
166
167/// std::string tokenizer
169
170}
171
172#endif
173
Iterator tokenizer template class.
Definition tokenizer.h:83
Definition body.h:18
ContTokenizer< std::string > StringTokenizer
std::string tokenizer
Definition tokenizer.h:168
char container tokenizer template class
Definition tokenizer.h:141