diff options
| author | Sam Varshavchik | 2013-08-19 16:39:41 -0400 |
|---|---|---|
| committer | Sam Varshavchik | 2013-08-25 14:43:51 -0400 |
| commit | 9c45d9ad13fdf439d44d7443ae75da15ea0223ed (patch) | |
| tree | 7a81a04cb51efb078ee350859a64be2ebc6b8813 /unicode/wordbreaktest.C | |
| parent | a9520698b770168d1f33d6301463bb70a19655ec (diff) | |
| download | courier-libs-9c45d9ad13fdf439d44d7443ae75da15ea0223ed.tar.bz2 | |
Initial checkin
Imported from the Subversion repository, converted to git. Updated all paths in
scripts and makefiles, reflecting the new directory hierarchy.
Diffstat (limited to 'unicode/wordbreaktest.C')
| -rw-r--r-- | unicode/wordbreaktest.C | 145 |
1 files changed, 145 insertions, 0 deletions
diff --git a/unicode/wordbreaktest.C b/unicode/wordbreaktest.C new file mode 100644 index 0000000..35a1d27 --- /dev/null +++ b/unicode/wordbreaktest.C @@ -0,0 +1,145 @@ +#include "unicode_config.h" +#include "unicode.h" + +#include <iostream> +#include <fstream> +#include <sstream> +#include <iomanip> +#include <algorithm> +#include <functional> +#include <cstdlib> +#include <list> +#include <vector> + +class collect_wordbreakflags : public mail::wordbreak_callback_base { + +public: + + std::vector<bool> flags; + + template<typename iter_type> void operator()(iter_type b, iter_type e) + { + mail::wordbreak_callback_base::operator()(b, e); + } + + using mail::wordbreak_callback_base::operator<<; + +private: + int operator()(bool flag) + { + flags.push_back(flag); + return 0; + } +}; + +static void testsuite() +{ + std::string buf; + int linenum=0; + + std::ifstream fp("WordBreakTest.txt"); + + if (!fp.is_open()) + exit(1); + + while (1) + { + buf.clear(); + + if (std::getline(fp, buf).eof() && buf.empty()) + break; + + ++linenum; + + buf.erase(std::find(buf.begin(), buf.end(), '#'), buf.end()); + + if (buf.empty()) + continue; + + std::list<std::string> words; + + for (std::string::iterator b=buf.begin(), e=buf.end(); b != e;) + { + if (isspace(*b)) + { + ++b; + continue; + } + + std::string::iterator p=b; + + while (b != e) + { + if (isspace(*b)) + break; + ++b; + } + + words.push_back(std::string(p, b)); + } + + std::vector<unicode_char> ubuf; + std::vector<bool> status; + + while (1) + { + if (!words.empty() && words.front().size() > 1) + { + bool flag=false; + std::string s=words.front(); + + words.pop_front(); + + if ((unsigned char)s[0] == + (unsigned char)0xc3 && + (unsigned char)s[1] == (unsigned char)0xb7) + flag=true; + + if (words.empty()) + break; + + status.push_back(flag); + + std::istringstream i(words.front()); + + unicode_char uc; + + i >> std::hex >> uc; + + words.pop_front(); + + if (!i.fail()) + { + ubuf.push_back(uc); + continue; + } + } + + 
std::cerr << "Parse error, line " << linenum + << ": " << buf << std::endl; + exit(1); + } + + if (linenum == 24) + { + linenum=24; + } + collect_wordbreakflags flags; + + flags(ubuf.begin(), ubuf.end()); + flags.finish(); + + if (status != flags.flags) + { + std::cerr << "Regression, line " << linenum + << ": " << buf << std::endl; + exit(1); + } + } +} + +int main(int argc, char **argv) +{ + testsuite(); + return 0; +} |
