diff options
| author | Sam Varshavchik | 2020-12-06 20:29:07 -0500 |
|---|---|---|
| committer | Sam Varshavchik | 2020-12-06 20:29:07 -0500 |
| commit | 3a5d23502f3c34e1047fd93d02a777eda6a32a41 (patch) | |
| tree | d4b8de6001f5d9725be90a2236e011d473b1d4a2 /unicode | |
| parent | dba04f8c6b41ff124485d620781c651ce1352ee4 (diff) | |
| download | courier-libs-3a5d23502f3c34e1047fd93d02a777eda6a32a41.tar.bz2 | |
courier-unicode: partial cleanup of bidirectional strings.
Diffstat (limited to 'unicode')
| -rw-r--r-- | unicode/biditest2.C | 50 | ||||
| -rw-r--r-- | unicode/book.xml | 32 | ||||
| -rw-r--r-- | unicode/courier-unicode.h.in | 21 | ||||
| -rw-r--r-- | unicode/unicodecpp.C | 31 |
4 files changed, 132 insertions, 2 deletions
diff --git a/unicode/biditest2.C b/unicode/biditest2.C index ded76be..cbd8c25 100644 --- a/unicode/biditest2.C +++ b/unicode/biditest2.C @@ -451,6 +451,53 @@ void character_test() std::cout << std::endl; } +void exception_test() +{ + std::u32string s{U"שלום"}; + + auto res=unicode::bidi_calc(s); + + int thrown=0; + int caught=0; + + try + { + unicode::bidi_reorder(s, std::get<0>(res), + [&] + (size_t, size_t) + { + ++thrown; + throw 42; + }); + } catch(int n) + { + caught += n; + } + + if (thrown != 1 || caught != 42) + { + std::cerr << "Exception handling failed" + << std::endl; + } +} + +void partial_reorder_cleanup() +{ + std::u32string s{U"שלום"}; + + auto res=unicode::bidi_calc(s); + + unicode::bidi_reorder(s, std::get<0>(res)); + + unicode::bidi_cleanup(s, std::get<0>(res), + [] + (size_t) + { + }, + 0, + 0, 3); +} + int main(int argc, char **argv) { DEBUGDUMP=fopen("/dev/null", "w"); @@ -459,7 +506,8 @@ int main(int argc, char **argv) perror("/dev/null"); exit(1); } - + exception_test(); + partial_reorder_cleanup(); latin_test(); character_test(); return 0; diff --git a/unicode/book.xml b/unicode/book.xml index 4838364..45686dc 100644 --- a/unicode/book.xml +++ b/unicode/book.xml @@ -2851,7 +2851,7 @@ See COPYING for distribution information. <funcprototype> <funcdef>void <function>unicode::bidi_cleanup</function></funcdef> <paramdef>std::u32string &<parameter>string</parameter></paramdef> - <paramdef>const std::function<void (size_t) noexcept> &<parameter>removed_callback</parameter></paramdef> + <paramdef>const std::function<void (size_t) noexcept> &<parameter>removed_callback</parameter>=[](size_t){}</paramdef> <paramdef>int <parameter>cleanup_options</parameter></paramdef> </funcprototype> @@ -2859,8 +2859,18 @@ See COPYING for distribution information. <funcdef>int <function>unicode::bidi_cleanup</function></funcdef> <paramdef>std::u32string &<parameter>string</parameter></paramdef> <paramdef>std::vector <unicode_bidi_level_t> &<parameter>levels</parameter></paramdef> + <paramdef>const std::function<void (size_t) noexcept> &<parameter>removed_callback</parameter>=[](size_t){}</paramdef> + <paramdef>int <parameter>cleanup_options</parameter>=0</paramdef> + </funcprototype> + + <funcprototype> + <funcdef>int <function>unicode::bidi_cleanup</function></funcdef> + <paramdef>std::u32string &<parameter>string</parameter></paramdef> + <paramdef>std::vector <unicode_bidi_level_t> &<parameter>levels</parameter></paramdef> <paramdef>const std::function<void (size_t) noexcept> &<parameter>removed_callback</parameter></paramdef> <paramdef>int <parameter>cleanup_options</parameter></paramdef> + <paramdef>size_t <parameter>starting_pos</parameter></paramdef> + <paramdef>size_t <parameter>n</parameter></paramdef> </funcprototype> <funcprototype> @@ -3030,6 +3040,26 @@ auto [levels, level]=unicode::bidi_calc(types); characters). </para> </listitem> + + <listitem> + <para> + <function>unicode::bidi_reorder</function> and + <function>unicode::bidi_cleanup</function> take two optional + parameters (defaulted values or overloaded) specifying + an optional starting position and number of characters that + define a subset of the original string that gets reordered + or cleaned up. + </para> + + <para> + This <function>unicode::bidi_cleanup</function> does not + trim off the passed in string and embedding level buffer, + since it affects only a subset of the string. The number + of times the removed character callback gets invoked + indicates how much the substring should be trimmed off. + </para> + </listitem> + </itemizedlist> <refsect2 id="unicode_cpp_bidi_literals"> diff --git a/unicode/courier-unicode.h.in b/unicode/courier-unicode.h.in index 469b456..4bc7b55 100644 --- a/unicode/courier-unicode.h.in +++ b/unicode/courier-unicode.h.in @@ -2265,6 +2265,27 @@ int bidi_cleanup(std::u32string &string, [](size_t) {}, int cleanup_options=0); +//! Clean up a substring of the unicode string. + +//! The substring gets specified by starting_pos and n. +//! +//! \note +//! The removed position parameter to the removed_callback is based on +//! the given starting_position. Add starting_pos to it to get the +//! actual removed index. +//! +//! Returns non-0 in case of non-matching level buffer size. +//! +//! The final size of the returned string is determined by counting +//! how many calls to removed_callback were made. + +int bidi_cleanup(std::u32string &string, + std::vector<unicode_bidi_level_t> &levels, + const std::function<void (size_t)> &removed_callback, + int cleanup_options, + size_t starting_pos, + size_t n); + //! Convert Unicode string from canonical rendering order to logical order. int bidi_logical_order(std::u32string &string, std::vector<unicode_bidi_level_t> &levels, diff --git a/unicode/unicodecpp.C b/unicode/unicodecpp.C index 8c43b31..5677b86 100644 --- a/unicode/unicodecpp.C +++ b/unicode/unicodecpp.C @@ -636,6 +636,8 @@ struct cb_wrapper { template<typename ...Args> void operator()(Args && ...args) { + if (caught) + return; try { cb(std::forward<Args>(args)...); } catch (...) @@ -771,6 +773,35 @@ int unicode::bidi_cleanup(std::u32string &string, return 0; } +int unicode::bidi_cleanup(std::u32string &string, + std::vector<unicode_bidi_level_t> &levels, + const std::function<void (size_t)> &lambda, + int cleanup_options, + size_t starting_pos, + size_t n) +{ + size_t s=string.size(); + + if (levels.size() != s) + return -1; + + if (starting_pos >= s) + return 0; + + if (n > s-starting_pos) + n=s-starting_pos; + + cb_wrapper<void (size_t)> cb{lambda}; + unicode_bidi_cleanup(&string[starting_pos], + &levels[starting_pos], + n, + cleanup_options, + removed_callback, + reinterpret_cast<void *>(&cb)); + cb.rethrow(); + return 0; +} + int unicode::bidi_logical_order(std::u32string &string, std::vector<unicode_bidi_level_t> &levels, unicode_bidi_level_t paragraph_embedding, |
