From 3a5d23502f3c34e1047fd93d02a777eda6a32a41 Mon Sep 17 00:00:00 2001 From: Sam Varshavchik Date: Sun, 6 Dec 2020 20:29:07 -0500 Subject: courier-unicode: partial cleanup of bidirectional strings. --- unicode/biditest2.C | 50 +++++++++++++++++++++++++++++++++++++++++++- unicode/book.xml | 32 +++++++++++++++++++++++++++- unicode/courier-unicode.h.in | 21 +++++++++++++++++++ unicode/unicodecpp.C | 31 +++++++++++++++++++++++++++ 4 files changed, 132 insertions(+), 2 deletions(-) (limited to 'unicode') diff --git a/unicode/biditest2.C b/unicode/biditest2.C index ded76be..cbd8c25 100644 --- a/unicode/biditest2.C +++ b/unicode/biditest2.C @@ -451,6 +451,53 @@ void character_test() std::cout << std::endl; } +void exception_test() +{ + std::u32string s{U"שלום"}; + + auto res=unicode::bidi_calc(s); + + int thrown=0; + int caught=0; + + try + { + unicode::bidi_reorder(s, std::get<0>(res), + [&] + (size_t, size_t) + { + ++thrown; + throw 42; + }); + } catch(int n) + { + caught += n; + } + + if (thrown != 1 || caught != 42) + { + std::cerr << "Exception handling failed" + << std::endl; + } +} + +void partial_reorder_cleanup() +{ + std::u32string s{U"שלום"}; + + auto res=unicode::bidi_calc(s); + + unicode::bidi_reorder(s, std::get<0>(res)); + + unicode::bidi_cleanup(s, std::get<0>(res), + [] + (size_t) + { + }, + 0, + 0, 3); +} + int main(int argc, char **argv) { DEBUGDUMP=fopen("/dev/null", "w"); @@ -459,7 +506,8 @@ int main(int argc, char **argv) perror("/dev/null"); exit(1); } - + exception_test(); + partial_reorder_cleanup(); latin_test(); character_test(); return 0; diff --git a/unicode/book.xml b/unicode/book.xml index 4838364..45686dc 100644 --- a/unicode/book.xml +++ b/unicode/book.xml @@ -2851,16 +2851,26 @@ See COPYING for distribution information. void unicode::bidi_cleanup std::u32string &string - const std::function<void (size_t) noexcept> &removed_callback + const std::function<void (size_t) noexcept> &removed_callback=[](size_t){} int cleanup_options + + int unicode::bidi_cleanup + std::u32string &string + std::vector <unicode_bidi_level_t> &levels + const std::function<void (size_t) noexcept> &removed_callback=[](size_t){} + int cleanup_options=0 + + int unicode::bidi_cleanup std::u32string &string std::vector <unicode_bidi_level_t> &levels const std::function<void (size_t) noexcept> &removed_callback int cleanup_options + size_t starting_pos + size_t n @@ -3030,6 +3040,26 @@ auto [levels, level]=unicode::bidi_calc(types); characters). + + + + unicode::bidi_reorder and + unicode::bidi_cleanup take two optional + parameters (defaulted values or overloaded) specifying + an optional starting position and number of characters that + define a subset of the original string that gets reordered + or cleaned up. + + + + This unicode::bidi_cleanup does not + trim off the passed in string and embedding level buffer, + since it affects only a subset of the string. The number + of times the removed character callback gets invoked + indicates how much the substring should be trimmed off. + + + diff --git a/unicode/courier-unicode.h.in b/unicode/courier-unicode.h.in index 469b456..4bc7b55 100644 --- a/unicode/courier-unicode.h.in +++ b/unicode/courier-unicode.h.in @@ -2265,6 +2265,27 @@ int bidi_cleanup(std::u32string &string, [](size_t) {}, int cleanup_options=0); +//! Clean up a substring of the unicode string. + +//! The substring gets specified by starting_pos and n. +//! +//! \note +//! The removed position parameter to the removed_callback is based on +//! the given starting_position. Add starting_pos to it to get the +//! actual removed index. +//! +//! Returns non-0 in case of non-matching level buffer size. +//! +//! The final size of the returned string is determined by counting +//! how many calls to removed_callback were made. + +int bidi_cleanup(std::u32string &string, + std::vector &levels, + const std::function &removed_callback, + int cleanup_options, + size_t starting_pos, + size_t n); + //! Convert Unicode string from canonical rendering order to logical order. int bidi_logical_order(std::u32string &string, std::vector &levels, diff --git a/unicode/unicodecpp.C b/unicode/unicodecpp.C index 8c43b31..5677b86 100644 --- a/unicode/unicodecpp.C +++ b/unicode/unicodecpp.C @@ -636,6 +636,8 @@ struct cb_wrapper { template void operator()(Args && ...args) { + if (caught) + return; try { cb(std::forward(args)...); } catch (...) @@ -771,6 +773,35 @@ int unicode::bidi_cleanup(std::u32string &string, return 0; } +int unicode::bidi_cleanup(std::u32string &string, + std::vector &levels, + const std::function &lambda, + int cleanup_options, + size_t starting_pos, + size_t n) +{ + size_t s=string.size(); + + if (levels.size() != s) + return -1; + + if (starting_pos >= s) + return 0; + + if (n > s-starting_pos) + n=s-starting_pos; + + cb_wrapper cb{lambda}; + unicode_bidi_cleanup(&string[starting_pos], + &levels[starting_pos], + n, + cleanup_options, + removed_callback, + reinterpret_cast(&cb)); + cb.rethrow(); + return 0; +} + int unicode::bidi_logical_order(std::u32string &string, std::vector &levels, unicode_bidi_level_t paragraph_embedding, -- cgit v1.2.3