From 3a5d23502f3c34e1047fd93d02a777eda6a32a41 Mon Sep 17 00:00:00 2001
From: Sam Varshavchik
Date: Sun, 6 Dec 2020 20:29:07 -0500
Subject: courier-unicode: partial cleanup of bidirectional strings.
---
unicode/biditest2.C | 50 +++++++++++++++++++++++++++++++++++++++++++-
unicode/book.xml | 32 +++++++++++++++++++++++++++-
unicode/courier-unicode.h.in | 21 +++++++++++++++++++
unicode/unicodecpp.C | 31 +++++++++++++++++++++++++++
4 files changed, 132 insertions(+), 2 deletions(-)
(limited to 'unicode')
diff --git a/unicode/biditest2.C b/unicode/biditest2.C
index ded76be..cbd8c25 100644
--- a/unicode/biditest2.C
+++ b/unicode/biditest2.C
@@ -451,6 +451,53 @@ void character_test()
std::cout << std::endl;
}
+// Verifies that an exception thrown by a bidi_reorder() callback reaches the
+// caller, and that the callback ran exactly once before it was caught.
+void exception_test()
+{
+    std::u32string s{U"שלום"};
+    auto res=unicode::bidi_calc(s);
+
+    int thrown=0;   // number of times the callback ran (and threw)
+    int caught=0;   // accumulates the value carried by the caught exception
+
+    try
+    {
+        unicode::bidi_reorder(s, std::get<0>(res),
+                              [&](size_t, size_t)
+                              {
+                                  ++thrown;
+                                  throw 42;
+                              });
+    } catch(int n)
+    {
+        caught += n;
+    }
+
+    if (thrown != 1 || caught != 42)
+    {
+        std::cerr << "Exception handling failed" << std::endl;
+        exit(1);    // a failed check must fail the test program, not just log
+    }
+}
+
+// Smoke test: reorder the whole string, then clean up only a leading subrange.
+// Makes no assertions of its own.
+void partial_reorder_cleanup()
+{
+    std::u32string text{U"שלום"};
+    auto calculated=unicode::bidi_calc(text);
+    auto &embedding_levels=std::get<0>(calculated);
+
+    unicode::bidi_reorder(text, embedding_levels);
+
+    // Removed-character notifications are deliberately ignored.
+    const auto ignore_removed=[](size_t) {};
+
+    // cleanup_options=0, starting_pos=0, n=3
+    unicode::bidi_cleanup(text, embedding_levels, ignore_removed, 0, 0, 3);
+}
+
int main(int argc, char **argv)
{
DEBUGDUMP=fopen("/dev/null", "w");
@@ -459,7 +506,8 @@ int main(int argc, char **argv)
perror("/dev/null");
exit(1);
}
-
+ exception_test();
+ partial_reorder_cleanup();
latin_test();
character_test();
return 0;
diff --git a/unicode/book.xml b/unicode/book.xml
index 4838364..45686dc 100644
--- a/unicode/book.xml
+++ b/unicode/book.xml
@@ -2851,16 +2851,26 @@ See COPYING for distribution information.
void unicode::bidi_cleanup
std::u32string &string
- const std::function<void (size_t) noexcept> &removed_callback
+ const std::function<void (size_t) noexcept> &removed_callback=[](size_t){}
int cleanup_options
+
+ int unicode::bidi_cleanup
+ std::u32string &string
+ std::vector <unicode_bidi_level_t> &levels
+ const std::function<void (size_t) noexcept> &removed_callback=[](size_t){}
+ int cleanup_options=0
+
+
int unicode::bidi_cleanup
std::u32string &string
std::vector <unicode_bidi_level_t> &levels
const std::function<void (size_t) noexcept> &removed_callback
int cleanup_options
+ size_t starting_pos
+ size_t n
@@ -3030,6 +3040,26 @@ auto [levels, level]=unicode::bidi_calc(types);
characters).
+
+
+
+ unicode::bidi_reorder and
+ unicode::bidi_cleanup take two optional
+ parameters (provided as defaulted values or as an overload): a
+ starting position and a number of characters, which together
+ define the subset of the original string that gets reordered
+ or cleaned up.
+
+
+
+ This overload of unicode::bidi_cleanup does not
+ resize the passed-in string and embedding level buffer,
+ since it affects only a subset of the string. The number
+ of times the removed character callback gets invoked
+ indicates by how many characters the substring should be shortened.
+
+
+
diff --git a/unicode/courier-unicode.h.in b/unicode/courier-unicode.h.in
index 469b456..4bc7b55 100644
--- a/unicode/courier-unicode.h.in
+++ b/unicode/courier-unicode.h.in
@@ -2265,6 +2265,27 @@ int bidi_cleanup(std::u32string &string,
[](size_t) {},
int cleanup_options=0);
+//! Clean up a substring of the unicode string.
+
+//! The substring gets specified by starting_pos and n.
+//!
+//! \note
+//! The removed position parameter to the removed_callback is relative to
+//! the given starting_pos. Add starting_pos to it to get the
+//! actual removed index.
+//!
+//! Returns non-0 in case of non-matching level buffer size.
+//!
+//! The string and the levels are not resized. The final size of the substring
+//! is determined by counting how many calls to removed_callback were made.
+
+int bidi_cleanup(std::u32string &string,
+                 std::vector<unicode_bidi_level_t> &levels,
+                 const std::function<void (size_t)> &removed_callback,
+                 int cleanup_options,
+                 size_t starting_pos,
+                 size_t n);
+
//! Convert Unicode string from canonical rendering order to logical order.
int bidi_logical_order(std::u32string &string,
std::vector &levels,
diff --git a/unicode/unicodecpp.C b/unicode/unicodecpp.C
index 8c43b31..5677b86 100644
--- a/unicode/unicodecpp.C
+++ b/unicode/unicodecpp.C
@@ -636,6 +636,8 @@ struct cb_wrapper {
template void operator()(Args && ...args)
{
+ if (caught)
+ return;
try {
cb(std::forward(args)...);
} catch (...)
@@ -771,6 +773,35 @@ int unicode::bidi_cleanup(std::u32string &string,
return 0;
}
+// Cleans up n characters starting at starting_pos, in place, without resizing
+// string or levels. Returns -1 if their sizes differ, 0 otherwise.
+int unicode::bidi_cleanup(std::u32string &string,
+                          std::vector<unicode_bidi_level_t> &levels,
+                          const std::function<void (size_t)> &lambda,
+                          int cleanup_options,
+                          size_t starting_pos,
+                          size_t n)
+{
+    size_t s=string.size();
+
+    if (levels.size() != s)
+        return -1;
+    if (starting_pos >= s)
+        return 0;   // empty range: nothing to clean up
+    if (n > s-starting_pos)
+        n=s-starting_pos;   // clamp the range to the end of the string
+
+    // NOTE(review): cb_wrapper's template argument and the cast target were reconstructed; confirm against their declarations.
+    cb_wrapper<const std::function<void (size_t)>> cb{lambda};
+    unicode_bidi_cleanup(&string[starting_pos],
+                         &levels[starting_pos],
+                         n, cleanup_options,
+                         removed_callback,
+                         reinterpret_cast<void *>(&cb));
+    cb.rethrow();   // propagate an exception captured from the callback, if any
+    return 0;
+}
+
int unicode::bidi_logical_order(std::u32string &string,
std::vector &levels,
unicode_bidi_level_t paragraph_embedding,
--
cgit v1.2.3