summaryrefslogtreecommitdiffstats
path: root/unicode
diff options
context:
space:
mode:
authorSam Varshavchik2020-12-06 20:29:07 -0500
committerSam Varshavchik2020-12-06 20:29:07 -0500
commit3a5d23502f3c34e1047fd93d02a777eda6a32a41 (patch)
treed4b8de6001f5d9725be90a2236e011d473b1d4a2 /unicode
parentdba04f8c6b41ff124485d620781c651ce1352ee4 (diff)
downloadcourier-libs-3a5d23502f3c34e1047fd93d02a777eda6a32a41.tar.bz2
courier-unicode: partial cleanup of bidirectional strings.
Diffstat (limited to 'unicode')
-rw-r--r--unicode/biditest2.C50
-rw-r--r--unicode/book.xml32
-rw-r--r--unicode/courier-unicode.h.in21
-rw-r--r--unicode/unicodecpp.C31
4 files changed, 132 insertions, 2 deletions
diff --git a/unicode/biditest2.C b/unicode/biditest2.C
index ded76be..cbd8c25 100644
--- a/unicode/biditest2.C
+++ b/unicode/biditest2.C
@@ -451,6 +451,53 @@ void character_test()
std::cout << std::endl;
}
+// Verify that an exception thrown from a bidi_reorder() callback reaches
+// the caller, and that the callback is invoked exactly once.
+//
+// Terminates the test program with a non-zero exit status on failure, so
+// that a failure is visible to whatever runs the test and not only
+// on stderr.
+void exception_test()
+{
+	std::u32string s{U"שלום"};
+
+	auto res=unicode::bidi_calc(s);
+
+	int thrown=0;	// How many times the callback ran (and threw)
+	int caught=0;	// Sum of the exception values that reached us
+
+	try
+	{
+		unicode::bidi_reorder(s, std::get<0>(res),
+				      [&]
+				      (size_t, size_t)
+				      {
+					      ++thrown;
+					      throw 42;
+				      });
+	} catch(int n)
+	{
+		caught += n;
+	}
+
+	// Exactly one throw, and exactly that value, must have been seen.
+	if (thrown != 1 || caught != 42)
+	{
+		std::cerr << "Exception handling failed"
+			  << std::endl;
+		exit(1);
+	}
+}
+
+// Smoke test for the substring overload of bidi_cleanup(): reorder the
+// whole string, then clean up only its first three characters with a
+// no-op removal callback and no cleanup options. Return values are
+// not inspected.
+void partial_reorder_cleanup()
+{
+	std::u32string text{U"שלום"};
+
+	auto calculated=unicode::bidi_calc(text);
+	auto &levels=std::get<0>(calculated);
+
+	unicode::bidi_reorder(text, levels);
+
+	const auto ignore_removed=[](size_t) {};
+
+	unicode::bidi_cleanup(text, levels, ignore_removed,
+			      0,	// cleanup_options
+			      0, 3);	// starting_pos, n
+}
+
int main(int argc, char **argv)
{
DEBUGDUMP=fopen("/dev/null", "w");
@@ -459,7 +506,8 @@ int main(int argc, char **argv)
perror("/dev/null");
exit(1);
}
-
+ exception_test();
+ partial_reorder_cleanup();
latin_test();
character_test();
return 0;
diff --git a/unicode/book.xml b/unicode/book.xml
index 4838364..45686dc 100644
--- a/unicode/book.xml
+++ b/unicode/book.xml
@@ -2851,7 +2851,7 @@ See COPYING for distribution information.
<funcprototype>
<funcdef>void <function>unicode::bidi_cleanup</function></funcdef>
<paramdef>std::u32string &amp;<parameter>string</parameter></paramdef>
- <paramdef>const std::function&lt;void (size_t) noexcept&gt; &amp;<parameter>removed_callback</parameter></paramdef>
+ <paramdef>const std::function&lt;void (size_t) noexcept&gt; &amp;<parameter>removed_callback</parameter>=[](size_t){}</paramdef>
<paramdef>int <parameter>cleanup_options</parameter></paramdef>
</funcprototype>
@@ -2859,8 +2859,18 @@ See COPYING for distribution information.
<funcdef>int <function>unicode::bidi_cleanup</function></funcdef>
<paramdef>std::u32string &amp;<parameter>string</parameter></paramdef>
<paramdef>std::vector &lt;unicode_bidi_level_t&gt; &amp;<parameter>levels</parameter></paramdef>
+ <paramdef>const std::function&lt;void (size_t) noexcept&gt; &amp;<parameter>removed_callback</parameter>=[](size_t){}</paramdef>
+ <paramdef>int <parameter>cleanup_options</parameter>=0</paramdef>
+ </funcprototype>
+
+ <funcprototype>
+ <funcdef>int <function>unicode::bidi_cleanup</function></funcdef>
+ <paramdef>std::u32string &amp;<parameter>string</parameter></paramdef>
+ <paramdef>std::vector &lt;unicode_bidi_level_t&gt; &amp;<parameter>levels</parameter></paramdef>
<paramdef>const std::function&lt;void (size_t) noexcept&gt; &amp;<parameter>removed_callback</parameter></paramdef>
<paramdef>int <parameter>cleanup_options</parameter></paramdef>
+ <paramdef>size_t <parameter>starting_pos</parameter></paramdef>
+ <paramdef>size_t <parameter>n</parameter></paramdef>
</funcprototype>
<funcprototype>
@@ -3030,6 +3040,26 @@ auto [levels, level]=unicode::bidi_calc(types);
characters).
</para>
</listitem>
+
+ <listitem>
+ <para>
+ <function>unicode::bidi_reorder</function> and
+ <function>unicode::bidi_cleanup</function> accept two optional
+ parameters (as default argument values or through an overload):
+ a starting position and a number of characters, which together
+ define the substring of the original string that gets reordered
+ or cleaned up.
+ </para>
+
+ <para>
+ This overload of <function>unicode::bidi_cleanup</function> does
+ not shrink the passed-in string and embedding level buffer,
+ since it affects only a subset of the string. The number
+ of times the removed-character callback gets invoked
+ indicates by how many characters the substring should be trimmed.
+ </para>
+ </listitem>
+
</itemizedlist>
<refsect2 id="unicode_cpp_bidi_literals">
diff --git a/unicode/courier-unicode.h.in b/unicode/courier-unicode.h.in
index 469b456..4bc7b55 100644
--- a/unicode/courier-unicode.h.in
+++ b/unicode/courier-unicode.h.in
@@ -2265,6 +2265,27 @@ int bidi_cleanup(std::u32string &string,
[](size_t) {},
int cleanup_options=0);
+//! Clean up a substring of the unicode string.
+
+//! The substring gets specified by starting_pos and n.
+//!
+//! \note
+//! The removed position parameter to the removed_callback is relative to
+//! the given starting_pos. Add starting_pos to it to get the
+//! actual removed index.
+//!
+//! Returns non-0 in case of non-matching level buffer size.
+//!
+//! The string and the level buffer are not resized. The final size of the
+//! cleaned up substring is determined by counting
+//! how many calls to removed_callback were made.
+
+int bidi_cleanup(std::u32string &string,
+ std::vector<unicode_bidi_level_t> &levels,
+ const std::function<void (size_t)> &removed_callback,
+ int cleanup_options,
+ size_t starting_pos,
+ size_t n);
+
//! Convert Unicode string from canonical rendering order to logical order.
int bidi_logical_order(std::u32string &string,
std::vector<unicode_bidi_level_t> &levels,
diff --git a/unicode/unicodecpp.C b/unicode/unicodecpp.C
index 8c43b31..5677b86 100644
--- a/unicode/unicodecpp.C
+++ b/unicode/unicodecpp.C
@@ -636,6 +636,8 @@ struct cb_wrapper {
template<typename ...Args> void operator()(Args && ...args)
{
+ if (caught)
+ return;
try {
cb(std::forward<Args>(args)...);
} catch (...)
@@ -771,6 +773,35 @@ int unicode::bidi_cleanup(std::u32string &string,
return 0;
}
+// Clean up a substring of a string and of its embedding level buffer.
+//
+// The substring starts at starting_pos and is at most n characters long;
+// n gets clamped so that the substring does not run past the end of the
+// string. Neither the string nor the level buffer gets resized here.
+//
+// Returns -1 if the level buffer's size differs from the string's size,
+// and 0 otherwise. A starting_pos at or beyond the end of the string is
+// a no-op that returns 0.
+//
+// An exception captured by the callback wrapper gets rethrown after the
+// underlying C function returns.
+int unicode::bidi_cleanup(std::u32string &string,
+			  std::vector<unicode_bidi_level_t> &levels,
+			  const std::function<void (size_t)> &lambda,
+			  int cleanup_options,
+			  size_t starting_pos,
+			  size_t n)
+{
+	const size_t total=string.size();
+
+	if (levels.size() != total)
+		return -1;
+
+	if (starting_pos < total)
+	{
+		// Never look past the end of the string.
+		const size_t available=total-starting_pos;
+		const size_t count=n > available ? available : n;
+
+		cb_wrapper<void (size_t)> cb{lambda};
+
+		unicode_bidi_cleanup(&string[starting_pos],
+				     &levels[starting_pos],
+				     count,
+				     cleanup_options,
+				     removed_callback,
+				     static_cast<void *>(&cb));
+		cb.rethrow();
+	}
+
+	return 0;
+}
+
int unicode::bidi_logical_order(std::u32string &string,
std::vector<unicode_bidi_level_t> &levels,
unicode_bidi_level_t paragraph_embedding,