13 files changed, 239 insertions, 198 deletions
diff --git a/imap/ChangeLog b/imap/ChangeLog
index d011c22..abb298a 100644
--- a/imap/ChangeLog
+++ b/imap/ChangeLog
@@ -1,3 +1,7 @@
+2020-11-30  Sam Varshavchik  <mrsam@courier-mta.com>
+
+	* imap: send corrupted Unicode alerts only for new messages.
+
 2020-11-04  Sam Varshavchik  <mrsam@courier-mta.com>
 
 	* spec file: add BuildRequires: %{__make} (will be required in F34).
diff --git a/imap/configure.ac b/imap/configure.ac
index e538983..a85bb04 100644
--- a/imap/configure.ac
+++ b/imap/configure.ac
@@ -4,7 +4,7 @@ dnl
 dnl Copyright 1998 - 2019 Double Precision, Inc.  See COPYING for
 dnl distribution information.
 
-AC_INIT(courier-imap, 5.0.11, [courier-users@lists.sourceforge.net])
+AC_INIT(courier-imap, 5.0.11.20201130, [courier-users@lists.sourceforge.net])
 
 >confdefs.h  # Kill PACKAGE_ macros
 
diff --git a/imap/fetch.c b/imap/fetch.c
index 5daf150..257b295 100644
--- a/imap/fetch.c
+++ b/imap/fetch.c
@@ -61,8 +61,8 @@ extern void get_message_flags(struct imapscanmessageinfo *,
 extern void append_flags(char *, struct imapflags *);
 
 static int fetchitem(FILE **, int *, struct fetchinfo *,
-	struct imapscaninfo *,  unsigned long,
-	struct rfc2045 **);
+		     struct imapscaninfo *,  unsigned long,
+		     struct rfc2045 **, int *);
 
 static void bodystructure(FILE *, struct fetchinfo *,
 	struct imapscaninfo *,  unsigned long,
@@ -250,6 +250,7 @@ int do_fetch(unsigned long n, int byuid, void *p)
 	int	seen;
 	int	open_err;
 	int	unicode_err=0;
+	int	report_unicode_err=0;
 
 	fp=NULL;
 	open_err=0;
@@ -278,15 +279,11 @@ int do_fetch(unsigned long n, int byuid, void *p)
 	while (fi)
 	{
 		int rc=fetchitem(&fp, &open_err, fi, &current_maildir_info, n-1,
-				 &rfc2045p);
+				 &rfc2045p, &unicode_err);
 
 		if (rc > 0)
 			seen=1;
-		if (rc < 0)
-		{
-			rc=0;
-			unicode_err=1;
-		}
+
 		if ((fi=fi->next) != 0)	writes(" ");
 	}
 	writes(")\r\n");
@@ -299,22 +296,6 @@ int do_fetch(unsigned long n, int byuid, void *p)
 		return (0);
 	}
 
-	if (current_maildir_info.msgs[n-1].err8bitflag)
-		unicode_err=0;
-
-	if (unicode_err)
-	{
-		current_maildir_info.msgs[n-1].err8bitflag=1;
-
-		writes("* OK [ALERT] Message ");
-		writen(n);
-		writes(" appears to be a Unicode message and your"
-		       " E-mail reader did not enable Unicode support."
-		       " Please use an E-mail reader that supports"
-		       " IMAP with UTF-8 (see"
-		       " https://tools.ietf.org/html/rfc6855.html)\r\n");
-	}
-
 #if SMAP
 	if (!smapflag)
 #endif
@@ -334,17 +315,31 @@ int do_fetch(unsigned long n, int byuid, void *p)
 			reflag_filename(&current_maildir_info.msgs[n-1],&flags,
 				fileno(fp));
 			current_maildir_info.msgs[n-1].changedflags=1;
+
+			report_unicode_err=unicode_err;
 		}
 	}
 
+	if (report_unicode_err)
+	{
+		writes("* OK [ALERT] Message ");
+		writen(n);
+		writes(" appears to be a Unicode message and your"
+		       " E-mail reader did not enable Unicode support."
+		       " Please use an E-mail reader that supports"
+		       " IMAP with UTF-8 (see"
+		       " https://tools.ietf.org/html/rfc6855.html)\r\n");
+	}
+
 	if (current_maildir_info.msgs[n-1].changedflags)
 		fetchflags(n-1);
 	return (0);
 }
 
 static int fetchitem(FILE **fp, int *open_err, struct fetchinfo *fi,
-	struct imapscaninfo *i, unsigned long msgnum,
-	struct rfc2045 **mimep)
+		     struct imapscaninfo *i, unsigned long msgnum,
+		     struct rfc2045 **mimep,
+		     int *unicode_err)
 {
 	void (*fetchfunc)(FILE *, struct fetchinfo *,
 			  struct imapscaninfo *, unsigned long,
@@ -460,8 +455,7 @@ static int fetchitem(FILE **fp, int *open_err, struct fetchinfo *fi,
 	if (mimecorrectness && !enabled_utf8 &&
 	    ((*mimep)->rfcviolation & RFC2045_ERR8BITHEADER))
 	{
-		/* Still return -1, in order to [ALERT] the client */
-		rc= -1;
+		*unicode_err=1;
 	}
 
 	(*fetchfunc)(*fp, fi, i, msgnum, *mimep);
diff --git a/imap/imapd.c b/imap/imapd.c
index 095defb..c0b5908 100644
--- a/imap/imapd.c
+++ b/imap/imapd.c
@@ -1334,8 +1334,6 @@ void doNoop(int real_noop)
 #endif
 		new_maildir_info.msgs[j].copiedflag=
 			current_maildir_info.msgs[i].copiedflag;
-		new_maildir_info.msgs[j].err8bitflag=
-			current_maildir_info.msgs[i].err8bitflag;
 		++j;
 	}
 
diff --git a/imap/imapscanclient.c b/imap/imapscanclient.c
index 515abc2..5ab7148 100644
--- a/imap/imapscanclient.c
+++ b/imap/imapscanclient.c
@@ -868,7 +868,6 @@ int	dowritecache=0;
 		scaninfo->msgs[i].filename=tempinfo_array[i]->filename;
 		scaninfo->msgs[i].keywordMsg=NULL;
 		scaninfo->msgs[i].copiedflag=0;
-		scaninfo->msgs[i].err8bitflag=0;
 #if SMAP
 		if (smapflag)
 			scaninfo->msgs[i].recentflag=0;
diff --git a/imap/imapscanclient.h b/imap/imapscanclient.h
index 373bc83..2e516cb 100644
--- a/imap/imapscanclient.h
+++ b/imap/imapscanclient.h
@@ -23,7 +23,6 @@ struct imapscanmessageinfo {
 
 	char storeflag;  /* Used by imap_addRemoveKeywords() */
 
-	char err8bitflag;       /* Invalid 8 bit header error was reported */
 	/* When reading keywords, hash messages by filename */
 
 	struct imapscanmessageinfo *firstBucket, *nextBucket;
diff --git a/rfc2045/testrfc3676parsersuite.txt b/rfc2045/testrfc3676parsersuite.txt
index f523981..93c87d5 100644
--- a/rfc2045/testrfc3676parsersuite.txt
+++ b/rfc2045/testrfc3676parsersuite.txt
@@ -109,5 +109,5 @@
 
 [0: 1234567890 1234567890 1234567890 1234567890 1234567890 1234567890 ...1234567890 1234567890 1234567890 1234567890 1234567890 1234567890]
 
-[0: 12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234...1678901234567890123456789012345678901234567890123456789012345678901234567...190123456789012345678901234567890123456789012345678901234567890]
+[0: 12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234...5678901234567890123456789012345678901234567890123456789012345678901234567...890123456789012345678901234567890123456789012345678901234567890]
 
diff --git a/unicode/Makefile.am b/unicode/Makefile.am
index f864e2d..dbc71aa 100644
--- a/unicode/Makefile.am
+++ b/unicode/Makefile.am
@@ -89,11 +89,11 @@ include_HEADERS=courier-unicode.h \
 
 man_MANS= \
         $(srcdir)/man/courier-unicode.7 \
+        $(srcdir)/man/unicode\:\:bidi.3 \
         $(srcdir)/man/unicode\:\:bidi_calc.3 \
         $(srcdir)/man/unicode\:\:bidi_cleanup.3 \
         $(srcdir)/man/unicode\:\:bidi_embed.3 \
         $(srcdir)/man/unicode\:\:bidi_embed_paragraph_level.3 \
-        $(srcdir)/man/unicode\:\:bidi_extra_cleanup.3 \
         $(srcdir)/man/unicode\:\:bidi_logical_order.3 \
         $(srcdir)/man/unicode\:\:bidi_reorder.3 \
         $(srcdir)/man/unicode\:\:iconvert\:\:convert.3 \
@@ -118,7 +118,6 @@ man_MANS= \
         $(srcdir)/man/unicode_bidi_cleanup.3 \
         $(srcdir)/man/unicode_bidi_embed.3 \
         $(srcdir)/man/unicode_bidi_embed_paragraph_level.3 \
-        $(srcdir)/man/unicode_bidi_extra_cleanup.3 \
         $(srcdir)/man/unicode_bidi_logical_order.3 \
         $(srcdir)/man/unicode_bidi_mirror.3 \
         $(srcdir)/man/unicode_bidi_reorder.3 \
@@ -515,4 +514,4 @@ distrelease:
 	$(MAKE) dist
 
 www:
-	rsync -a html/. $$HOME/www/hostrocket/courier-mta.org/unicode
+	rsync -a --delete-after html/. $$HOME/www/hostrocket/courier-mta.org/unicode
diff --git a/unicode/biditest2.C b/unicode/biditest2.C
index a9ab87d..ded76be 100644
--- a/unicode/biditest2.C
+++ b/unicode/biditest2.C
@@ -307,7 +307,9 @@ void character_test()
 			exit(1);
 		}
 
-		unicode::bidi_extra_cleanup(s, levels);
+		unicode::bidi_cleanup(s, levels,
+				      [](size_t) {},
+				      UNICODE_BIDI_CLEANUP_CANONICAL);
 
 		auto dump_ls=
 			[&]
@@ -371,8 +373,13 @@ void character_test()
 			}
 
 			unicode::bidi_reorder(new_string, std::get<0>(ret));
-			unicode::bidi_extra_cleanup(new_string,
-						    std::get<0>(ret));
+			unicode::bidi_cleanup(new_string,
+					      std::get<0>(ret),
+					      []
+					      (size_t)
+					      {
+					      },
+					      UNICODE_BIDI_CLEANUP_CANONICAL);
 
 			/* New string is now back in logical order */
 
diff --git a/unicode/book.xml b/unicode/book.xml
index c8948ba..b0342ea 100644
--- a/unicode/book.xml
+++ b/unicode/book.xml
@@ -304,7 +304,6 @@ See COPYING for distribution information.
 	  <refname>unicode_bidi_calc</refname>
 	  <refname>unicode_bidi_reorder</refname>
 	  <refname>unicode_bidi_cleanup</refname>
-	  <refname>unicode_bidi_extra_cleanup</refname>
 	  <refname>unicode_bidi_logical_order</refname>
 	  <refname>unicode_bidi_embed</refname>
 	  <refname>unicode_bidi_embed_paragraph_level</refname>
@@ -341,15 +340,7 @@ See COPYING for distribution information.
               <paramdef>char32_t *<parameter>string</parameter></paramdef>
               <paramdef>unicode_bidi_level_t *<parameter>levels</parameter></paramdef>
               <paramdef>size_t <parameter>n</parameter></paramdef>
-              <paramdef>void (*<parameter>removed_callback</parameter>)(size_t, size_t, void *)</paramdef>
-	      <paramdef>void *<parameter>arg</parameter></paramdef>
-	    </funcprototype>
-
-	    <funcprototype>
-	      <funcdef>size_t <function>unicode_bidi_extra_cleanup</function></funcdef>
-              <paramdef>char32_t *<parameter>string</parameter></paramdef>
-              <paramdef>unicode_bidi_level_t *<parameter>levels</parameter></paramdef>
-              <paramdef>size_t <parameter>n</parameter></paramdef>
+	      <paramdef>int <parameter>options</parameter></paramdef>
               <paramdef>void (*<parameter>removed_callback</parameter>)(size_t, size_t, void *)</paramdef>
 	      <paramdef>void *<parameter>arg</parameter></paramdef>
 	    </funcprototype>
@@ -450,8 +441,7 @@ See COPYING for distribution information.
 	      </listitem>
 	      <listitem>
 		<para>
-		  Use <function>unicode_bidi_cleanup</function>() or
-		  <function>unicode_bidi_extra_cleanup</function>(),
+		  Use <function>unicode_bidi_cleanup</function>()
 		  to remove the characters from the string which are used
 		  by the bi-directional algorithm, and are not needed for
 		  rendering the text.
@@ -585,28 +575,12 @@ See COPYING for distribution information.
 	      <quote>rendering order</quote>, but still contain bi-directional
 	      embedding, override, boundary-neutral, isolate, and marker
 	      characters.
-	      <function>unicode_bidi_cleanup</function>() and
-	      <function>unicode_bidi_extra_cleanup</function>() remove these
-	      characters and directional markers from the unicode string.
-	      <function>unicode_bidi_cleanup</function> removes only the
-	      embedding, override, and  boundry-neutral characters (as
-	      specified by step X9 of the bi-directional algorithm).
-	      <function>unicode_bidi_extra_cleanup</function>()
-	      additionally removes the isolation markers, implicit markers;
-	      and all characters
-	      classified as paragraph separators get replaced by a newline.
-            </para>
-	    <para>
-	      A non-null pointer to the directional embedding level buffer,
-	      of the same size as the string, also removes the corresponding
-	      values from the buffer, and the remaining values in the
-	      embedding level buffer get reset to
-	      levels <literal>UNICODE_BIDI_LR</literal> and
-	      <literal> UNICODE_BIDI_RL</literal>, only.
-            </para>
+	      <function>unicode_bidi_cleanup</function>()
+	      removes these characters and directional markers.
+	    </para>
 	    <para>
-	      The parameters to <function>unicode_bidi_cleanup</function>() and
-	      <function>unicode_bidi_extra_cleanup</function>() are:
+	      The parameters to <function>unicode_bidi_cleanup</function>()
+	      are:
             </para>
 
 	    <itemizedlist>
@@ -617,15 +591,66 @@ See COPYING for distribution information.
               </listitem>
 	      <listitem>
 		<para>
-		  The pointer to the directional embedding buffer.
-                </para>
+		  The pointer to the directional embedding level buffer,
+		  of the same size as the string, or NULL. If not NULL, the
+		  corresponding values also get removed from this buffer,
+		  and the remaining values in the buffer get reset to
+		  levels <literal>UNICODE_BIDI_LR</literal> and
+		  <literal>UNICODE_BIDI_RL</literal>, only.
+		</para>
               </listitem>
+
 	      <listitem>
 		<para>
 		  The size of the unicode string and the directional embedding
-		  buffer.
+		  buffer (if not NULL).
                 </para>
               </listitem>
+
+	      <listitem>
+		<para>
+		  A bitmask that selects the following options
+		  (or 0 if no options):
+		</para>
+
+		<variablelist>
+		  <varlistentry>
+		    <term><literal>UNICODE_BIDI_CLEANUP_EXTRA</literal></term>
+		    <listitem>
+		      <para>
+			In addition to removing all embedding, override, and
+			boundary-neutral characters as
+			specified by step X9 of the bi-directional algorithm
+			(the default behavior without this flag), also
+			remove all isolation markers and implicit markers.
+		      </para>
+		    </listitem>
+		  </varlistentry>
+
+		  <varlistentry>
+		    <term><literal>UNICODE_BIDI_CLEANUP_BNL</literal></term>
+		    <listitem>
+		      <para>
+			Replace all characters classified as paragraph
+			separators with a newline character.
+		      </para>
+		    </listitem>
+		  </varlistentry>
+
+		  <varlistentry>
+		    <term><literal>UNICODE_BIDI_CLEANUP_CANONICAL</literal></term>
+		    <listitem>
+		      <para>
+			A combined set of
+			<literal>UNICODE_BIDI_CLEANUP_EXTRA</literal>
+			and
+			<literal>UNICODE_BIDI_CLEANUP_BNL</literal>.
+		      </para>
+		    </listitem>
+		  </varlistentry>
+		</variablelist>
+	      </listitem>
+
 	      <listitem>
 		<para>
 		  A pointer to a function that gets repeatedly invoked with the
@@ -647,17 +672,17 @@ See COPYING for distribution information.
 	      from the first to
 	      the last removed character (if any).
             </para>
-	    <para>
-	      Multiple calls to <function>unicode_bidi_cleanup</function>() or
-	      <function>unicode_bidi_extra_cleanup</function>() do no harm;
-	      except that <function>unicode_bidi_extra_cleanup</function>()
-	      always removes all the additional characters that
-	      <function>unicode_bidi_cleanup</function>() does not remove.
-            </para>
+
 	    <para>
 	      The character string and the embedding level values resulting
-	      from <function>unicode_bidi_extra_cleanup</function>() are in
+	      from <function>unicode_bidi_cleanup</function>()
+	      with the <literal>UNICODE_BIDI_CLEANUP_CANONICAL</literal> option
+	      are in
 	      <quote>canonical rendering order</quote>.
+	      <function>unicode_bidi_logical_order</function>() and
+	      <function>unicode_bidi_embed</function>() require the
+	      canonical rendering order for their string and embedding level
+	      values.
             </para>
 	  </refsect2>
 
@@ -675,7 +700,8 @@ See COPYING for distribution information.
 	      canonical rendering order after applying
 	      <function>unicode_bidi_calc()</function>,
 	      <function>unicode_reorder()</function> and
-	      <function>unicode_bidi_extra_cleanup()</function>,
+	      <function>unicode_bidi_cleanup()</function>
+	      (with the canonical option),
 	      with the same paragraph_embedding level.
             </para>
 
@@ -2628,15 +2654,15 @@ See COPYING for distribution information.
 	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>
 
 	<refmeta>
-	  <refentrytitle>unicode::bidi::calc</refentrytitle>
+	  <refentrytitle>unicode::bidi</refentrytitle>
 	  <manvolnum>3</manvolnum>
 	</refmeta>
 
 	<refnamediv>
+	  <refname>unicode::bidi</refname>
 	  <refname>unicode::bidi_calc</refname>
 	  <refname>unicode::bidi_reorder</refname>
 	  <refname>unicode::bidi_cleanup</refname>
-	  <refname>unicode::bidi_extra_cleanup</refname>
 	  <refname>unicode::bidi_logical_order</refname>
 	  <refname>unicode::bidi_embed</refname>
 	  <refname>unicode::bidi_embed_paragraph_level</refname>
@@ -2674,6 +2700,7 @@ See COPYING for distribution information.
               <funcdef>void <function>unicode::bidi_cleanup</function></funcdef>
 	      <paramdef>std::u32string &amp;<parameter>string</parameter></paramdef>
 	      <paramdef>const std::function&lt;void (size_t) noexcept&gt; &amp;<parameter>removed_callback</parameter></paramdef>
+	      <paramdef>int <parameter>cleanup_options</parameter></paramdef>
             </funcprototype>
 
 	    <funcprototype>
@@ -2681,19 +2708,7 @@ See COPYING for distribution information.
 	      <paramdef>std::u32string &amp;<parameter>string</parameter></paramdef>
 	      <paramdef>std::vector &lt;unicode_bidi_level_t&gt; &amp;<parameter>levels</parameter></paramdef>
 	      <paramdef>const std::function&lt;void (size_t) noexcept&gt; &amp;<parameter>removed_callback</parameter></paramdef>
-            </funcprototype>
-
-	    <funcprototype>
-              <funcdef>void <function>unicode::bidi_extra_cleanup</function></funcdef>
-	      <paramdef>std::u32string &amp;<parameter>string</parameter></paramdef>
-	      <paramdef>const std::function&lt;void (size_t) noexcept&gt; &amp;<parameter>removed_callback</parameter></paramdef>
-            </funcprototype>
-
-	    <funcprototype>
-              <funcdef>int <function>unicode::bidi_extra_cleanup</function></funcdef>
-	      <paramdef>std::u32string &amp;<parameter>string</parameter></paramdef>
-	      <paramdef>std::vector &lt;unicode_bidi_level_t&gt; &amp;<parameter>levels</parameter></paramdef>
-	      <paramdef>const std::function&lt;void (size_t) noexcept&gt; &amp;<parameter>removed_callback</parameter></paramdef>
+	      <paramdef>int <parameter>cleanup_options</parameter></paramdef>
             </funcprototype>
 
 	    <funcprototype>
@@ -2789,7 +2804,51 @@ See COPYING for distribution information.
               </para>
             </listitem>
           </itemizedlist>
+
+	  <refsect2 id="unicode_cpp_bidi_literals">
+	    <title><literal>unicode::literals</literal> namespace</title>
+
+	    <blockquote>
+	      <informalexample>
+		<programlisting><![CDATA[
+using namespace unicode::literals;
+
+std::u32string foo(std::u32string bar)
+{
+	return bar + LRO;
+}
+]]></programlisting>
+	      </informalexample>
+	    </blockquote>
+
+	    <para>
+	      This namespace contains the following <literal>constexpr</literal>
+	      definitions:
+	    </para>
+
+	    <itemizedlist>
+	      <listitem>
+		<para>
+		  <classname>char32_t</classname> arrays with literal
+		  Unicode character strings containing Unicode directional,
+		  isolate, and override markers, like
+		  <literal>LRO</literal>,
+		  <literal>RLO</literal> and others.
+		</para>
+	      </listitem>
+	      <listitem>
+		<para>
+		  <literal>CLEANUP_EXTRA</literal>,
+		  <literal>CLEANUP_BNL</literal>, and
+		  <literal>CLEANUP_CANONICAL</literal> options for
+		  <function>unicode::bidi_cleanup</function>().
+		</para>
+	      </listitem>
+	    </itemizedlist>
+
+	  </refsect2>
 	</refsect1>
+
 	<refsect1 id="unicode_cpp_bidi_seealso">
 	  <title>SEE ALSO</title>
 	  <para>
diff --git a/unicode/courier-unicode.h.in b/unicode/courier-unicode.h.in
index cc9dbbb..3de76d3 100644
--- a/unicode/courier-unicode.h.in
+++ b/unicode/courier-unicode.h.in
@@ -548,6 +548,24 @@ size_t unicode_wbscan_end(unicode_wbscan_info_t i);
 #define UNICODE_LRO	0x202d /* Left-to-right override */
 #define UNICODE_PDF	0x202c /* Pop directional override */
 
+#ifdef __cplusplus
+#if __cplusplus >= 201103L
+namespace unicode {
+	namespace literals {
+
+		constexpr char32_t LRM[]={UNICODE_LRM, 0};
+		constexpr char32_t RLM[]={UNICODE_RLM, 0};
+		constexpr char32_t ALM[]={UNICODE_ALM, 0};
+		constexpr char32_t LRI[]={UNICODE_LRI, 0};
+		constexpr char32_t RLI[]={UNICODE_RLI, 0};
+		constexpr char32_t PDI[]={UNICODE_PDI, 0};
+		constexpr char32_t RLO[]={UNICODE_RLO, 0};
+		constexpr char32_t LRO[]={UNICODE_LRO, 0};
+		constexpr char32_t PDF[]={UNICODE_PDF, 0};
+	}
+}
+#endif
+#endif
 
 typedef char unicode_bidi_bracket_type_t;
 
@@ -608,19 +626,50 @@ typedef enum {
 
 extern enum_bidi_type_t unicode_bidi_type(char32_t c);
 
+/* Bitmask options to unicode_bidi_cleanup */
+
+/*
+ In addition to removing embedding, override, and boundary-neutral
+ characters also remove isolation markers and implicit markers.
+*/
+
+#define UNICODE_BIDI_CLEANUP_EXTRA	1
+
+/*
+  Replace all characters classified as paragraph separators by a newline
+  character.
+*/
+
+#define UNICODE_BIDI_CLEANUP_BNL	2
+
+/*
+  Options for canonical rendering order.
+*/
+
+#define UNICODE_BIDI_CLEANUP_CANONICAL				\
+	(UNICODE_BIDI_CLEANUP_EXTRA | UNICODE_BIDI_CLEANUP_BNL)
+
+#ifdef __cplusplus
+#if __cplusplus >= 201103L
+namespace unicode {
+	namespace literals {
+		constexpr int CLEANUP_EXTRA=UNICODE_BIDI_CLEANUP_EXTRA;
+
+		constexpr int CLEANUP_BNL=UNICODE_BIDI_CLEANUP_BNL;
+
+		constexpr int CLEANUP_CANONICAL=UNICODE_BIDI_CLEANUP_CANONICAL;
+	}
+}
+#endif
+#endif
+
 extern size_t unicode_bidi_cleanup(char32_t *string,
 				   unicode_bidi_level_t *levels,
 				   size_t n,
+				   int options,
 				   void (*removed_callback)(size_t, void *),
 				   void *);
 
-extern size_t unicode_bidi_extra_cleanup(char32_t *string,
-					 unicode_bidi_level_t *levels,
-					 size_t n,
-					 void (*removed_callback)(size_t,
-								  void *),
-					 void *);
-
 extern void unicode_bidi_logical_order(char32_t *string,
 				       unicode_bidi_level_t *levels,
 				       size_t n,
@@ -2147,7 +2196,8 @@ void bidi_reorder(std::vector<unicode_bidi_level_t> &levels,
 
 void bidi_cleanup(std::u32string &string,
 		  const std::function<void (size_t)> &removed_callback=
-		  [](size_t) {});
+		  [](size_t) {},
+		  int cleanup_options=0);
 
 //! Also remove them from the embedding direction level buffer.
 
@@ -2156,28 +2206,8 @@ void bidi_cleanup(std::u32string &string,
 int bidi_cleanup(std::u32string &string,
 		 std::vector<unicode_bidi_level_t> &levels,
 		 const std::function<void (size_t)> &removed_callback=
-		  [](size_t) {});
-
-
-//! Remove directional markers and isolation markers.
-
-//! Removes them from the string, in place. Optional lambda gets notified
-//! of the index (in the original string, of each removed marker.
-
-void bidi_extra_cleanup(std::u32string &string,
-			const std::function<void (size_t)>
-			&removed_callback=
-			[](size_t) {});
-
-//! Also remove them from the embedding direction level buffer.
-
-//! Returns non-0 in case of non-matching level buffer size.
-
-int bidi_extra_cleanup(std::u32string &string,
-		       std::vector<unicode_bidi_level_t> &levels,
-		       const std::function<void (size_t)>
-		       &removed_callback=
-		       [](size_t) {});
+		 [](size_t) {},
+		 int cleanup_options=0);
 
 //! Convert Unicode string from canonical rendering order to logical order.
 int bidi_logical_order(std::u32string &string,
@@ -2189,8 +2219,7 @@ int bidi_logical_order(std::u32string &string,
 //! Convert Unicode string from canonical rendering order to logical order.
 void bidi_logical_order(std::vector<unicode_bidi_level_t> &levels,
 			unicode_bidi_level_t paragraph_embedding,
-			const std::function<void (size_t, size_t)>
-			&lambda);
+			const std::function<void (size_t, size_t)> &lambda);
 
 //! Embed directional and isolation markers
 
diff --git a/unicode/unicode_bidi.c b/unicode/unicode_bidi.c
index 79c4db5..cfae12f 100644
--- a/unicode/unicode_bidi.c
+++ b/unicode/unicode_bidi.c
@@ -2032,6 +2032,7 @@ void unicode_bidi_reorder(char32_t *p,
 size_t unicode_bidi_cleanup(char32_t *string,
 			    unicode_bidi_level_t *levels,
 			    size_t n,
+			    int cleanup_options,
 			    void (*removed_callback)(size_t, void *),
 			    void *arg)
 {
@@ -2040,7 +2041,13 @@ size_t unicode_bidi_cleanup(char32_t *string,
 	{
 		enum_bidi_type_t cl=unicode_bidi_type(string[j]);
 
-		if (IS_X9(cl))
+		if (cleanup_options & UNICODE_BIDI_CLEANUP_EXTRA
+		    ? (
+		       is_explicit_indicator_except_b(cl) ||
+		       (string[j] == UNICODE_LRM ||
+			string[j] == UNICODE_RLM ||
+			string[j] == UNICODE_ALM))
+		    : IS_X9(cl))
 		{
 			if (removed_callback)
 				(*removed_callback)(j, arg);
@@ -2048,34 +2055,9 @@ size_t unicode_bidi_cleanup(char32_t *string,
 		}
 		if (levels)
 			levels[i]=levels[j] & 1;
-		++i;
-	}
-	return i;
-}
-
-size_t unicode_bidi_extra_cleanup(char32_t *string,
-				  unicode_bidi_level_t *levels,
-				  size_t n,
-				  void (*removed_callback)(size_t, void *),
-				  void *arg)
-{
-	size_t i=0;
-	for (size_t j=0; j<n; ++j)
-	{
-		enum_bidi_type_t cl=unicode_bidi_type(string[j]);
 
-		if (is_explicit_indicator_except_b(cl) ||
-		    (string[j] == UNICODE_LRM ||
-		     string[j] == UNICODE_RLM ||
-		     string[j] == UNICODE_ALM))
-		{
-			if (removed_callback)
-				(*removed_callback)(j, arg);
-			continue;
-		}
-		string[i]=cl == UNICODE_BIDI_TYPE_B ? '\n' : string[j];
-		if (levels)
-			levels[i]=levels[j] & 1;
+		string[i]=(cleanup_options & UNICODE_BIDI_CLEANUP_BNL)
+			&& cl == UNICODE_BIDI_TYPE_B ? '\n' : string[j];
 		++i;
 	}
 	return i;
diff --git a/unicode/unicodecpp.C b/unicode/unicodecpp.C
index 4217630..a0d5ac4 100644
--- a/unicode/unicodecpp.C
+++ b/unicode/unicodecpp.C
@@ -691,7 +691,8 @@ extern "C" {
 }
 
 void unicode::bidi_cleanup(std::u32string &string,
-			   const std::function<void (size_t)> &lambda)
+			   const std::function<void (size_t)> &lambda,
+			   int cleanup_options)
 {
 	if (string.empty())
 		return;
@@ -701,6 +702,7 @@ void unicode::bidi_cleanup(std::u32string &string,
 	size_t n=unicode_bidi_cleanup(&string[0],
 				      0,
 				      string.size(),
+				      cleanup_options,
 				      removed_callback,
 				      reinterpret_cast<void *>(&cb));
 	cb.rethrow();
@@ -709,15 +711,20 @@ void unicode::bidi_cleanup(std::u32string &string,
 
 int unicode::bidi_cleanup(std::u32string &string,
 			  std::vector<unicode_bidi_level_t> &levels,
-			  const std::function<void (size_t)> &lambda)
+			  const std::function<void (size_t)> &lambda,
+			  int cleanup_options)
 {
 	if (levels.size() != string.size())
 		return -1;
 
+	if (levels.size() == 0)
+		return 0;
+
 	cb_wrapper<void (size_t)> cb{lambda};
 	size_t n=unicode_bidi_cleanup(&string[0],
 				      &levels[0],
 				      string.size(),
+				      cleanup_options,
 				      removed_callback,
 				      reinterpret_cast<void *>(&cb));
 	cb.rethrow();
@@ -727,42 +734,6 @@ int unicode::bidi_cleanup(std::u32string &string,
 	return 0;
 }
 
-
-void unicode::bidi_extra_cleanup(std::u32string &string,
-				 const std::function<void (size_t)> &lambda)
-{
-	if (string.empty())
-		return;
-
-	cb_wrapper<void (size_t)> cb{lambda};
-	size_t n=unicode_bidi_extra_cleanup(&string[0],
-					    0,
-					    string.size(),
-					    removed_callback,
-					    reinterpret_cast<void *>(&cb));
-	cb.rethrow();
-	string.resize(n);
-}
-
-int unicode::bidi_extra_cleanup(std::u32string &string,
-				std::vector<unicode_bidi_level_t> &levels,
-				const std::function<void (size_t)> &lambda)
-{
-	if (levels.size() != string.size())
-		return -1;
-
-	cb_wrapper<void (size_t)> cb{lambda};
-	size_t n=unicode_bidi_extra_cleanup(&string[0],
-					    &levels[0],
-					    string.size(),
-					    removed_callback,
-					    reinterpret_cast<void *>(&cb));
-	cb.rethrow();
-	string.resize(n);
-	levels.resize(n);
-	return 0;
-}
-
 int unicode::bidi_logical_order(std::u32string &string,
 				std::vector<unicode_bidi_level_t> &levels,
 				unicode_bidi_level_t paragraph_embedding,