summaryrefslogtreecommitdiffstats
path: root/rfc2045/rfc3676parser.c
diff options
context:
space:
mode:
authorSam Varshavchik2013-08-19 16:39:41 -0400
committerSam Varshavchik2013-08-25 14:43:51 -0400
commit9c45d9ad13fdf439d44d7443ae75da15ea0223ed (patch)
tree7a81a04cb51efb078ee350859a64be2ebc6b8813 /rfc2045/rfc3676parser.c
parenta9520698b770168d1f33d6301463bb70a19655ec (diff)
downloadcourier-libs-9c45d9ad13fdf439d44d7443ae75da15ea0223ed.tar.bz2
Initial checkin
Imported from subversion report, converted to git. Updated all paths in scripts and makefiles, reflecting the new directory hierarchy.
Diffstat (limited to 'rfc2045/rfc3676parser.c')
-rw-r--r--rfc2045/rfc3676parser.c1005
1 files changed, 1005 insertions, 0 deletions
diff --git a/rfc2045/rfc3676parser.c b/rfc2045/rfc3676parser.c
new file mode 100644
index 0000000..332d22b
--- /dev/null
+++ b/rfc2045/rfc3676parser.c
@@ -0,0 +1,1005 @@
+/*
+** Copyright 2011 Double Precision, Inc. See COPYING for
+** distribution information.
+*/
+
+#include "rfc2045_config.h"
+#include "rfc3676parser.h"
+#include <stdlib.h>
+#include <string.h>
+
+/*
+** Base width used when rewrapping non-flowed lines: the target width of a
+** line is this value less the line's quote level, but never below 20.
+*/
+#define NONFLOWED_WRAP_REDUCE 74
+
+/*
+** A non-flowed line gets forcibly rewrapped once its accumulated width
+** exceeds the target width by more than this amount.
+*/
+#define NONFLOWED_THRESHOLD_EXCEEDED 30
+
+
+/*
+** Default line event handlers: these forward straight to the callbacks
+** supplied by the user in struct rfc3676_parser_info.
+*/
+static void emit_line_begin(rfc3676_parser_t handle);
+
+static void emit_line_contents(rfc3676_parser_t handle,
+ const unicode_char *uc,
+ size_t cnt);
+
+static void emit_line_flowed_wrap(rfc3676_parser_t handle);
+
+static void emit_line_end(rfc3676_parser_t handle);
+
+
+/*
+** Line event handlers installed for non-flowed text. They pass the line
+** through the linebreaking API before reaching the emit_line_*() functions.
+*/
+static void nonflowed_line_begin(rfc3676_parser_t handle);
+
+static void nonflowed_line_contents(rfc3676_parser_t handle,
+ const unicode_char *uc,
+ size_t cnt);
+
+static void nonflowed_line_end(rfc3676_parser_t handle);
+
+/* Callback given to unicode_lbc_init(); "dummy" carries the parser handle. */
+static int nonflowed_line_process(int linebreak_opportunity,
+ unicode_char ch, void *dummy);
+
+/*
+** Dispatch a line event through whichever handler is currently installed
+** in the parser handle (the default emitters, or the non-flowed rewrappers).
+*/
+
+#define EMIT_LINE_BEGIN(h) \
+    do { ((h)->line_begin_handler)(h); } while (0)
+
+#define EMIT_LINE_CONTENTS(h, uc, cnt) \
+    do { ((h)->line_content_handler)((h), (uc), (cnt)); } while (0)
+
+#define EMIT_LINE_END(h) \
+    do { ((h)->line_end_handler)(h); } while (0)
+
+/*
+** Parser state. One of these is allocated by rfc3676parser_init() and
+** released by rfc3676parser_deinit(). Parsing is organized as layers of
+** function pointers that get swapped as the input is consumed.
+*/
+struct rfc3676_parser_struct {
+
+ /* Copy of the caller's settings and callbacks */
+ struct rfc3676_parser_info info;
+
+ /* Converter from the source charset to unicode */
+ libmail_u_convert_handle_t uhandle;
+
+ /* First non-zero error code seen; once set, parsing stops */
+ int errflag;
+
+ /* Receive raw text stream, converted to unicode */
+ size_t (*line_handler)(rfc3676_parser_t,
+ const unicode_char *ptr, size_t cnt);
+
+ /*
+ ** Receive mostly raw text stream: CRs that precede an LF
+ ** are removed from the stream received by content_handler.
+ */
+ size_t (*content_handler)(rfc3676_parser_t,
+ const unicode_char *ptr, size_t cnt);
+
+ /* Number of leading '>' characters counted on the current line */
+ size_t quote_level;
+
+ /* How many characters of the "-- " signature marker matched so far */
+ size_t sig_block_index;
+
+ /*
+ ** Flag: previous line ended in a flowed space, and the previous
+ ** line's quoting level was this.
+ */
+ int has_previous_quote_level;
+ size_t previous_quote_level;
+
+ /*
+ ** Flag: current line was flowed into from a previous line with the
+ ** same quoting level.
+ */
+ int was_previous_quote_level;
+
+ /* A line has begun */
+ void (*line_begin_handler)(rfc3676_parser_t handle);
+
+ /* Content of this line */
+ void (*line_content_handler)(rfc3676_parser_t handle,
+ const unicode_char *uc,
+ size_t cnt);
+
+ /* End of this line */
+ void (*line_end_handler)(rfc3676_parser_t handle);
+
+
+ /*
+ ** When non-flowed text is getting rewrapped, we utilize the services
+ ** of the unicode_lbc_info API.
+ */
+
+ unicode_lbc_info_t lb;
+
+ struct unicode_buf nonflowed_line;
+ /* Collect unflowed line until it reaches the given size */
+
+ struct unicode_buf nonflowed_next_word;
+ /* Collects unicode stream until a linebreaking opportunity */
+
+ size_t nonflowed_line_target_width;
+ /* Targeted width of nonflowed lines */
+
+ size_t nonflowed_line_width; /* Width of nonflowed_line */
+
+ size_t nonflowed_next_word_width; /* Width of nonflowed_next_word */
+
+ /* Current handler of non-flowed content. */
+ void (*nonflowed_line_process)(struct rfc3676_parser_struct *handle,
+ int linebreak_opportunity,
+ unicode_char ch,
+ size_t ch_width);
+
+ /* Current end-of-line handler of non-flowed content. */
+ void (*nonflowed_line_end)(struct rfc3676_parser_struct *handle);
+};
+
+/* Callback given to libmail_u_convert_init(); receives converted text. */
+static int parse_unicode(const char *, size_t, void *);
+
+/*
+** line_handler states: they look for a CR that precedes an LF, and pass
+** everything else to the current content_handler.
+*/
+static size_t scan_crlf(rfc3676_parser_t handle,
+ const unicode_char *ptr, size_t cnt);
+
+static size_t scan_crlf_seen_cr(rfc3676_parser_t handle,
+ const unicode_char *ptr, size_t cnt);
+
+/*
+** content_handler states. Each one takes the unread input, returns how
+** many characters it consumed, and may install a different state.
+** A NULL ptr signals the end of input.
+*/
+static size_t start_of_line(rfc3676_parser_t handle,
+ const unicode_char *ptr, size_t cnt);
+
+static size_t count_quote_level(rfc3676_parser_t handle,
+ const unicode_char *ptr, size_t cnt);
+
+static size_t counted_quote_level(rfc3676_parser_t handle,
+ const unicode_char *ptr, size_t cnt);
+
+static size_t check_signature_block(rfc3676_parser_t handle,
+ const unicode_char *ptr, size_t cnt);
+
+static size_t start_content_line(rfc3676_parser_t handle,
+ const unicode_char *ptr, size_t cnt);
+
+static size_t scan_content_line(rfc3676_parser_t handle,
+ const unicode_char *ptr, size_t cnt);
+
+static size_t seen_sig_block(rfc3676_parser_t handle,
+ const unicode_char *ptr, size_t cnt);
+
+static size_t seen_notsig_block(rfc3676_parser_t handle,
+ const unicode_char *ptr, size_t cnt);
+
+static size_t seen_content_sp(rfc3676_parser_t handle,
+ const unicode_char *ptr, size_t cnt);
+
+
+/*
+** Allocate and initialize a parser.
+**
+** The top layer initializes the conversion to unicode: text in
+** info->charset is converted and delivered to parse_unicode().
+**
+** info is copied into the new handle. For non-flowed text the delsp
+** setting is forced off, and the line handlers that can rewrap overlong
+** lines are installed in place of the plain emitters.
+**
+** Returns the new handle, or NULL if info is NULL, if memory could not be
+** allocated, or if the unicode converter could not be initialized.
+*/
+
+rfc3676_parser_t rfc3676parser_init(const struct rfc3676_parser_info *info)
+{
+    rfc3676_parser_t handle;
+
+    /* Nothing to copy the settings and callbacks from. */
+    if (!info)
+        return NULL;
+
+    /* calloc() leaves every field not set below zeroed, including lb. */
+    handle=calloc(1, sizeof(*handle));
+
+    if (!handle)
+        return NULL;
+
+    handle->info=*info;
+    if ((handle->uhandle=libmail_u_convert_init(info->charset,
+                                                libmail_u_ucs4_native,
+                                                parse_unicode,
+                                                handle)) == NULL)
+    {
+        free(handle);
+        return NULL;
+    }
+
+    /* Initial state: looking for CRs, at the start of a line. */
+    handle->line_handler=scan_crlf;
+    handle->content_handler=start_of_line;
+    handle->has_previous_quote_level=0;
+    handle->previous_quote_level=0;
+
+    unicode_buf_init(&handle->nonflowed_line, (size_t)-1);
+    unicode_buf_init(&handle->nonflowed_next_word, (size_t)-1);
+
+    if (handle->info.isflowed)
+    {
+        handle->line_begin_handler=emit_line_begin;
+        handle->line_content_handler=emit_line_contents;
+        handle->line_end_handler=emit_line_end;
+    }
+    else
+    {
+        handle->info.isdelsp=0; /* Sanity check */
+
+        handle->line_begin_handler=nonflowed_line_begin;
+        handle->line_content_handler=nonflowed_line_contents;
+        handle->line_end_handler=nonflowed_line_end;
+    }
+    return handle;
+}
+
+/*
+** Feed the next chunk of text to the parser. Returns zero, or the error
+** code that stopped the parsing.
+*/
+int rfc3676parser(rfc3676_parser_t handle,
+                  const char *txt,
+                  size_t txt_cnt)
+{
+    /* An error that occurred previously is reported again. */
+    if (handle->errflag != 0)
+        return handle->errflag;
+
+    /* The converter delivers the unicode text to parse_unicode(). */
+    return libmail_u_convert(handle->uhandle, txt, txt_cnt);
+}
+
+/*
+** Converter callback. The converted text arrives as a byte buffer; it gets
+** copied, a piece at a time, into a unicode_char array, and each piece is
+** fed to the current line handler until the handler has consumed all of it,
+** or an error is flagged.
+**
+** Returns the parser's error flag.
+*/
+
+static int parse_unicode(const char *ucs4, size_t nbytes, void *arg)
+{
+    rfc3676_parser_t handle=(rfc3676_parser_t)arg;
+    unicode_char chunk[128];
+
+    while (nbytes > 0 && handle->errflag == 0)
+    {
+        /* Take no more than what fits into the temporary buffer. */
+        size_t chunk_bytes=nbytes < sizeof(chunk) ? nbytes:sizeof(chunk);
+        size_t remaining;
+        const unicode_char *cursor=chunk;
+
+        memcpy(chunk, ucs4, chunk_bytes);
+        ucs4 += chunk_bytes;
+        nbytes -= chunk_bytes;
+
+        remaining=chunk_bytes / sizeof(unicode_char);
+
+        /* The handler reports how much it took; offer it the rest. */
+        while (remaining > 0 && handle->errflag == 0)
+        {
+            size_t used=(*handle->line_handler)(handle, cursor,
+                                                remaining);
+
+            if (handle->errflag != 0)
+                break;
+
+            cursor += used;
+            remaining -= used;
+        }
+    }
+
+    return handle->errflag;
+}
+
+/*
+** Finish parsing and destroy the handle.
+**
+** The unicode converter is shut down first. If no error was reported so
+** far, the current line handler is told about the end of input. Any
+** leftover linebreaking handle is closed, and all memory is released.
+**
+** Returns zero, or the first error code encountered.
+*/
+int rfc3676parser_deinit(rfc3676_parser_t handle, int *errptr)
+{
+    int status=libmail_u_convert_deinit(handle->uhandle, errptr);
+
+    if (!status)
+        status=handle->errflag;
+
+    if (!status)
+    {
+        /* A NULL buffer is the end-of-input indication. */
+        (*handle->line_handler)(handle, NULL, 0);
+        status=handle->errflag;
+    }
+
+    if (handle->lb != NULL)
+    {
+        int lb_status=unicode_lbc_end(handle->lb);
+
+        if (status == 0 && lb_status)
+            status=lb_status;
+    }
+
+    unicode_buf_deinit(&handle->nonflowed_line);
+    unicode_buf_deinit(&handle->nonflowed_next_word);
+
+    free(handle);
+    return status;
+}
+
+/*
+** Line handler: pass through everything up to the next CR. The CR itself
+** is held back until the following character shows whether it's an LF.
+**
+** A NULL ptr is the end of input, which gets forwarded to the content
+** handler. Returns the number of characters consumed.
+*/
+
+static size_t scan_crlf(rfc3676_parser_t handle,
+                        const unicode_char *ptr, size_t cnt)
+{
+    size_t run;
+    size_t consumed;
+
+    if (!ptr)
+    {
+        if (handle->errflag == 0)
+            (*handle->content_handler)(handle, NULL, 0);
+        return 0;
+    }
+
+    /* Measure the text in front of the first CR. */
+    run=0;
+    while (run < cnt && ptr[run] != '\r')
+        ++run;
+
+    if (run == 0)
+    {
+        /* Consume the first character, the CR */
+        handle->line_handler=scan_crlf_seen_cr;
+        return 1;
+    }
+
+    /* The content handler may take it in several bites. */
+    for (consumed=0; run > 0 && handle->errflag == 0; )
+    {
+        size_t n=(*handle->content_handler)(handle, ptr, run);
+
+        ptr += n;
+        consumed += n;
+        run -= n;
+    }
+    return consumed;
+}
+
+/*
+** Line handler: a CR was just consumed, look at what follows it. If it's
+** an LF the CR is dropped. Otherwise the CR is put back into the stream
+** received by the content handler. Either way, scan_crlf() resumes.
+*/
+
+static size_t scan_crlf_seen_cr(rfc3676_parser_t handle,
+                                const unicode_char *ptr, size_t cnt)
+{
+    unicode_char held_cr='\r';
+    int lf_follows=(ptr != NULL && *ptr == '\n');
+
+    handle->line_handler=scan_crlf;
+
+    if (!lf_follows)
+    {
+        /* Keep offering the CR until it's taken, or an error occurs */
+        while (handle->errflag == 0 &&
+               !(*handle->content_handler)(handle, &held_cr, 1))
+            ;
+    }
+
+    return scan_crlf(handle, ptr, cnt);
+}
+
+/*
+** From this point on, CRLF are collapsed into NLs, so don't need to worry
+** about them.
+*/
+
+
+/*
+** Content handler at the start of a physical line. At the end of input,
+** a logical line that is still open because the last line was flowed gets
+** closed. Otherwise, counting of the quote level begins.
+*/
+
+static size_t start_of_line(rfc3676_parser_t handle,
+                            const unicode_char *ptr, size_t cnt)
+{
+    if (ptr != NULL)
+    {
+        /* Begin counting the quote level */
+        handle->quote_level=0;
+        handle->content_handler=count_quote_level;
+        return count_quote_level(handle, ptr, cnt);
+    }
+
+    /* EOF */
+
+    if (handle->has_previous_quote_level)
+        EMIT_LINE_END(handle); /* Last line was flowed */
+
+    return cnt;
+}
+
+/*
+** Content handler: count the leading '>' characters of flowed content.
+** Nothing is counted for non-flowed content. Once a different character
+** shows up, or at the end of input, counted_quote_level() takes over.
+*/
+
+static size_t count_quote_level(rfc3676_parser_t handle,
+                                const unicode_char *ptr, size_t cnt)
+{
+    size_t n=0;
+
+    if (!ptr)
+    {
+        /* EOF, pretend that the quote level was counted */
+        handle->content_handler=counted_quote_level;
+        return counted_quote_level(handle, ptr, cnt);
+    }
+
+    if (handle->info.isflowed)
+    {
+        while (n < cnt && ptr[n] == '>')
+        {
+            ++handle->quote_level;
+            ++n;
+        }
+    }
+
+    /* Nothing but quote markers so far, there may be more to come. */
+    if (n == cnt)
+        return n;
+
+    handle->content_handler=counted_quote_level;
+
+    /*
+    ** Counted markers get consumed first; the remainder comes back to
+    ** the new handler. With no markers, hand the input over right away.
+    */
+    return n ? n:counted_quote_level(handle, ptr, cnt);
+}
+
+/*
+** Content handler: this line's quote level is now known.
+**
+** If the previous line was flowed and had the same quote level, this line
+** continues the same logical line. Otherwise the previous logical line,
+** if still open, is ended, and a new logical line begins.
+*/
+
+static size_t counted_quote_level(rfc3676_parser_t handle,
+                                  const unicode_char *ptr, size_t cnt)
+{
+    int flowed_into=handle->has_previous_quote_level &&
+        handle->quote_level == handle->previous_quote_level;
+
+    /* Remember whether this line was flowed into */
+    handle->was_previous_quote_level=flowed_into ? 1:0;
+
+    if (!flowed_into)
+    {
+        /*
+        ** A flowed line followed by a different quote level gets
+        ** force-terminated before this line begins.
+        */
+        if (handle->has_previous_quote_level)
+            EMIT_LINE_END(handle);
+
+        EMIT_LINE_BEGIN(handle);
+    }
+
+    /* Assume this line won't be flowed, until shown otherwise */
+    handle->has_previous_quote_level=0;
+
+    if (!handle->info.isflowed)
+    {
+        /*
+        ** Content that's not flowed has no space-stuffing and no
+        ** sig block checking.
+        */
+        handle->content_handler=scan_content_line;
+        return scan_content_line(handle, ptr, cnt);
+    }
+
+    handle->content_handler=start_content_line;
+
+    if (ptr && *ptr == ' ')
+        return 1; /* Remove stuffed space */
+
+    return start_content_line(handle, ptr, cnt);
+}
+
+/*
+** Minor deviation from RFC3676, but this fixes a lot of broken text.
+**
+** If the previous line was flowed, but this is an empty line (optionally
+** space-stuffed), unflow the last line (make it fixed), and this becomes
+** a fixed line too. Example:
+**
+** this is the last end of a paragraph[SPACE]
+** [SPACE]
+** This is the first line of the next paragraph.
+**
+** Strict RFC3676 rules will parse this as a flowed line, then a fixed line,
+** resulting in no paragraph breaks.
+*/
+
+static size_t start_content_line(rfc3676_parser_t handle,
+                                 const unicode_char *ptr, size_t cnt)
+{
+    /* An empty line that was flowed into from the previous line? */
+    int empty_after_flow=ptr != NULL && *ptr == '\n' &&
+        handle->was_previous_quote_level;
+
+    /* The signature block check comes right after this one. */
+    handle->sig_block_index=0;
+    handle->content_handler=check_signature_block;
+
+    if (empty_after_flow)
+    {
+        /* Unflow: end the previous line, this one stands alone. */
+        EMIT_LINE_END(handle);
+        EMIT_LINE_BEGIN(handle);
+        handle->was_previous_quote_level=0;
+    }
+
+    return check_signature_block(handle, ptr, cnt);
+}
+
+
+/* The signature separator: two dashes and a space */
+static const unicode_char sig_block[]={'-', '-', ' '};
+
+/*
+** Content handler: match the start of the line against sig_block, one
+** character per call. After the whole marker matched, seen_sig_block()
+** checks what follows it. On a mismatch, or at the end of input,
+** seen_notsig_block() takes over.
+*/
+
+static size_t check_signature_block(rfc3676_parser_t handle,
+                                    const unicode_char *ptr, size_t cnt)
+{
+    if (!ptr || *ptr != sig_block[handle->sig_block_index])
+        return seen_notsig_block(handle, ptr, cnt);
+
+    ++handle->sig_block_index;
+
+    if (handle->sig_block_index == sizeof(sig_block)/sizeof(sig_block[0]))
+    {
+        /* Well, it's there, but does a NL follow? */
+        handle->content_handler=seen_sig_block;
+    }
+
+    return 1;
+}
+
+/*
+** Content handler: the line began with the complete signature marker.
+** It is a signature separator only if the line ends right here, at a NL
+** or at the end of input. If so, the marker is emitted as a line of its
+** own; if not, the line is ordinary content.
+*/
+static size_t seen_sig_block(rfc3676_parser_t handle,
+                             const unicode_char *ptr, size_t cnt)
+{
+    if (ptr != NULL && *ptr != '\n')
+        return seen_notsig_block(handle, ptr, cnt);
+
+    if (handle->was_previous_quote_level)
+    {
+        /*
+        ** The sig block is not flowable-into content. Terminate
+        ** the previous, flowed, line before emitting it.
+        */
+        EMIT_LINE_END(handle);
+        EMIT_LINE_BEGIN(handle);
+        handle->was_previous_quote_level=0;
+    }
+
+    handle->content_handler=start_of_line;
+
+    /* Pass through the sig block */
+    EMIT_LINE_CONTENTS(handle, sig_block,
+                       sizeof(sig_block)/sizeof(sig_block[0]));
+    EMIT_LINE_END(handle);
+
+    /* The NL gets consumed; at the end of input there's nothing. */
+    return ptr ? 1:0;
+}
+
+/*
+** This is not a sig block line.
+**
+** If this line was flowed into, the flowed wrap notification is issued.
+** The characters of the signature marker that were already matched, and
+** consumed, get replayed as ordinary line content, then the new input is
+** passed to the content handler.
+**
+** Returns the number of characters of the new input that were consumed.
+*/
+
+static size_t seen_notsig_block(rfc3676_parser_t handle,
+ const unicode_char *newptr, size_t newcnt)
+{
+ const unicode_char *ptr;
+ size_t i;
+
+ if (handle->was_previous_quote_level)
+ emit_line_flowed_wrap(handle);
+
+ handle->content_handler=scan_content_line;
+
+ /* Replay the first sig_block_index characters of the marker */
+ ptr=sig_block;
+ i=handle->sig_block_index;
+
+ while (i && handle->errflag == 0)
+ {
+ /*
+ ** Go through the pointer, not scan_content_line() directly:
+ ** the replayed text may install a different handler.
+ */
+ size_t n=(*handle->content_handler)(handle, ptr, i);
+
+ ptr += n;
+ i -= n;
+ }
+
+ /* Whichever handler is now current receives the new input */
+ return (*handle->content_handler)(handle, newptr, newcnt);
+}
+
+/*
+** Content handler: pass through line content up to a NL or, in flowed
+** content only, up to a space. The space might be a flowed line's
+** trailing space, so seen_content_sp() checks what follows it. A NL, or
+** the end of input, ends the logical line.
+*/
+
+static size_t scan_content_line(rfc3676_parser_t handle,
+                                const unicode_char *ptr, size_t cnt)
+{
+    size_t len=0;
+
+    if (ptr)
+    {
+        while (len < cnt)
+        {
+            unicode_char c=ptr[len];
+
+            if (c == '\n' || (c == ' ' && handle->info.isflowed))
+                break;
+            ++len;
+        }
+    }
+
+    if (len)
+    {
+        /* Anything before the NL or the potentially flowable SP */
+        EMIT_LINE_CONTENTS(handle, ptr, len);
+        return len;
+    }
+
+    if (ptr && ptr[0] == ' ')
+    {
+        handle->content_handler=seen_content_sp;
+        return 1;
+    }
+
+    /* NL. This line does not flow */
+    EMIT_LINE_END(handle);
+
+    handle->content_handler=start_of_line;
+
+    return ptr ? 1:0;
+}
+
+/*
+** Content handler: a space in flowed content was consumed, look at what
+** follows it. A NL makes this a flowed line: the quote level is recorded
+** for the next line, and the space is emitted unless delsp is in effect.
+** Anything else means it was an ordinary space.
+*/
+static size_t seen_content_sp(rfc3676_parser_t handle,
+                              const unicode_char *ptr, size_t cnt)
+{
+    unicode_char space=' ';
+    int flowed=(ptr != NULL && *ptr == '\n');
+
+    handle->content_handler=scan_content_line;
+
+    if (!flowed)
+    {
+        /* Pass through the space, then resume scanning. */
+        EMIT_LINE_CONTENTS(handle, &space, 1);
+        return scan_content_line(handle, ptr, cnt);
+    }
+
+    /* NL after a SP -- flowed line */
+
+    if (!handle->info.isdelsp)
+        EMIT_LINE_CONTENTS(handle, &space, 1);
+
+    handle->previous_quote_level=handle->quote_level;
+    handle->has_previous_quote_level=1;
+    handle->content_handler=start_of_line;
+
+    return 1; /* ptr is not NULL here: the NL gets consumed */
+}
+
+/**************************************************************************/
+
+/*
+** At this point, the processing has reduced to the following API:
+**
+** + begin logical line
+**
+** + contents of the logical line (multiple consecutive invocations)
+**
+** + the logical line has flowed onto the next physical line
+**
+** + end of logical line
+**
+** The third one, logical line flowed, is normally used for flowed text,
+** by definition. But it may also get used if non-flowed text gets
+** rewrapped when broken formatting is detected.
+**
+** Provide default implementations of the other three API calls that
+** simply invoke the corresponding user callback.
+*/
+
+static void emit_line_begin(rfc3676_parser_t handle)
+{
+ if (handle->errflag == 0)
+ handle->errflag=(*handle->info.line_begin)(handle->quote_level,
+ handle->info.arg);
+}
+
+/*
+** Invoke the user's line_flowed_notify callback. The callback is optional,
+** and gets skipped when an error is pending.
+*/
+static void emit_line_flowed_wrap(rfc3676_parser_t handle)
+{
+    if (handle->errflag != 0 || !handle->info.line_flowed_notify)
+        return;
+
+    handle->errflag=(*handle->info.line_flowed_notify)(handle->info.arg);
+}
+
+/*
+** Invoke the user's line_contents callback. Empty content is not
+** reported, and nothing is reported when an error is pending.
+*/
+static void emit_line_contents(rfc3676_parser_t handle,
+                               const unicode_char *uc,
+                               size_t cnt)
+{
+    if (handle->errflag != 0 || cnt == 0)
+        return;
+
+    handle->errflag=(*handle->info.line_contents)(uc, cnt,
+                                                  handle->info.arg);
+}
+
+static void emit_line_end(rfc3676_parser_t handle)
+{
+ if (handle->errflag == 0)
+ handle->errflag=(*handle->info.line_end)(handle->info.arg);
+}
+
+/*
+** When processing a non-flowed text, handle broken mail formatters (I'm
+** looking at you, Apple Mail) that spew out quoted-printable content with
+** each decoded line forming a single paragraph. This is heuristically
+** detected by looking for lines that exceed a wrapping threshold, then
+** rewrapping them.
+**
+** Redefine the three line API calls to launder the logical line via
+** the linebreak API.
+*/
+
+/* Per-character handler in effect when a non-flowed line begins */
+static void initial_nonflowed_line(rfc3676_parser_t handle,
+ int linebreak_opportunity,
+ unicode_char ch,
+ size_t ch_width);
+
+/* End-of-line handler in effect when a non-flowed line begins */
+static void initial_nonflowed_end(rfc3676_parser_t handle);
+
+/* Switches both handlers to the forced rewrapping ones */
+static void begin_forced_rewrap(rfc3676_parser_t handle);
+
+/*
+** A non-flowed line begins. Set up the linebreaking module, reset the
+** collected line and word, install the initial handlers, report the
+** beginning of the line to the user, and compute the line's target width.
+*/
+static void nonflowed_line_begin(rfc3676_parser_t handle)
+{
+    size_t target;
+
+    if (handle->lb != NULL)
+    {
+        /* Just in case: close a leftover linebreaking handle */
+
+        int rc=unicode_lbc_end(handle->lb);
+
+        if (handle->errflag == 0 && rc)
+            handle->errflag=rc;
+    }
+
+    handle->lb=unicode_lbc_init(nonflowed_line_process, handle);
+
+    if (handle->lb == NULL)
+    {
+        if (handle->errflag == 0)
+            handle->errflag=-1;
+    }
+    else
+    {
+        unicode_lbc_set_opts(handle->lb,
+                             UNICODE_LB_OPT_PRBREAK
+                             | UNICODE_LB_OPT_SYBREAK);
+    }
+
+    unicode_buf_clear(&handle->nonflowed_line);
+    unicode_buf_clear(&handle->nonflowed_next_word);
+
+    handle->nonflowed_line_width=0;
+    handle->nonflowed_next_word_width=0;
+
+    handle->nonflowed_line_process=initial_nonflowed_line;
+    handle->nonflowed_line_end=initial_nonflowed_end;
+    emit_line_begin(handle); /* Fallthru - user callback */
+
+    /* The quote level reduces the target width, down to 20 at most. */
+    if (handle->quote_level < NONFLOWED_WRAP_REDUCE - 20)
+        target=NONFLOWED_WRAP_REDUCE - handle->quote_level;
+    else
+        target=20;
+
+    handle->nonflowed_line_target_width=target;
+}
+
+/*
+** Contents of a non-flowed line. Each character is submitted to the
+** linebreaking API, which calls back nonflowed_line_process(). Nothing is
+** submitted without a linebreaking handle, or once an error is flagged.
+*/
+
+static void nonflowed_line_contents(rfc3676_parser_t handle,
+                                    const unicode_char *uc,
+                                    size_t cnt)
+{
+    if (handle->lb == NULL)
+        return;
+
+    for (; cnt > 0 && handle->errflag == 0; ++uc, --cnt)
+        handle->errflag=unicode_lbc_next(handle->lb, *uc);
+}
+
+/*
+** End of a non-flowed line. The linebreaking handle is closed first, then
+** the current end-of-line handler outputs what's left, and the end of the
+** line is reported to the user.
+*/
+static void nonflowed_line_end(rfc3676_parser_t handle)
+{
+    if (handle->lb != NULL)
+    {
+        int rc=unicode_lbc_end(handle->lb);
+
+        if (handle->errflag == 0 && rc)
+            handle->errflag=rc;
+
+        handle->lb=NULL;
+    }
+
+    handle->nonflowed_line_end(handle);
+    emit_line_end(handle); /* FALLTHRU */
+}
+
+/*
+** Callback from the linebreaking API: the next unicode character, and its
+** linebreak property. The character's width gets looked up, then the
+** current per-character handler is invoked. Always returns 0.
+*/
+static int nonflowed_line_process(int linebreak_opportunity,
+                                  unicode_char ch, void *dummy)
+{
+    rfc3676_parser_t handle=(rfc3676_parser_t)dummy;
+    size_t ch_width=unicode_wcwidth(ch);
+
+    handle->nonflowed_line_process(handle, linebreak_opportunity, ch,
+                                   ch_width);
+    return 0;
+}
+
+/*
+** Per-character handler while the non-flowed line is still being
+** collected: nothing is output until the line ends, or until the line
+** turns out to be excessively long.
+*/
+
+static void initial_nonflowed_line(rfc3676_parser_t handle,
+                                   int linebreak_opportunity,
+                                   unicode_char ch,
+                                   size_t ch_width)
+{
+    if (linebreak_opportunity != UNICODE_LB_NONE)
+    {
+        size_t combined=handle->nonflowed_line_width +
+            handle->nonflowed_next_word_width;
+
+        /*
+        ** The completed word moves into the line, as long as the
+        ** line stays within the targeted width.
+        */
+        if (combined <= handle->nonflowed_line_target_width)
+        {
+            unicode_buf_append_buf(&handle->nonflowed_line,
+                                   &handle->nonflowed_next_word);
+            handle->nonflowed_line_width=combined;
+
+            unicode_buf_clear(&handle->nonflowed_next_word);
+            handle->nonflowed_next_word_width=0;
+        }
+    }
+
+    /* Add the character to the growing word. */
+
+    unicode_buf_append(&handle->nonflowed_next_word, &ch, 1);
+    handle->nonflowed_next_word_width += ch_width;
+
+    /*
+    ** If the line's size now exceeds the target width by quite a bit,
+    ** we've had enough!
+    */
+    if (handle->nonflowed_line_width + handle->nonflowed_next_word_width
+        > handle->nonflowed_line_target_width
+        + NONFLOWED_THRESHOLD_EXCEEDED)
+    {
+        begin_forced_rewrap(handle);
+    }
+}
+
+/*
+** End of line handler. The line did not reach its threshold, so output
+** the collected line followed by the last word.
+*/
+static void initial_nonflowed_end(rfc3676_parser_t handle)
+{
+    struct unicode_buf *line=&handle->nonflowed_line;
+    struct unicode_buf *word=&handle->nonflowed_next_word;
+
+    emit_line_contents(handle, unicode_buf_ptr(line),
+                       unicode_buf_len(line));
+
+    emit_line_contents(handle, unicode_buf_ptr(word),
+                       unicode_buf_len(word));
+}
+
+/*
+** Check for the abnormal situation where we're ready to wrap something but
+** nonflowed_line is empty because all this text did not have a linebreaking
+** opportunity.
+**
+** In that case the growing word is split at its last grapheme break, or
+** taken whole if it has none. The leading part becomes the line, and the
+** width of what remains in the word is recalculated.
+*/
+
+static void check_abnormal_line(rfc3676_parser_t handle)
+{
+    struct unicode_buf *line=&handle->nonflowed_line;
+    struct unicode_buf *word=&handle->nonflowed_next_word;
+    const unicode_char *chars;
+    size_t total, split, k, remaining, width;
+
+    if (unicode_buf_len(line) > 0)
+        return;
+
+    /* Extreme times call for extreme measures */
+
+    total=unicode_buf_len(word);
+    chars=unicode_buf_ptr(word);
+
+    /* Search backwards for a break between two adjacent characters. */
+    split=total;
+
+    for (k=total; k > 1; --k)
+    {
+        if (unicode_grapheme_break(chars[k-2], chars[k-1]))
+        {
+            split=k-1;
+            break;
+        }
+    }
+
+    unicode_buf_append(line, chars, split);
+    unicode_buf_remove(word, 0, split);
+
+    /* Recalculate the width of the growing word, now. */
+
+    chars=unicode_buf_ptr(word);
+    remaining=unicode_buf_len(word);
+    width=0;
+
+    for (k=0; k < remaining; ++k)
+        width += unicode_wcwidth(chars[k]);
+
+    handle->nonflowed_next_word_width=width;
+}
+
+/*
+** We've decided that the line is too long, so begin rewrapping it.
+*/
+
+/* Per-character handler once a non-flowed line is being rewrapped */
+static void forced_rewrap_line(rfc3676_parser_t handle,
+ int linebreak_opportunity,
+ unicode_char ch,
+ size_t ch_width);
+
+/* End-of-line handler once a non-flowed line is being rewrapped */
+static void forced_rewrap_end(rfc3676_parser_t handle);
+
+/*
+** Emit nonflowed_line as the rewrapped line. Clear the buffer.
+**
+** If nonflowed_line is empty, check_abnormal_line() fills it from the
+** growing word first. After the contents are emitted, the flowed wrap
+** notification is issued, and the line's width is reset to zero.
+*/
+static void emit_rewrapped_line(rfc3676_parser_t handle)
+{
+ check_abnormal_line(handle);
+ emit_line_contents(handle, unicode_buf_ptr(&handle->nonflowed_line),
+ unicode_buf_len(&handle->nonflowed_line));
+
+ emit_line_flowed_wrap(handle);
+
+ /* nonflowed_line is now empty */
+ unicode_buf_clear(&handle->nonflowed_line);
+ handle->nonflowed_line_width=0;
+}
+
+/*
+** Switch the rest of this non-flowed line to the rewrapping handlers, and
+** emit what has been collected so far as the first rewrapped line.
+*/
+static void begin_forced_rewrap(rfc3676_parser_t handle)
+{
+ handle->nonflowed_line_process=forced_rewrap_line;
+ handle->nonflowed_line_end=forced_rewrap_end;
+ emit_rewrapped_line(handle);
+}
+
+/*
+** Per-character handler while a non-flowed line is being rewrapped.
+**
+** At each linebreaking opportunity the completed word moves into the
+** line, after the line is emitted if the word would not fit. A word with
+** no linebreaking opportunity, that alone is about to exceed the target
+** width, gets emitted by force.
+*/
+static void forced_rewrap_line(rfc3676_parser_t handle,
+                               int linebreak_opportunity,
+                               unicode_char ch,
+                               size_t ch_width)
+{
+    struct unicode_buf *line=&handle->nonflowed_line;
+    struct unicode_buf *word=&handle->nonflowed_next_word;
+
+    if (linebreak_opportunity != UNICODE_LB_NONE)
+    {
+        /* Found a linebreaking opportunity */
+
+        if (handle->nonflowed_line_width +
+            handle->nonflowed_next_word_width >
+            handle->nonflowed_line_target_width)
+        {
+            /* Accumulated word is too long */
+            emit_rewrapped_line(handle);
+        }
+
+        /* Read the widths afresh: the emit above may change them. */
+        unicode_buf_append_buf(line, word);
+        handle->nonflowed_line_width +=
+            handle->nonflowed_next_word_width;
+
+        unicode_buf_clear(word);
+        handle->nonflowed_next_word_width=0;
+    }
+
+    /* Check for another excessively long line. */
+
+    if (handle->nonflowed_line_width == 0 &&
+        handle->nonflowed_next_word_width + ch_width >
+        handle->nonflowed_line_target_width)
+    {
+        emit_rewrapped_line(handle);
+    }
+
+    unicode_buf_append(word, &ch, 1);
+    handle->nonflowed_next_word_width += ch_width;
+}
+
+/*
+** End-of-line handler while rewrapping: output the remaining line and word.
+*/
+static void forced_rewrap_end(rfc3676_parser_t handle)
+{
+ initial_nonflowed_end(handle); /* Same logic, for now */
+}
+