diff options
| author | Sam Varshavchik | 2020-07-12 09:44:24 -0400 | 
|---|---|---|
| committer | Sam Varshavchik | 2020-08-02 14:56:50 -0400 | 
| commit | d2915c9cadf6fbc5ae29ffc387cce987b88dbbe0 (patch) | |
| tree | f76c8edf36fb84c6e082f2a4ae9798b10aeda70e /unicode/biditest2.C | |
| parent | 51471a4d8b177adfcd40c145a809193a4ab9bd8d (diff) | |
| download | courier-libs-d2915c9cadf6fbc5ae29ffc387cce987b88dbbe0.tar.bz2 | |
Add additional bidirectional-related algorithms.
Clean up by removing markers, via unicode_bidi_cleanup() and
unicode_bidi_extra_cleanup().
Re-embed directional markers, via unicode_bidi_logical_order(),
unicode_bidi_embed() and unicode_bidi_embed_paragraph_level().
Diffstat (limited to 'unicode/biditest2.C')
| -rw-r--r-- | unicode/biditest2.C | 289 | 
1 files changed, 271 insertions, 18 deletions
| diff --git a/unicode/biditest2.C b/unicode/biditest2.C index f497bcf..cfa0e50 100644 --- a/unicode/biditest2.C +++ b/unicode/biditest2.C @@ -1,42 +1,110 @@  #include	"unicode_config.h"  #include	"courier-unicode.h"  #include	<iostream> +#include	<iterator>  #include	<sstream>  #include	<fstream>  #include	<cstdint>  #include	<iomanip> +#include	<algorithm> +#include	<unistd.h>  FILE *DEBUGDUMP; -int main(int argc, char **argv) +#define BIDI_DEBUG + +extern "C" { +#if 0 +} +#endif + +#include "unicode_bidi.c" + +} + +void latin_test()  { -	std::ifstream fp("BidiCharacterTest.txt"); +	for (char32_t c=32; c<256; c++) +	{ +		std::u32string s; -	if (!fp.is_open()) +		s += c; + +		std::vector<unicode_bidi_level_t> levels={UNICODE_BIDI_LR}; + +		auto new_string=unicode::bidi_embed(s, levels, +						    UNICODE_BIDI_LR); + +		if (new_string != s) +		{ +			std::cerr << "Character " << (int)c +				  << " does not work." << std::endl; +			exit(1); +		} +	} + +	std::u32string s; +	std::vector<unicode_bidi_level_t> levels; + +	for (char32_t c=32; c<256; c++)  	{ -		std::cerr << "Cannot open BidiCharacterTest.txt" << std::endl; +		s += c; +		levels.push_back(UNICODE_BIDI_LR); +	} + +	auto new_string=unicode::bidi_embed(s, levels, +					    UNICODE_BIDI_LR); + +	if (new_string != s) +	{ +		std::cerr << "iso-8859-1 string does not work." 
+			  << std::endl;  		exit(1);  	} +} -	DEBUGDUMP=fopen("/dev/null", "w"); -	if (!DEBUGDUMP) +void character_test() +{ +	std::ifstream fp("BidiCharacterTest.txt"); + +	if (!fp.is_open())  	{ -		perror("/dev/null"); +		std::cerr << "Cannot open BidiCharacterTest.txt" << std::endl;  		exit(1);  	}  	std::string buf;  	size_t linenum=0; +	size_t nextlogline=0; +	std::string logmsg;  	while (1)  	{  		buf.clear(); -		if (std::getline(fp, buf).eof() && buf.empty()) -			break; -		++linenum; +		bool iseof=std::getline(fp, buf).eof() && buf.empty(); + +		if (iseof || ++linenum >= nextlogline) +		{ +			alarm(300); +			std::cout << logmsg; + +			std::ostringstream o; +			o << std::setw(6) << linenum << " lines processed... "; + +			logmsg=o.str(); + +			std::cout << logmsg << std::flush; + +			std::fill(logmsg.begin(), logmsg.end(), '\b'); + +			nextlogline += 20000; +		} + +		if (iseof) +			break;  		auto p=buf.find('#');  		if (p != buf.npos) @@ -187,17 +255,202 @@ int main(int argc, char **argv)  			std::cerr << std::endl;  			exit(1);  		} -	} -	return 0; -} -#define BIDI_DEBUG +		std::vector<size_t> actual_render_order; + +		size_t n=0; + +		std::generate_n(std::back_inserter(actual_render_order), +				s.size(), +				[&] { return n++; }); + +		unicode::bidi_reorder +			(s, levels, +			 [&] +			 (size_t index, +			  size_t n) +			 { +				 auto b=actual_render_order.begin(); +				 std::reverse(b+index, b+index+n); +			 }); + +		n=0; +		unicode::bidi_cleanup +			(s, levels, +			 [&] +			 (size_t i) +			 { +				 actual_render_order.erase +					 (actual_render_order.begin()+i-n); +				 ++n; +			 }); + +		if (render_order != actual_render_order) +		{ +			std::cerr << "Regression, line " +				  << linenum +				  << ": render order" +				  << std::endl +				  << "   Expected:"; +			for (auto n:render_order) +			{ +				std::cerr << " " << n; +			} +			std::cerr << std::endl +				  << "     Actual:"; -extern "C" { -#if 0 +			for (auto n:actual_render_order) +			{ +				std::cerr 
<< " " << n; +			} +			std::cerr << std::endl; +			exit(1); +		} + +		unicode::bidi_extra_cleanup(s, levels); + +		auto dump_ls= +			[&] +			(const std::u32string &s, +			 const std::vector<unicode_bidi_level_t> &l) +			{ +				for (size_t i=0; i<s.size(); ++i) +				{ +					std::cerr << " " << std::hex +						  << std::setw(4) +						  << std::setfill('0') +						  << s[i] << "/" +						  << std::dec +						  << (int)l[i]; +				} +			}; + +		for (int pass=0; pass<4; pass++) +		{ +			int paragraph=pass & 1; +			int use_default=pass & 2; + +			for (size_t i=0; i<s.size(); ++i) +			{ +				/* L1 */ +				switch (unicode_bidi_type(s[i])) { +				case UNICODE_BIDI_TYPE_S: +				case UNICODE_BIDI_TYPE_B: +					levels.at(i)=paragraph; +				} +			} + +			auto logical_string=s; +			auto logical_levels=levels; + +			unicode::bidi_logical_order(logical_string, +						    logical_levels, +						    paragraph); + +			auto new_string=unicode::bidi_embed(logical_string, +							    logical_levels, +							    paragraph); + +			auto save_string=new_string; + +			if (use_default) +			{ +				auto marker=unicode::bidi_embed_paragraph_level +					(new_string, paragraph); + +				if (marker) +					new_string.insert(0, 1, marker); + +				ret=unicode::bidi_calc(new_string); +			} +			else +			{ +				ret=unicode::bidi_calc(new_string, paragraph); +			} + +			unicode::bidi_reorder(new_string, std::get<0>(ret)); +			unicode::bidi_extra_cleanup(new_string, +						    std::get<0>(ret)); + +			/* New string is now back in logical order */ + +			if (new_string == s && std::get<0>(ret) == levels) +				continue; + +			fclose(DEBUGDUMP); +			DEBUGDUMP=stderr; + +			std::cerr << "Regression, line " +				  << linenum +				  << ": embedding markers" +				  << std::endl +				  << "   Paragraph embedding level: " +				  << paragraph; + +			if (use_default) +				std::cerr << " (defaulted)"; + +			std::cerr << std::endl +				  << "String (1):"; + +			dump_ls(s, levels); + +			std::cerr << std::endl << "String 
(2):"; + +			dump_ls(new_string, std::get<0>(ret)); +			std::cerr << std::endl; + +			std::cerr << "Embedding:"; +			dump_ls(logical_string, logical_levels); +			std::cerr << std::endl; + +			unicode::bidi_embed(logical_string, +					    logical_levels, +					    paragraph); + +			std::cerr << std::endl +				  << "Embedded string:"; + +			for (auto c:save_string) +			{ +				std::cerr << " "; + +				switch (c) { +				case LRM: std::cerr << "LRM"; break; +				case RLM: std::cerr << "RLM"; break; +				case RLI: std::cerr << "RLI"; break; +				case LRI: std::cerr << "LRI"; break; +				case RLO: std::cerr << "RLO"; break; +				case LRO: std::cerr << "LRO"; break; +				case PDF: std::cerr << "PDF"; break; +				case PDI: std::cerr << "PDI"; break; +				default: +					std::cerr << std::hex << std::setw(4) +						  << std::setfill('0') +						  << c; +					break; +				} +			} +			std::cerr << std::dec << std::endl << std::flush; + +			ret=unicode::bidi_calc(save_string, paragraph); +			unicode::bidi_reorder(save_string, std::get<0>(ret)); +			exit(1); +		} +	} +	std::cout << std::endl;  } -#endif -#include "unicode_bidi.c" +int main(int argc, char **argv) +{ +	DEBUGDUMP=fopen("/dev/null", "w"); +	if (!DEBUGDUMP) +	{ +		perror("/dev/null"); +		exit(1); +	} +	latin_test(); +	character_test(); +	return 0;  } | 
