diff options
Diffstat (limited to 'unicode/book.xml')
| -rw-r--r-- | unicode/book.xml | 223 |
1 files changed, 206 insertions, 17 deletions
diff --git a/unicode/book.xml b/unicode/book.xml index 3995cea..544b47f 100644 --- a/unicode/book.xml +++ b/unicode/book.xml @@ -64,6 +64,13 @@ See COPYING for distribution information. script property</ulink>. </para> </listitem> + <listitem> + <para> + Look up the + <ulink url="http://unicode.org/notes/tn36/">category</ulink> + property. + </para> + </listitem> </itemizedlist> <para> @@ -165,8 +172,8 @@ See COPYING for distribution information. <link linkend="unicode_html40ent_lookup"> <citerefentry><refentrytitle>unicode_html40ent_lookup</refentrytitle> <manvolnum>3</manvolnum></citerefentry></link>, - <link linkend="unicode_isspace"> - <citerefentry><refentrytitle>unicode_isspace</refentrytitle> + <link linkend="unicode_category_lookup"> + <citerefentry><refentrytitle>unicode_category_lookup</refentrytitle> <manvolnum>3</manvolnum></citerefentry></link>, <link linkend="unicode_grapheme_break"> <citerefentry><refentrytitle>unicode_grapheme_break</refentrytitle> @@ -697,42 +704,224 @@ See COPYING for distribution information. 
</refsect1> </refentry> - <refentry id="unicode_isspace"> + <refentry id="unicode_category_lookup"> <info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info> <refmeta> - <refentrytitle>unicode_isspace</refentrytitle> + <refentrytitle>unicode_category_lookup</refentrytitle> <manvolnum>3</manvolnum> </refmeta> <refnamediv> + <refname>unicode_category_lookup</refname> + <refname>unicode_isalnum</refname> + <refname>unicode_isalpha</refname> + <refname>unicode_isblank</refname> + <refname>unicode_isdigit</refname> + <refname>unicode_isgraph</refname> + <refname>unicode_islower</refname> + <refname>unicode_ispunct</refname> <refname>unicode_isspace</refname> - <refpurpose>unicode character classification</refpurpose> + <refname>unicode_isupper</refname> + + <refpurpose>unicode character categorization</refpurpose> </refnamediv> <refsynopsisdiv> <funcsynopsis> <funcsynopsisinfo>#include <courier-unicode.h></funcsynopsisinfo> <funcprototype> + <funcdef>uint32_t <function>unicode_category_lookup</function></funcdef> + <paramdef>unicode_char <parameter>c</parameter></paramdef> + </funcprototype> + + <funcprototype> + <funcdef>int <function>unicode_isalnum</function></funcdef> + <paramdef>unicode_char <parameter>c</parameter></paramdef> + </funcprototype> + + <funcprototype> + <funcdef>int <function>unicode_isalpha</function></funcdef> + <paramdef>unicode_char <parameter>c</parameter></paramdef> + </funcprototype> + + <funcprototype> + <funcdef>int <function>unicode_isblank</function></funcdef> + <paramdef>unicode_char <parameter>c</parameter></paramdef> + </funcprototype> + + <funcprototype> + <funcdef>int <function>unicode_isdigit</function></funcdef> + <paramdef>unicode_char <parameter>c</parameter></paramdef> + </funcprototype> + + <funcprototype> + <funcdef>int <function>unicode_isgraph</function></funcdef> + <paramdef>unicode_char <parameter>c</parameter></paramdef> 
+ </funcprototype> + + <funcprototype> + <funcdef>int <function>unicode_islower</function></funcdef> + <paramdef>unicode_char <parameter>c</parameter></paramdef> + </funcprototype> + + <funcprototype> + <funcdef>int <function>unicode_ispunct</function></funcdef> + <paramdef>unicode_char <parameter>c</parameter></paramdef> + </funcprototype> + + <funcprototype> <funcdef>int <function>unicode_isspace</function></funcdef> <paramdef>unicode_char <parameter>c</parameter></paramdef> </funcprototype> + + <funcprototype> + <funcdef>int <function>unicode_isupper</function></funcdef> + <paramdef>unicode_char <parameter>c</parameter></paramdef> + </funcprototype> </funcsynopsis> </refsynopsisdiv> <refsect1> <title>DESCRIPTION</title> + <para> - <function>unicode_isspace</function>() returns non-zero - if the unicode character <parameter>c</parameter>'s line breaking - class is - <literal>BK</literal>, - <literal>CR</literal>, - <literal>LF</literal>, - <literal>NL</literal>, or - <literal>SP</literal>; and zero for all other characters. - Those line breaking classes include the traditional ASCII control - characters, and several additional unicode characters. + <function>unicode_category_lookup</function>() looks up the + <ulink url="http://unicode.org/notes/tn36/">unicode character's + categorization</ulink>. + <function>unicode_category_lookup</function>() returns a 32 bit + value. + The value's + <symbol>UNICODE_CATEGORY_1</symbol> bits specify the first level + of the unicode character's category, with + <symbol>UNICODE_CATEGORY_2</symbol>, + <symbol>UNICODE_CATEGORY_3</symbol>, and + <symbol>UNICODE_CATEGORY_4</symbol> bits specifying the 2nd, + 3rd, and 4th level, if given. A value of 0 for each corresponding + bit set indicates that no category is specified for this level, + for this character; otherwise the possible values are defined + in <filename><courier-unicode.h></filename>. 
</para> + + <para> + The remaining functions implement comparable equivalents of + their non-unicode versions in the standard C library, as follows: + </para> + + <variablelist> + <varlistentry> + <term><function>unicode_isalnum</function>()</term> + <listitem> + <para> + Returns non-0 for all + <function>unicode_isalpha</function>() or + <function>unicode_isdigit</function>(). + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><function>unicode_isalpha</function>()</term> + <listitem> + <para> + Returns non-0 for all + <symbol>UNICODE_CATEGORY_1_LETTER</symbol>. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><function>unicode_isblank</function>()</term> + <listitem> + <para> + Returns non-0 for + <symbol>TAB</symbol>, and all + <symbol>UNICODE_CATEGORY_2_SPACE</symbol>. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><function>unicode_isdigit</function>()</term> + <listitem> + <para> + Returns non-0 for all + <symbol>UNICODE_CATEGORY_1_NUMBER</symbol> + | <symbol>UNICODE_CATEGORY_2_DIGIT</symbol>, + only (no third categories). + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><function>unicode_isgraph</function>()</term> + <listitem> + <para> + Returns non-0 for all codepoints above + <symbol>SPACE</symbol> which are not + <function>unicode_isspace</function>(). + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><function>unicode_islower</function>()</term> + <listitem> + <para> + Returns non-0 for all + <function>unicode_isalpha</function>() for which the + character is + equal to + <link linkend="unicode_uc"> + <citerefentry><refentrytitle>unicode_lc</refentrytitle> + <manvolnum>3</manvolnum></citerefentry></link> + of itself. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><function>unicode_ispunct</function>()</term> + <listitem> + <para> + Returns non-0 for all + <symbol>UNICODE_CATEGORY_1_PUNCTUATION</symbol>. 
+ </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><function>unicode_isspace</function>()</term> + <listitem> + <para> + Returns non-0 for <function>unicode_isblank</function>() or + for unicode characters + with linebreaking properties of + <symbol>BK</symbol>, + <symbol>CR</symbol>, + <symbol>LF</symbol>, + <symbol>NL</symbol>, + or + <symbol>SP</symbol>. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><function>unicode_isupper</function>()</term> + <listitem> + <para> + Returns non-0 for all + <function>unicode_isalpha</function>() for which the + character is + equal to + <link linkend="unicode_uc"> + <citerefentry><refentrytitle>unicode_uc</refentrytitle> + <manvolnum>3</manvolnum></citerefentry></link> + of itself. + </para> + </listitem> + </varlistentry> + </variablelist> </refsect1> <refsect1> <title>SEE ALSO</title> @@ -1481,8 +1670,8 @@ See COPYING for distribution information. <link linkend="unicode_html40ent_lookup"> <citerefentry><refentrytitle>unicode_html40ent_lookup</refentrytitle> <manvolnum>3</manvolnum></citerefentry></link>, - <link linkend="unicode_isspace"> - <citerefentry><refentrytitle>unicode_isspace</refentrytitle> + <link linkend="unicode_category_lookup"> + <citerefentry><refentrytitle>unicode_category_lookup</refentrytitle> <manvolnum>3</manvolnum></citerefentry></link>, <link linkend="unicode_grapheme_break"> <citerefentry><refentrytitle>unicode_grapheme_break</refentrytitle> |
