Man page - unicode::bidi(3)
Packages contains this manual
- unicode_lb_end(3)
- unicode_derived_changes_when_casemapped_lookup(3)
- unicode_bidi_cleaned_size(3)
- unicode_locale_chset(3)
- unicode_emoji_extended_pictographic(3)
- unicode_convert_tou_tobuf(3)
- courier-unicode(7)
- unicode_derived_case_ignorable_lookup(3)
- unicode::ucs_2(3)
- unicode_convert_tobuf(3)
- unicode_tc(3)
- unicode_convert_tocbuf_init(3)
- unicode_default_chset(3)
- unicode::linebreakc_iter(3)
- unicode_emoji_lookup(3)
- unicode::iconvert::fromu(3)
- unicode_lb_next_cnt(3)
- unicode_convert(3)
- unicode_derived_xid_start_lookup(3)
- unicode_composition_deinit(3)
- unicode_emoji_modifier_base(3)
- unicode_bidi_calc_levels(3)
- unicode_canonical(3)
- unicode::ucs_4(3)
- unicode::bidi_logical_order(3)
- unicode::bidi_embed_paragraph_level(3)
- unicode::bidi_get_direction(3)
- unicode_isalnum(3)
- unicode_lb_set_opts(3)
- unicode_isspace(3)
- unicode_grapheme_break_init(3)
- unicode_lbc_end(3)
- unicode_general_category_lookup(3)
- unicode::iconvert::convert(3)
- unicode_script(3)
- unicode::bidi(3)
- unicode_derived_incb_lookup(3)
- unicode_ispunct(3)
- unicode_convert_fromutf8(3)
- unicode::bidi_reorder(3)
- unicode_decomposition_init(3)
- unicode_bidi_reorder(3)
- unicode_wbscan_init(3)
- unicode_convert_tocbuf_toutf8_init(3)
- unicode_wb_next(3)
- unicode_bidi_setbnl(3)
- unicode_bidi_direction(3)
- unicode::decompose_default_reallocate(3)
- unicode_html40ent_lookup(3)
- unicode_derived_changes_when_casefolded_lookup(3)
- unicode_emoji_modifier(3)
- unicode_convert_init(3)
- unicode_grapheme_break(3)
- unicode_bidi_logical_order(3)
- unicode_emoji_presentation(3)
- unicode_bidi_mirror(3)
- unicode::canonical(3)
- unicode::bidi_override(3)
- unicode_wb_next_cnt(3)
- unicode::linebreak_callback_save_buf(3)
- unicode_derived_cased_lookup(3)
- unicode::compose(3)
- unicode::wordbreak_callback_base(3)
- unicode::iso_8859_1(3)
- unicode_grapheme_break_deinit(3)
- unicode_derived_grapheme_base_lookup(3)
- unicode_derived_default_ignorable_code_point_lookup(3)
- unicode_derived_changes_when_titlecased_lookup(3)
- unicode_derived_lowercase_lookup(3)
- unicode_convert_deinit(3)
- unicode_bidi_combinings(3)
- unicode_derived_grapheme_extend_lookup(3)
- unicode::bidi_embed(3)
- unicode_uc(3)
- unicode_composition_apply(3)
- unicode_bidi_type(3)
- unicode_derived_uppercase_lookup(3)
- unicode_derived_id_start_lookup(3)
- unicode::tolower(3)
- unicode_derived_changes_when_lowercased_lookup(3)
- unicode::linebreak_callback_base(3)
- unicode_compose(3)
- unicode_grapheme_break_next(3)
- unicode_isupper(3)
- unicode_lbc_next_cnt(3)
- unicode::linebreak_iter(3)
- unicode_bidi(3)
- unicode_decompose_reallocate_size(3)
- unicode::toupper(3)
- unicode::linebreakc_callback_base(3)
- unicode::bidi_cleanup(3)
- unicode::iconvert::convert_tocase(3)
- unicode_composition_init(3)
- unicode_lbc_next(3)
- unicode_bidi_bracket_type(3)
- unicode_convert_fromu_init(3)
- unicode_category_lookup(3)
- unicode_ccc(3)
- unicode_emoji(3)
- unicode::iconvert::tou(3)
- unicode_bidi_needs_embed(3)
- unicode_derived_id_continue_lookup(3)
- unicode_convert_uc(3)
- unicode_emoji_component(3)
- unicode_u_ucs2_native(3)
- unicode::wordbreak(3)
- unicode_wbscan_end(3)
- unicode::compose_default_callback(3)
- unicode_bidi_calc(3)
- unicode_isalpha(3)
- unicode::decompose(3)
- unicode_convert_toutf8(3)
- unicode_line_break(3)
- unicode_derived_math_lookup(3)
- unicode_derived_core_properties(3)
- unicode_bidi_cleanup(3)
- unicode_derived_xid_continue_lookup(3)
- unicode_wbscan_next(3)
- unicode_islower(3)
- unicode_convert_tocase(3)
- unicode_word_break(3)
- unicode_convert_tou_init(3)
- unicode_derived_grapheme_link_lookup(3)
- unicode_bidi_embed_paragraph_level(3)
- unicode_bidi_calc_types(3)
- unicode_lc(3)
- unicode_wb_end(3)
- unicode_isdigit(3)
- unicode_isblank(3)
- unicode_derived_changes_when_uppercased_lookup(3)
- unicode::bidi_calc(3)
- unicode_decomposition_deinit(3)
- unicode::utf_8(3)
- unicode_decompose(3)
- unicode_bidi_embed(3)
- unicode_lb_next(3)
- unicode_u_ucs4_native(3)
- unicode::bidi_calc_types(3)
- unicode_isgraph(3)
- unicode_wb_init(3)
- unicode_convert_tocbuf_fromutf8_init(3)
- unicode::bidi_combinings(3)
- unicode_lbc_set_opts(3)
- unicode_lbc_init(3)
- unicode::bidi_needs_embed(3)
- unicode_convert_fromu_tobuf(3)
- unicode_lb_init(3)
apt-get install libcourier-unicode-dev
Manual
UNICODE::BIDI
NAME
SYNOPSIS
DESCRIPTION
unicode::literals namespace
SEE ALSO
AUTHOR
NOTES
NAME
unicode::bidi, unicode::bidi_calc, unicode::bidi_calc_types, unicode::bidi_reorder, unicode::bidi_cleanup, unicode::bidi_logical_order, unicode::bidi_combinings, unicode::bidi_needs_embed, unicode::bidi_embed, unicode::bidi_embed_paragraph_level, unicode::bidi_get_direction, unicode::bidi_override - unicode bi-directional algorithm
SYNOPSIS
#include <courier-unicode.h>
struct
unicode::bidi_calc_types {
bidi_calc_types(const std::u32string & string);
std::vector<unicode_bidi_type_t> types ;
void setbnl(std::u32string & string);
};
|
std::tuple<std::vector<unicode_bidi_level_t>, struct unicode_bidi_direction> unicode::bidi_calc(const unicode::bidi_calc_types & ustring ); |
|
|
std::tuple<std::vector<unicode_bidi_level_t>, struct unicode_bidi_direction> unicode::bidi_calc(const unicode::bidi_calc_types & ustring , unicode_bidi_level_t embedding_level ); |
|
|
int unicode::bidi_reorder(std::u32string & string , std::vector<unicode_bidi_level_t> & embedding_level , const std::function<void (size_t, size_t)> & reorder_callback =[](size_t, size_t){}, size_t starting_pos =0, size_t n =(size_t)-1); |
|
|
void unicode::bidi_reorder(std::vector<unicode_bidi_level_t> & embedding_level , const std::function<void (size_t, size_t)> & reorder_callback =[](size_t, size_t){}, size_t starting_pos =0, size_t n =(size_t)-1); |
|
|
void unicode::bidi_cleanup(std::u32string & string , const std::function<void (size_t)> & removed_callback =[](size_t){}, int cleanup_options =0); |
|
|
int unicode::bidi_cleanup(std::u32string & string , std::vector <unicode_bidi_level_t> & levels , const std::function<void (size_t)> & removed_callback =[](size_t){}, int cleanup_options =0); |
|
|
int unicode::bidi_cleanup(std::u32string & string , std::vector <unicode_bidi_level_t> & levels , const std::function<void (size_t)> & removed_callback , int cleanup_options , size_t starting_pos , size_t n ); |
|
|
int unicode::bidi_logical_order(std::u32string & string , std::vector <unicode_bidi_level_t> & levels , unicode_bidi_level_t paragraph_embedding , const std::function<void (size_t, size_t)> & reorder_callback =[](size_t, size_t){}, size_t starting_pos =0, size_t n =(size_t)-1); |
|
|
void unicode::bidi_combinings(const std::u32string & string , const std::vector <unicode_bidi_level_t> & levels , const std::function <void (unicode_bidi_level_t level, size_t level_start, size_t n_chars, size_t comb_start, size_t n_comb_chars)> & callback ); |
|
|
void unicode::bidi_combinings(const std::u32string & string , const std::function <void (unicode_bidi_level_t level, size_t level_start, size_t n_chars, size_t comb_start, size_t n_comb_chars)> & callback ); |
|
|
void unicode::bidi_logical_order(std::vector <unicode_bidi_level_t> & levels , unicode_bidi_level_t paragraph_embedding , const std::function<void (size_t, size_t)> & reorder_callback , size_t starting_pos =0, size_t n =(size_t)-1); |
|
|
bool unicode::bidi_needs_embed(const std::u32string & string , const std::vector <unicode_bidi_level_t> & levels , const unicode_bidi_level_t * paragraph_embedding =NULL, size_t starting_pos =0, size_t n =(size_t)-1); |
|
|
int unicode::bidi_embed(const std::u32string & string , const std::vector <unicode_bidi_level_t> & levels , unicode_bidi_level_t paragraph_embedding , const std::function<void (const char32_t *, size_t, bool)> & callback ); |
|
|
std::u32string unicode::bidi_embed(const std::u32string & string , const std::vector <unicode_bidi_level_t> & levels , unicode_bidi_level_t paragraph_embedding ); |
|
|
char32_t unicode::bidi_embed_paragraph_level(const std::u32string & string , unicode_bidi_level_t paragraph_embedding ); |
|
|
unicode_bidi_direction unicode::bidi_get_direction(const std::u32string & string , size_t starting_pos =0, size_t n =(size_t)-1); |
|
|
std::u32string unicode::bidi_override(const std::u32string & string , unicode_bidi_level_t direction , int cleanup_options =0);
DESCRIPTION
These functions implement the C++ interface for the Unicode Bi-Directional algorithm [1] . See the description of the underlying unicode_bidi (3) C library API for more information. C++ specific notes:
• unicode::bidi_calc returns the directional embedding value buffer and the calculated paragraph embedding level. Its ustring is implicitly converted from a std::u32string:
std::u32string text;
auto [levels, direction]=unicode::bidi_calc(text);
Alternatively, a unicode::bidi_calc_types object gets constructed from the same std::u32string and then passed directly to unicode::bidi_calc :
std::u32string text;
unicode::bidi_calc_types types{text};
types.setbnl(text); // Optional
// types.types is a std::vector of unicode_bidi_type_t values
auto [levels, direction]=unicode::bidi_calc(types);
This provides the means to access the intermediate unicode_bidi_type_t values that get calculated from the Unicode text string.
Note
In all cases the std::u32string cannot be a temporary object, and it must remain in scope until unicode::bidi_calc() returns.
The optional setbnl() method uses unicode_bidi_setbnl (3) to replace paragraph separators with newline characters in the unicode string. It requires the same unicode string that was passed to the constructor as a parameter (because the constructor takes a constant reference, but this method modifies the string).
• Several C functions provide a “dry-run” mode by passing a NULL pointer. The C++ API provides separate overloads, with and without the nullable parameter.
• Several C functions accept a nullable function pointer, with the NULL function pointer specifying no callback. The C++ functions have a std::function parameter with a default do-nothing closure.
• Several C functions accept two parameters, a Unicode character pointer and the embedding level buffer, and a single parameter that specifies the size of both. The equivalent C++ function takes two discrete parameters, a std::u32string and a std::vector and returns an int; a negative value if their sizes differ, and 0 if their sizes match, and the requested function completes. The unicode::bidi_embed overload that returns a std::u32string returns an empty string in case of a mismatch.
• unicode::bidi_reorder reorders the entire string and its embedding_level values by default. The optional starting_pos and n parameters limit the reordering to the indicated subset of the original string (specified as the starting position offset index, and the number of characters).
• unicode::bidi_reorder , unicode::bidi_cleanup , unicode::bidi_logical_order , unicode::bidi_needs_embed and unicode::bidi_get_direction take two optional parameters (defaulted values or overloaded) specifying an optional starting position and number of characters that define a subset of the original string that gets reordered, cleaned up, or has its direction determined.
The unicode::bidi_cleanup overload that takes starting_pos and n does not trim the passed-in string and embedding level buffer, since it affects only a subset of the string. The number of times the removed character callback gets invoked indicates by how much the substring should be trimmed.
• unicode::bidi_override modifies the passed-in string as follows:
• unicode::bidi_cleanup () is applied with the specified, or defaulted, cleanup_options
• Either an LRO or an RLO override marker gets prepended to the Unicode string, forcing the entire string to be interpreted in a single rendering direction, when processed by the Unicode bi-directional algorithm.
unicode::bidi_override makes it possible to use a Unicode-aware application or algorithm in a context that only works with text that's always displayed in a fixed direction, allowing graceful handling of input containing bi-directional text.
unicode::literals namespace
using namespace unicode::literals;
std::u32string
foo(std::u32string bar)
{
    return bar + LRO;
}
This namespace contains the following constexpr definitions:
• char32_t arrays with literal Unicode character strings containing Unicode directional, isolate, and override markers, like LRO, RLO and others.
• CLEANUP_EXTRA, CLEANUP_BNL, and CLEANUP_CANONICAL options for unicode::bidi_cleanup ().
SEE ALSO
courier-unicode (7), unicode_bidi (3).
AUTHOR
Sam Varshavchik
Author
NOTES
1. Unicode Bi-Directional algorithm
https://www.unicode.org/reports/tr9/tr9-48.html