From 4debd4f3de6cdc8dee354a5736fd358c345a282e Mon Sep 17 00:00:00 2001 From: Stefan Ravn van Overeem Date: Tue, 1 Aug 2023 09:39:14 +0200 Subject: [PATCH] Extract extend codepoints out into a separate array (#713) IsCombining() is a hot path when rendering the screen. Most of the time is spent doing bisearch on the word break interval list to check whether a character has the Extend property. Create a separate table for intervals with the Extend property, so that only this table has to be searched in IsCombining(). Also, some of the intervals in the word break interval list were consecutive and could be combined to reduce the size of the list. This gives a ~25% performance improvement when running the benchmark. --- src/ftxui/screen/string.cpp | 587 ++++++++++-------------------------- 1 file changed, 158 insertions(+), 429 deletions(-) diff --git a/src/ftxui/screen/string.cpp b/src/ftxui/screen/string.cpp index c1baf16..897c21d 100644 --- a/src/ftxui/screen/string.cpp +++ b/src/ftxui/screen/string.cpp @@ -24,7 +24,7 @@ struct Interval { }; // As of Unicode 13.0.0 -const std::array g_full_width_characters = {{ +static constexpr std::array g_full_width_characters = {{ {0x01100, 0x0115f}, {0x0231a, 0x0231b}, {0x02329, 0x0232a}, {0x023e9, 0x023ec}, {0x023f0, 0x023f0}, {0x023f3, 0x023f3}, {0x025fd, 0x025fe}, {0x02614, 0x02615}, {0x02648, 0x02653}, @@ -75,7 +75,7 @@ struct WordBreakPropertyInterval { // Properties from: // https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/WordBreakProperty.txt -const std::array g_word_break_intervals = {{ +static constexpr std::array g_word_break_intervals = {{ {0x0000A, 0x0000A, WBP::LF}, {0x0000B, 0x0000C, WBP::Newline}, {0x0000D, 0x0000D, WBP::CR}, @@ -98,30 +98,12 @@ const std::array g_word_break_intervals = {{ {0x000BA, 0x000BA, WBP::ALetter}, {0x000C0, 0x000D6, WBP::ALetter}, {0x000D8, 0x000F6, WBP::ALetter}, - {0x000F8, 0x001BA, WBP::ALetter}, - {0x001BB, 0x001BB, WBP::ALetter}, - {0x001BC, 0x001BF, WBP::ALetter}, - {0x001C0, 0x001C3, WBP::ALetter},
- {0x001C4, 0x00293, WBP::ALetter}, - {0x00294, 0x00294, WBP::ALetter}, - {0x00295, 0x002AF, WBP::ALetter}, - {0x002B0, 0x002C1, WBP::ALetter}, - {0x002C2, 0x002C5, WBP::ALetter}, - {0x002C6, 0x002D1, WBP::ALetter}, - {0x002D2, 0x002D7, WBP::ALetter}, - {0x002DE, 0x002DF, WBP::ALetter}, - {0x002E0, 0x002E4, WBP::ALetter}, - {0x002E5, 0x002EB, WBP::ALetter}, - {0x002EC, 0x002EC, WBP::ALetter}, - {0x002ED, 0x002ED, WBP::ALetter}, - {0x002EE, 0x002EE, WBP::ALetter}, - {0x002EF, 0x002FF, WBP::ALetter}, + {0x000F8, 0x002D7, WBP::ALetter}, + {0x002DE, 0x002FF, WBP::ALetter}, {0x00300, 0x0036F, WBP::Extend}, - {0x00370, 0x00373, WBP::ALetter}, - {0x00374, 0x00374, WBP::ALetter}, + {0x00370, 0x00374, WBP::ALetter}, {0x00376, 0x00377, WBP::ALetter}, - {0x0037A, 0x0037A, WBP::ALetter}, - {0x0037B, 0x0037D, WBP::ALetter}, + {0x0037A, 0x0037D, WBP::ALetter}, {0x0037E, 0x0037E, WBP::MidNum}, {0x0037F, 0x0037F, WBP::ALetter}, {0x00386, 0x00386, WBP::ALetter}, @@ -131,12 +113,10 @@ const std::array g_word_break_intervals = {{ {0x0038E, 0x003A1, WBP::ALetter}, {0x003A3, 0x003F5, WBP::ALetter}, {0x003F7, 0x00481, WBP::ALetter}, - {0x00483, 0x00487, WBP::Extend}, - {0x00488, 0x00489, WBP::Extend}, + {0x00483, 0x00489, WBP::Extend}, {0x0048A, 0x0052F, WBP::ALetter}, {0x00531, 0x00556, WBP::ALetter}, - {0x00559, 0x00559, WBP::ALetter}, - {0x0055A, 0x0055C, WBP::ALetter}, + {0x00559, 0x0055C, WBP::ALetter}, {0x0055E, 0x0055E, WBP::ALetter}, {0x0055F, 0x0055F, WBP::MidLetter}, {0x00560, 0x00588, WBP::ALetter}, @@ -155,9 +135,7 @@ const std::array g_word_break_intervals = {{ {0x0060C, 0x0060D, WBP::MidNum}, {0x00610, 0x0061A, WBP::Extend}, {0x0061C, 0x0061C, WBP::Format}, - {0x00620, 0x0063F, WBP::ALetter}, - {0x00640, 0x00640, WBP::ALetter}, - {0x00641, 0x0064A, WBP::ALetter}, + {0x00620, 0x0064A, WBP::ALetter}, {0x0064B, 0x0065F, WBP::Extend}, {0x00660, 0x00669, WBP::Numeric}, {0x0066B, 0x0066B, WBP::Numeric}, @@ -206,27 +184,18 @@ const std::array g_word_break_intervals = {{ {0x008B6, 
0x008C7, WBP::ALetter}, {0x008D3, 0x008E1, WBP::Extend}, {0x008E2, 0x008E2, WBP::Format}, - {0x008E3, 0x00902, WBP::Extend}, - {0x00903, 0x00903, WBP::Extend}, + {0x008E3, 0x00903, WBP::Extend}, {0x00904, 0x00939, WBP::ALetter}, - {0x0093A, 0x0093A, WBP::Extend}, - {0x0093B, 0x0093B, WBP::Extend}, - {0x0093C, 0x0093C, WBP::Extend}, + {0x0093A, 0x0093C, WBP::Extend}, {0x0093D, 0x0093D, WBP::ALetter}, - {0x0093E, 0x00940, WBP::Extend}, - {0x00941, 0x00948, WBP::Extend}, - {0x00949, 0x0094C, WBP::Extend}, - {0x0094D, 0x0094D, WBP::Extend}, - {0x0094E, 0x0094F, WBP::Extend}, + {0x0093E, 0x0094F, WBP::Extend}, {0x00950, 0x00950, WBP::ALetter}, {0x00951, 0x00957, WBP::Extend}, {0x00958, 0x00961, WBP::ALetter}, {0x00962, 0x00963, WBP::Extend}, {0x00966, 0x0096F, WBP::Numeric}, - {0x00971, 0x00971, WBP::ALetter}, - {0x00972, 0x00980, WBP::ALetter}, - {0x00981, 0x00981, WBP::Extend}, - {0x00982, 0x00983, WBP::Extend}, + {0x00971, 0x00980, WBP::ALetter}, + {0x00981, 0x00983, WBP::Extend}, {0x00985, 0x0098C, WBP::ALetter}, {0x0098F, 0x00990, WBP::ALetter}, {0x00993, 0x009A8, WBP::ALetter}, @@ -235,11 +204,9 @@ const std::array g_word_break_intervals = {{ {0x009B6, 0x009B9, WBP::ALetter}, {0x009BC, 0x009BC, WBP::Extend}, {0x009BD, 0x009BD, WBP::ALetter}, - {0x009BE, 0x009C0, WBP::Extend}, - {0x009C1, 0x009C4, WBP::Extend}, + {0x009BE, 0x009C4, WBP::Extend}, {0x009C7, 0x009C8, WBP::Extend}, - {0x009CB, 0x009CC, WBP::Extend}, - {0x009CD, 0x009CD, WBP::Extend}, + {0x009CB, 0x009CD, WBP::Extend}, {0x009CE, 0x009CE, WBP::ALetter}, {0x009D7, 0x009D7, WBP::Extend}, {0x009DC, 0x009DD, WBP::ALetter}, @@ -249,8 +216,7 @@ const std::array g_word_break_intervals = {{ {0x009F0, 0x009F1, WBP::ALetter}, {0x009FC, 0x009FC, WBP::ALetter}, {0x009FE, 0x009FE, WBP::Extend}, - {0x00A01, 0x00A02, WBP::Extend}, - {0x00A03, 0x00A03, WBP::Extend}, + {0x00A01, 0x00A03, WBP::Extend}, {0x00A05, 0x00A0A, WBP::ALetter}, {0x00A0F, 0x00A10, WBP::ALetter}, {0x00A13, 0x00A28, WBP::ALetter}, @@ -259,8 +225,7 @@ 
const std::array g_word_break_intervals = {{ {0x00A35, 0x00A36, WBP::ALetter}, {0x00A38, 0x00A39, WBP::ALetter}, {0x00A3C, 0x00A3C, WBP::Extend}, - {0x00A3E, 0x00A40, WBP::Extend}, - {0x00A41, 0x00A42, WBP::Extend}, + {0x00A3E, 0x00A42, WBP::Extend}, {0x00A47, 0x00A48, WBP::Extend}, {0x00A4B, 0x00A4D, WBP::Extend}, {0x00A51, 0x00A51, WBP::Extend}, @@ -270,8 +235,7 @@ const std::array g_word_break_intervals = {{ {0x00A70, 0x00A71, WBP::Extend}, {0x00A72, 0x00A74, WBP::ALetter}, {0x00A75, 0x00A75, WBP::Extend}, - {0x00A81, 0x00A82, WBP::Extend}, - {0x00A83, 0x00A83, WBP::Extend}, + {0x00A81, 0x00A83, WBP::Extend}, {0x00A85, 0x00A8D, WBP::ALetter}, {0x00A8F, 0x00A91, WBP::ALetter}, {0x00A93, 0x00AA8, WBP::ALetter}, @@ -280,20 +244,16 @@ const std::array g_word_break_intervals = {{ {0x00AB5, 0x00AB9, WBP::ALetter}, {0x00ABC, 0x00ABC, WBP::Extend}, {0x00ABD, 0x00ABD, WBP::ALetter}, - {0x00ABE, 0x00AC0, WBP::Extend}, - {0x00AC1, 0x00AC5, WBP::Extend}, - {0x00AC7, 0x00AC8, WBP::Extend}, - {0x00AC9, 0x00AC9, WBP::Extend}, - {0x00ACB, 0x00ACC, WBP::Extend}, - {0x00ACD, 0x00ACD, WBP::Extend}, + {0x00ABE, 0x00AC5, WBP::Extend}, + {0x00AC7, 0x00AC9, WBP::Extend}, + {0x00ACB, 0x00ACD, WBP::Extend}, {0x00AD0, 0x00AD0, WBP::ALetter}, {0x00AE0, 0x00AE1, WBP::ALetter}, {0x00AE2, 0x00AE3, WBP::Extend}, {0x00AE6, 0x00AEF, WBP::Numeric}, {0x00AF9, 0x00AF9, WBP::ALetter}, {0x00AFA, 0x00AFF, WBP::Extend}, - {0x00B01, 0x00B01, WBP::Extend}, - {0x00B02, 0x00B03, WBP::Extend}, + {0x00B01, 0x00B03, WBP::Extend}, {0x00B05, 0x00B0C, WBP::ALetter}, {0x00B0F, 0x00B10, WBP::ALetter}, {0x00B13, 0x00B28, WBP::ALetter}, @@ -302,15 +262,10 @@ const std::array g_word_break_intervals = {{ {0x00B35, 0x00B39, WBP::ALetter}, {0x00B3C, 0x00B3C, WBP::Extend}, {0x00B3D, 0x00B3D, WBP::ALetter}, - {0x00B3E, 0x00B3E, WBP::Extend}, - {0x00B3F, 0x00B3F, WBP::Extend}, - {0x00B40, 0x00B40, WBP::Extend}, - {0x00B41, 0x00B44, WBP::Extend}, + {0x00B3E, 0x00B44, WBP::Extend}, {0x00B47, 0x00B48, WBP::Extend}, - 
{0x00B4B, 0x00B4C, WBP::Extend}, - {0x00B4D, 0x00B4D, WBP::Extend}, - {0x00B55, 0x00B56, WBP::Extend}, - {0x00B57, 0x00B57, WBP::Extend}, + {0x00B4B, 0x00B4D, WBP::Extend}, + {0x00B55, 0x00B57, WBP::Extend}, {0x00B5C, 0x00B5D, WBP::ALetter}, {0x00B5F, 0x00B61, WBP::ALetter}, {0x00B62, 0x00B63, WBP::Extend}, @@ -327,25 +282,19 @@ const std::array g_word_break_intervals = {{ {0x00BA3, 0x00BA4, WBP::ALetter}, {0x00BA8, 0x00BAA, WBP::ALetter}, {0x00BAE, 0x00BB9, WBP::ALetter}, - {0x00BBE, 0x00BBF, WBP::Extend}, - {0x00BC0, 0x00BC0, WBP::Extend}, - {0x00BC1, 0x00BC2, WBP::Extend}, + {0x00BBE, 0x00BC2, WBP::Extend}, {0x00BC6, 0x00BC8, WBP::Extend}, - {0x00BCA, 0x00BCC, WBP::Extend}, - {0x00BCD, 0x00BCD, WBP::Extend}, + {0x00BCA, 0x00BCD, WBP::Extend}, {0x00BD0, 0x00BD0, WBP::ALetter}, {0x00BD7, 0x00BD7, WBP::Extend}, {0x00BE6, 0x00BEF, WBP::Numeric}, - {0x00C00, 0x00C00, WBP::Extend}, - {0x00C01, 0x00C03, WBP::Extend}, - {0x00C04, 0x00C04, WBP::Extend}, + {0x00C00, 0x00C04, WBP::Extend}, {0x00C05, 0x00C0C, WBP::ALetter}, {0x00C0E, 0x00C10, WBP::ALetter}, {0x00C12, 0x00C28, WBP::ALetter}, {0x00C2A, 0x00C39, WBP::ALetter}, {0x00C3D, 0x00C3D, WBP::ALetter}, - {0x00C3E, 0x00C40, WBP::Extend}, - {0x00C41, 0x00C44, WBP::Extend}, + {0x00C3E, 0x00C44, WBP::Extend}, {0x00C46, 0x00C48, WBP::Extend}, {0x00C4A, 0x00C4D, WBP::Extend}, {0x00C55, 0x00C56, WBP::Extend}, @@ -354,8 +303,7 @@ const std::array g_word_break_intervals = {{ {0x00C62, 0x00C63, WBP::Extend}, {0x00C66, 0x00C6F, WBP::Numeric}, {0x00C80, 0x00C80, WBP::ALetter}, - {0x00C81, 0x00C81, WBP::Extend}, - {0x00C82, 0x00C83, WBP::Extend}, + {0x00C81, 0x00C83, WBP::Extend}, {0x00C85, 0x00C8C, WBP::ALetter}, {0x00C8E, 0x00C90, WBP::ALetter}, {0x00C92, 0x00CA8, WBP::ALetter}, @@ -363,31 +311,24 @@ const std::array g_word_break_intervals = {{ {0x00CB5, 0x00CB9, WBP::ALetter}, {0x00CBC, 0x00CBC, WBP::Extend}, {0x00CBD, 0x00CBD, WBP::ALetter}, - {0x00CBE, 0x00CBE, WBP::Extend}, - {0x00CBF, 0x00CBF, WBP::Extend}, - {0x00CC0, 
0x00CC4, WBP::Extend}, - {0x00CC6, 0x00CC6, WBP::Extend}, - {0x00CC7, 0x00CC8, WBP::Extend}, - {0x00CCA, 0x00CCB, WBP::Extend}, - {0x00CCC, 0x00CCD, WBP::Extend}, + {0x00CBE, 0x00CC4, WBP::Extend}, + {0x00CC6, 0x00CC8, WBP::Extend}, + {0x00CCA, 0x00CCD, WBP::Extend}, {0x00CD5, 0x00CD6, WBP::Extend}, {0x00CDE, 0x00CDE, WBP::ALetter}, {0x00CE0, 0x00CE1, WBP::ALetter}, {0x00CE2, 0x00CE3, WBP::Extend}, {0x00CE6, 0x00CEF, WBP::Numeric}, {0x00CF1, 0x00CF2, WBP::ALetter}, - {0x00D00, 0x00D01, WBP::Extend}, - {0x00D02, 0x00D03, WBP::Extend}, + {0x00D00, 0x00D03, WBP::Extend}, {0x00D04, 0x00D0C, WBP::ALetter}, {0x00D0E, 0x00D10, WBP::ALetter}, {0x00D12, 0x00D3A, WBP::ALetter}, {0x00D3B, 0x00D3C, WBP::Extend}, {0x00D3D, 0x00D3D, WBP::ALetter}, - {0x00D3E, 0x00D40, WBP::Extend}, - {0x00D41, 0x00D44, WBP::Extend}, + {0x00D3E, 0x00D44, WBP::Extend}, {0x00D46, 0x00D48, WBP::Extend}, - {0x00D4A, 0x00D4C, WBP::Extend}, - {0x00D4D, 0x00D4D, WBP::Extend}, + {0x00D4A, 0x00D4D, WBP::Extend}, {0x00D4E, 0x00D4E, WBP::ALetter}, {0x00D54, 0x00D56, WBP::ALetter}, {0x00D57, 0x00D57, WBP::Extend}, @@ -395,16 +336,14 @@ const std::array g_word_break_intervals = {{ {0x00D62, 0x00D63, WBP::Extend}, {0x00D66, 0x00D6F, WBP::Numeric}, {0x00D7A, 0x00D7F, WBP::ALetter}, - {0x00D81, 0x00D81, WBP::Extend}, - {0x00D82, 0x00D83, WBP::Extend}, + {0x00D81, 0x00D83, WBP::Extend}, {0x00D85, 0x00D96, WBP::ALetter}, {0x00D9A, 0x00DB1, WBP::ALetter}, {0x00DB3, 0x00DBB, WBP::ALetter}, {0x00DBD, 0x00DBD, WBP::ALetter}, {0x00DC0, 0x00DC6, WBP::ALetter}, {0x00DCA, 0x00DCA, WBP::Extend}, - {0x00DCF, 0x00DD1, WBP::Extend}, - {0x00DD2, 0x00DD4, WBP::Extend}, + {0x00DCF, 0x00DD4, WBP::Extend}, {0x00DD6, 0x00DD6, WBP::Extend}, {0x00DD8, 0x00DDF, WBP::Extend}, {0x00DE6, 0x00DEF, WBP::Numeric}, @@ -426,45 +365,28 @@ const std::array g_word_break_intervals = {{ {0x00F3E, 0x00F3F, WBP::Extend}, {0x00F40, 0x00F47, WBP::ALetter}, {0x00F49, 0x00F6C, WBP::ALetter}, - {0x00F71, 0x00F7E, WBP::Extend}, - {0x00F7F, 0x00F7F, 
WBP::Extend}, - {0x00F80, 0x00F84, WBP::Extend}, + {0x00F71, 0x00F84, WBP::Extend}, {0x00F86, 0x00F87, WBP::Extend}, {0x00F88, 0x00F8C, WBP::ALetter}, {0x00F8D, 0x00F97, WBP::Extend}, {0x00F99, 0x00FBC, WBP::Extend}, {0x00FC6, 0x00FC6, WBP::Extend}, - {0x0102B, 0x0102C, WBP::Extend}, - {0x0102D, 0x01030, WBP::Extend}, - {0x01031, 0x01031, WBP::Extend}, - {0x01032, 0x01037, WBP::Extend}, - {0x01038, 0x01038, WBP::Extend}, - {0x01039, 0x0103A, WBP::Extend}, - {0x0103B, 0x0103C, WBP::Extend}, - {0x0103D, 0x0103E, WBP::Extend}, + {0x0102B, 0x0103E, WBP::Extend}, {0x01040, 0x01049, WBP::Numeric}, - {0x01056, 0x01057, WBP::Extend}, - {0x01058, 0x01059, WBP::Extend}, + {0x01056, 0x01059, WBP::Extend}, {0x0105E, 0x01060, WBP::Extend}, {0x01062, 0x01064, WBP::Extend}, {0x01067, 0x0106D, WBP::Extend}, {0x01071, 0x01074, WBP::Extend}, - {0x01082, 0x01082, WBP::Extend}, - {0x01083, 0x01084, WBP::Extend}, - {0x01085, 0x01086, WBP::Extend}, - {0x01087, 0x0108C, WBP::Extend}, - {0x0108D, 0x0108D, WBP::Extend}, + {0x01082, 0x0108D, WBP::Extend}, {0x0108F, 0x0108F, WBP::Extend}, {0x01090, 0x01099, WBP::Numeric}, - {0x0109A, 0x0109C, WBP::Extend}, - {0x0109D, 0x0109D, WBP::Extend}, + {0x0109A, 0x0109D, WBP::Extend}, {0x010A0, 0x010C5, WBP::ALetter}, {0x010C7, 0x010C7, WBP::ALetter}, {0x010CD, 0x010CD, WBP::ALetter}, {0x010D0, 0x010FA, WBP::ALetter}, - {0x010FC, 0x010FC, WBP::ALetter}, - {0x010FD, 0x010FF, WBP::ALetter}, - {0x01100, 0x01248, WBP::ALetter}, + {0x010FC, 0x01248, WBP::ALetter}, {0x0124A, 0x0124D, WBP::ALetter}, {0x01250, 0x01256, WBP::ALetter}, {0x01258, 0x01258, WBP::ALetter}, @@ -489,8 +411,7 @@ const std::array g_word_break_intervals = {{ {0x01680, 0x01680, WBP::WSegSpace}, {0x01681, 0x0169A, WBP::ALetter}, {0x016A0, 0x016EA, WBP::ALetter}, - {0x016EE, 0x016F0, WBP::ALetter}, - {0x016F1, 0x016F8, WBP::ALetter}, + {0x016EE, 0x016F8, WBP::ALetter}, {0x01700, 0x0170C, WBP::ALetter}, {0x0170E, 0x01711, WBP::ALetter}, {0x01712, 0x01714, WBP::Extend}, @@ -501,21 +422,13 @@ 
const std::array g_word_break_intervals = {{ {0x01760, 0x0176C, WBP::ALetter}, {0x0176E, 0x01770, WBP::ALetter}, {0x01772, 0x01773, WBP::Extend}, - {0x017B4, 0x017B5, WBP::Extend}, - {0x017B6, 0x017B6, WBP::Extend}, - {0x017B7, 0x017BD, WBP::Extend}, - {0x017BE, 0x017C5, WBP::Extend}, - {0x017C6, 0x017C6, WBP::Extend}, - {0x017C7, 0x017C8, WBP::Extend}, - {0x017C9, 0x017D3, WBP::Extend}, + {0x017B4, 0x017D3, WBP::Extend}, {0x017DD, 0x017DD, WBP::Extend}, {0x017E0, 0x017E9, WBP::Numeric}, {0x0180B, 0x0180D, WBP::Extend}, {0x0180E, 0x0180E, WBP::Format}, {0x01810, 0x01819, WBP::Numeric}, - {0x01820, 0x01842, WBP::ALetter}, - {0x01843, 0x01843, WBP::ALetter}, - {0x01844, 0x01878, WBP::ALetter}, + {0x01820, 0x01878, WBP::ALetter}, {0x01880, 0x01884, WBP::ALetter}, {0x01885, 0x01886, WBP::Extend}, {0x01887, 0x018A8, WBP::ALetter}, @@ -523,102 +436,50 @@ const std::array g_word_break_intervals = {{ {0x018AA, 0x018AA, WBP::ALetter}, {0x018B0, 0x018F5, WBP::ALetter}, {0x01900, 0x0191E, WBP::ALetter}, - {0x01920, 0x01922, WBP::Extend}, - {0x01923, 0x01926, WBP::Extend}, - {0x01927, 0x01928, WBP::Extend}, - {0x01929, 0x0192B, WBP::Extend}, - {0x01930, 0x01931, WBP::Extend}, - {0x01932, 0x01932, WBP::Extend}, - {0x01933, 0x01938, WBP::Extend}, - {0x01939, 0x0193B, WBP::Extend}, + {0x01920, 0x0192B, WBP::Extend}, + {0x01930, 0x0193B, WBP::Extend}, {0x01946, 0x0194F, WBP::Numeric}, {0x019D0, 0x019D9, WBP::Numeric}, {0x01A00, 0x01A16, WBP::ALetter}, - {0x01A17, 0x01A18, WBP::Extend}, - {0x01A19, 0x01A1A, WBP::Extend}, - {0x01A1B, 0x01A1B, WBP::Extend}, - {0x01A55, 0x01A55, WBP::Extend}, - {0x01A56, 0x01A56, WBP::Extend}, - {0x01A57, 0x01A57, WBP::Extend}, - {0x01A58, 0x01A5E, WBP::Extend}, - {0x01A60, 0x01A60, WBP::Extend}, - {0x01A61, 0x01A61, WBP::Extend}, - {0x01A62, 0x01A62, WBP::Extend}, - {0x01A63, 0x01A64, WBP::Extend}, - {0x01A65, 0x01A6C, WBP::Extend}, - {0x01A6D, 0x01A72, WBP::Extend}, - {0x01A73, 0x01A7C, WBP::Extend}, + {0x01A17, 0x01A1B, WBP::Extend}, + {0x01A55, 
0x01A5E, WBP::Extend}, + {0x01A60, 0x01A7C, WBP::Extend}, {0x01A7F, 0x01A7F, WBP::Extend}, {0x01A80, 0x01A89, WBP::Numeric}, {0x01A90, 0x01A99, WBP::Numeric}, - {0x01AB0, 0x01ABD, WBP::Extend}, - {0x01ABE, 0x01ABE, WBP::Extend}, - {0x01ABF, 0x01AC0, WBP::Extend}, - {0x01B00, 0x01B03, WBP::Extend}, - {0x01B04, 0x01B04, WBP::Extend}, + {0x01AB0, 0x01AC0, WBP::Extend}, + {0x01B00, 0x01B04, WBP::Extend}, {0x01B05, 0x01B33, WBP::ALetter}, - {0x01B34, 0x01B34, WBP::Extend}, - {0x01B35, 0x01B35, WBP::Extend}, - {0x01B36, 0x01B3A, WBP::Extend}, - {0x01B3B, 0x01B3B, WBP::Extend}, - {0x01B3C, 0x01B3C, WBP::Extend}, - {0x01B3D, 0x01B41, WBP::Extend}, - {0x01B42, 0x01B42, WBP::Extend}, - {0x01B43, 0x01B44, WBP::Extend}, + {0x01B34, 0x01B44, WBP::Extend}, {0x01B45, 0x01B4B, WBP::ALetter}, {0x01B50, 0x01B59, WBP::Numeric}, {0x01B6B, 0x01B73, WBP::Extend}, - {0x01B80, 0x01B81, WBP::Extend}, - {0x01B82, 0x01B82, WBP::Extend}, + {0x01B80, 0x01B82, WBP::Extend}, {0x01B83, 0x01BA0, WBP::ALetter}, - {0x01BA1, 0x01BA1, WBP::Extend}, - {0x01BA2, 0x01BA5, WBP::Extend}, - {0x01BA6, 0x01BA7, WBP::Extend}, - {0x01BA8, 0x01BA9, WBP::Extend}, - {0x01BAA, 0x01BAA, WBP::Extend}, - {0x01BAB, 0x01BAD, WBP::Extend}, + {0x01BA1, 0x01BAD, WBP::Extend}, {0x01BAE, 0x01BAF, WBP::ALetter}, {0x01BB0, 0x01BB9, WBP::Numeric}, {0x01BBA, 0x01BE5, WBP::ALetter}, - {0x01BE6, 0x01BE6, WBP::Extend}, - {0x01BE7, 0x01BE7, WBP::Extend}, - {0x01BE8, 0x01BE9, WBP::Extend}, - {0x01BEA, 0x01BEC, WBP::Extend}, - {0x01BED, 0x01BED, WBP::Extend}, - {0x01BEE, 0x01BEE, WBP::Extend}, - {0x01BEF, 0x01BF1, WBP::Extend}, - {0x01BF2, 0x01BF3, WBP::Extend}, + {0x01BE6, 0x01BF3, WBP::Extend}, {0x01C00, 0x01C23, WBP::ALetter}, - {0x01C24, 0x01C2B, WBP::Extend}, - {0x01C2C, 0x01C33, WBP::Extend}, - {0x01C34, 0x01C35, WBP::Extend}, - {0x01C36, 0x01C37, WBP::Extend}, + {0x01C24, 0x01C37, WBP::Extend}, {0x01C40, 0x01C49, WBP::Numeric}, {0x01C4D, 0x01C4F, WBP::ALetter}, {0x01C50, 0x01C59, WBP::Numeric}, - {0x01C5A, 0x01C77, 
WBP::ALetter}, - {0x01C78, 0x01C7D, WBP::ALetter}, + {0x01C5A, 0x01C7D, WBP::ALetter}, {0x01C80, 0x01C88, WBP::ALetter}, {0x01C90, 0x01CBA, WBP::ALetter}, {0x01CBD, 0x01CBF, WBP::ALetter}, {0x01CD0, 0x01CD2, WBP::Extend}, - {0x01CD4, 0x01CE0, WBP::Extend}, - {0x01CE1, 0x01CE1, WBP::Extend}, - {0x01CE2, 0x01CE8, WBP::Extend}, + {0x01CD4, 0x01CE8, WBP::Extend}, {0x01CE9, 0x01CEC, WBP::ALetter}, {0x01CED, 0x01CED, WBP::Extend}, {0x01CEE, 0x01CF3, WBP::ALetter}, {0x01CF4, 0x01CF4, WBP::Extend}, {0x01CF5, 0x01CF6, WBP::ALetter}, - {0x01CF7, 0x01CF7, WBP::Extend}, - {0x01CF8, 0x01CF9, WBP::Extend}, + {0x01CF7, 0x01CF9, WBP::Extend}, {0x01CFA, 0x01CFA, WBP::ALetter}, - {0x01D00, 0x01D2B, WBP::ALetter}, - {0x01D2C, 0x01D6A, WBP::ALetter}, - {0x01D6B, 0x01D77, WBP::ALetter}, - {0x01D78, 0x01D78, WBP::ALetter}, - {0x01D79, 0x01D9A, WBP::ALetter}, - {0x01D9B, 0x01DBF, WBP::ALetter}, + {0x01D00, 0x01DBF, WBP::ALetter}, {0x01DC0, 0x01DF9, WBP::Extend}, {0x01DFB, 0x01DFF, WBP::Extend}, {0x01E00, 0x01F15, WBP::ALetter}, @@ -645,12 +506,10 @@ const std::array g_word_break_intervals = {{ {0x0200C, 0x0200C, WBP::Extend}, {0x0200D, 0x0200D, WBP::ZWJ}, {0x0200E, 0x0200F, WBP::Format}, - {0x02018, 0x02018, WBP::MidNumLet}, - {0x02019, 0x02019, WBP::MidNumLet}, + {0x02018, 0x02019, WBP::MidNumLet}, {0x02024, 0x02024, WBP::MidNumLet}, {0x02027, 0x02027, WBP::MidLetter}, - {0x02028, 0x02028, WBP::Newline}, - {0x02029, 0x02029, WBP::Newline}, + {0x02028, 0x02029, WBP::Newline}, {0x0202A, 0x0202E, WBP::Format}, {0x0202F, 0x0202F, WBP::ExtendNumLet}, {0x0203F, 0x02040, WBP::ExtendNumLet}, @@ -662,11 +521,7 @@ const std::array g_word_break_intervals = {{ {0x02071, 0x02071, WBP::ALetter}, {0x0207F, 0x0207F, WBP::ALetter}, {0x02090, 0x0209C, WBP::ALetter}, - {0x020D0, 0x020DC, WBP::Extend}, - {0x020DD, 0x020E0, WBP::Extend}, - {0x020E1, 0x020E1, WBP::Extend}, - {0x020E2, 0x020E4, WBP::Extend}, - {0x020E5, 0x020F0, WBP::Extend}, + {0x020D0, 0x020F0, WBP::Extend}, {0x02102, 0x02102, 
WBP::ALetter}, {0x02107, 0x02107, WBP::ALetter}, {0x0210A, 0x02113, WBP::ALetter}, @@ -676,21 +531,15 @@ const std::array g_word_break_intervals = {{ {0x02126, 0x02126, WBP::ALetter}, {0x02128, 0x02128, WBP::ALetter}, {0x0212A, 0x0212D, WBP::ALetter}, - {0x0212F, 0x02134, WBP::ALetter}, - {0x02135, 0x02138, WBP::ALetter}, - {0x02139, 0x02139, WBP::ALetter}, + {0x0212F, 0x02139, WBP::ALetter}, {0x0213C, 0x0213F, WBP::ALetter}, {0x02145, 0x02149, WBP::ALetter}, {0x0214E, 0x0214E, WBP::ALetter}, - {0x02160, 0x02182, WBP::ALetter}, - {0x02183, 0x02184, WBP::ALetter}, - {0x02185, 0x02188, WBP::ALetter}, + {0x02160, 0x02188, WBP::ALetter}, {0x024B6, 0x024E9, WBP::ALetter}, {0x02C00, 0x02C2E, WBP::ALetter}, {0x02C30, 0x02C5E, WBP::ALetter}, - {0x02C60, 0x02C7B, WBP::ALetter}, - {0x02C7C, 0x02C7D, WBP::ALetter}, - {0x02C7E, 0x02CE4, WBP::ALetter}, + {0x02C60, 0x02CE4, WBP::ALetter}, {0x02CEB, 0x02CEE, WBP::ALetter}, {0x02CEF, 0x02CF1, WBP::Extend}, {0x02CF2, 0x02CF3, WBP::ALetter}, @@ -713,77 +562,47 @@ const std::array g_word_break_intervals = {{ {0x02E2F, 0x02E2F, WBP::ALetter}, {0x03000, 0x03000, WBP::WSegSpace}, {0x03005, 0x03005, WBP::ALetter}, - {0x0302A, 0x0302D, WBP::Extend}, - {0x0302E, 0x0302F, WBP::Extend}, + {0x0302A, 0x0302F, WBP::Extend}, {0x03031, 0x03035, WBP::Katakana}, - {0x0303B, 0x0303B, WBP::ALetter}, - {0x0303C, 0x0303C, WBP::ALetter}, + {0x0303B, 0x0303C, WBP::ALetter}, {0x03099, 0x0309A, WBP::Extend}, {0x0309B, 0x0309C, WBP::Katakana}, - {0x030A0, 0x030A0, WBP::Katakana}, - {0x030A1, 0x030FA, WBP::Katakana}, - {0x030FC, 0x030FE, WBP::Katakana}, - {0x030FF, 0x030FF, WBP::Katakana}, + {0x030A0, 0x030FA, WBP::Katakana}, + {0x030FC, 0x030FF, WBP::Katakana}, {0x03105, 0x0312F, WBP::ALetter}, {0x03131, 0x0318E, WBP::ALetter}, {0x031A0, 0x031BF, WBP::ALetter}, {0x031F0, 0x031FF, WBP::Katakana}, {0x032D0, 0x032FE, WBP::Katakana}, {0x03300, 0x03357, WBP::Katakana}, - {0x0A000, 0x0A014, WBP::ALetter}, - {0x0A015, 0x0A015, WBP::ALetter}, - {0x0A016, 0x0A48C, 
WBP::ALetter}, - {0x0A4D0, 0x0A4F7, WBP::ALetter}, - {0x0A4F8, 0x0A4FD, WBP::ALetter}, - {0x0A500, 0x0A60B, WBP::ALetter}, - {0x0A60C, 0x0A60C, WBP::ALetter}, + {0x0A000, 0x0A48C, WBP::ALetter}, + {0x0A4D0, 0x0A4FD, WBP::ALetter}, + {0x0A500, 0x0A60C, WBP::ALetter}, {0x0A610, 0x0A61F, WBP::ALetter}, {0x0A620, 0x0A629, WBP::Numeric}, {0x0A62A, 0x0A62B, WBP::ALetter}, - {0x0A640, 0x0A66D, WBP::ALetter}, - {0x0A66E, 0x0A66E, WBP::ALetter}, - {0x0A66F, 0x0A66F, WBP::Extend}, - {0x0A670, 0x0A672, WBP::Extend}, + {0x0A640, 0x0A66E, WBP::ALetter}, + {0x0A66F, 0x0A672, WBP::Extend}, {0x0A674, 0x0A67D, WBP::Extend}, - {0x0A67F, 0x0A67F, WBP::ALetter}, - {0x0A680, 0x0A69B, WBP::ALetter}, - {0x0A69C, 0x0A69D, WBP::ALetter}, + {0x0A67F, 0x0A69D, WBP::ALetter}, {0x0A69E, 0x0A69F, WBP::Extend}, - {0x0A6A0, 0x0A6E5, WBP::ALetter}, - {0x0A6E6, 0x0A6EF, WBP::ALetter}, + {0x0A6A0, 0x0A6EF, WBP::ALetter}, {0x0A6F0, 0x0A6F1, WBP::Extend}, - {0x0A708, 0x0A716, WBP::ALetter}, - {0x0A717, 0x0A71F, WBP::ALetter}, - {0x0A720, 0x0A721, WBP::ALetter}, - {0x0A722, 0x0A76F, WBP::ALetter}, - {0x0A770, 0x0A770, WBP::ALetter}, - {0x0A771, 0x0A787, WBP::ALetter}, - {0x0A788, 0x0A788, WBP::ALetter}, - {0x0A789, 0x0A78A, WBP::ALetter}, - {0x0A78B, 0x0A78E, WBP::ALetter}, - {0x0A78F, 0x0A78F, WBP::ALetter}, - {0x0A790, 0x0A7BF, WBP::ALetter}, + {0x0A708, 0x0A7BF, WBP::ALetter}, {0x0A7C2, 0x0A7CA, WBP::ALetter}, - {0x0A7F5, 0x0A7F6, WBP::ALetter}, - {0x0A7F7, 0x0A7F7, WBP::ALetter}, - {0x0A7F8, 0x0A7F9, WBP::ALetter}, - {0x0A7FA, 0x0A7FA, WBP::ALetter}, - {0x0A7FB, 0x0A801, WBP::ALetter}, + {0x0A7F5, 0x0A801, WBP::ALetter}, {0x0A802, 0x0A802, WBP::Extend}, {0x0A803, 0x0A805, WBP::ALetter}, {0x0A806, 0x0A806, WBP::Extend}, {0x0A807, 0x0A80A, WBP::ALetter}, {0x0A80B, 0x0A80B, WBP::Extend}, {0x0A80C, 0x0A822, WBP::ALetter}, - {0x0A823, 0x0A824, WBP::Extend}, - {0x0A825, 0x0A826, WBP::Extend}, - {0x0A827, 0x0A827, WBP::Extend}, + {0x0A823, 0x0A827, WBP::Extend}, {0x0A82C, 0x0A82C, WBP::Extend}, {0x0A840, 
0x0A873, WBP::ALetter}, {0x0A880, 0x0A881, WBP::Extend}, {0x0A882, 0x0A8B3, WBP::ALetter}, - {0x0A8B4, 0x0A8C3, WBP::Extend}, - {0x0A8C4, 0x0A8C5, WBP::Extend}, + {0x0A8B4, 0x0A8C5, WBP::Extend}, {0x0A8D0, 0x0A8D9, WBP::Numeric}, {0x0A8E0, 0x0A8F1, WBP::Extend}, {0x0A8F2, 0x0A8F7, WBP::ALetter}, @@ -794,69 +613,41 @@ const std::array g_word_break_intervals = {{ {0x0A90A, 0x0A925, WBP::ALetter}, {0x0A926, 0x0A92D, WBP::Extend}, {0x0A930, 0x0A946, WBP::ALetter}, - {0x0A947, 0x0A951, WBP::Extend}, - {0x0A952, 0x0A953, WBP::Extend}, + {0x0A947, 0x0A953, WBP::Extend}, {0x0A960, 0x0A97C, WBP::ALetter}, - {0x0A980, 0x0A982, WBP::Extend}, - {0x0A983, 0x0A983, WBP::Extend}, + {0x0A980, 0x0A983, WBP::Extend}, {0x0A984, 0x0A9B2, WBP::ALetter}, - {0x0A9B3, 0x0A9B3, WBP::Extend}, - {0x0A9B4, 0x0A9B5, WBP::Extend}, - {0x0A9B6, 0x0A9B9, WBP::Extend}, - {0x0A9BA, 0x0A9BB, WBP::Extend}, - {0x0A9BC, 0x0A9BD, WBP::Extend}, - {0x0A9BE, 0x0A9C0, WBP::Extend}, + {0x0A9B3, 0x0A9C0, WBP::Extend}, {0x0A9CF, 0x0A9CF, WBP::ALetter}, {0x0A9D0, 0x0A9D9, WBP::Numeric}, {0x0A9E5, 0x0A9E5, WBP::Extend}, {0x0A9F0, 0x0A9F9, WBP::Numeric}, {0x0AA00, 0x0AA28, WBP::ALetter}, - {0x0AA29, 0x0AA2E, WBP::Extend}, - {0x0AA2F, 0x0AA30, WBP::Extend}, - {0x0AA31, 0x0AA32, WBP::Extend}, - {0x0AA33, 0x0AA34, WBP::Extend}, - {0x0AA35, 0x0AA36, WBP::Extend}, + {0x0AA29, 0x0AA36, WBP::Extend}, {0x0AA40, 0x0AA42, WBP::ALetter}, {0x0AA43, 0x0AA43, WBP::Extend}, {0x0AA44, 0x0AA4B, WBP::ALetter}, - {0x0AA4C, 0x0AA4C, WBP::Extend}, - {0x0AA4D, 0x0AA4D, WBP::Extend}, + {0x0AA4C, 0x0AA4D, WBP::Extend}, {0x0AA50, 0x0AA59, WBP::Numeric}, - {0x0AA7B, 0x0AA7B, WBP::Extend}, - {0x0AA7C, 0x0AA7C, WBP::Extend}, - {0x0AA7D, 0x0AA7D, WBP::Extend}, + {0x0AA7B, 0x0AA7D, WBP::Extend}, {0x0AAB0, 0x0AAB0, WBP::Extend}, {0x0AAB2, 0x0AAB4, WBP::Extend}, {0x0AAB7, 0x0AAB8, WBP::Extend}, {0x0AABE, 0x0AABF, WBP::Extend}, {0x0AAC1, 0x0AAC1, WBP::Extend}, {0x0AAE0, 0x0AAEA, WBP::ALetter}, - {0x0AAEB, 0x0AAEB, WBP::Extend}, - {0x0AAEC, 
0x0AAED, WBP::Extend}, - {0x0AAEE, 0x0AAEF, WBP::Extend}, - {0x0AAF2, 0x0AAF2, WBP::ALetter}, - {0x0AAF3, 0x0AAF4, WBP::ALetter}, - {0x0AAF5, 0x0AAF5, WBP::Extend}, - {0x0AAF6, 0x0AAF6, WBP::Extend}, + {0x0AAEB, 0x0AAEF, WBP::Extend}, + {0x0AAF2, 0x0AAF4, WBP::ALetter}, + {0x0AAF5, 0x0AAF6, WBP::Extend}, {0x0AB01, 0x0AB06, WBP::ALetter}, {0x0AB09, 0x0AB0E, WBP::ALetter}, {0x0AB11, 0x0AB16, WBP::ALetter}, {0x0AB20, 0x0AB26, WBP::ALetter}, {0x0AB28, 0x0AB2E, WBP::ALetter}, - {0x0AB30, 0x0AB5A, WBP::ALetter}, - {0x0AB5B, 0x0AB5B, WBP::ALetter}, - {0x0AB5C, 0x0AB5F, WBP::ALetter}, - {0x0AB60, 0x0AB68, WBP::ALetter}, - {0x0AB69, 0x0AB69, WBP::ALetter}, - {0x0AB70, 0x0ABBF, WBP::ALetter}, - {0x0ABC0, 0x0ABE2, WBP::ALetter}, - {0x0ABE3, 0x0ABE4, WBP::Extend}, - {0x0ABE5, 0x0ABE5, WBP::Extend}, - {0x0ABE6, 0x0ABE7, WBP::Extend}, - {0x0ABE8, 0x0ABE8, WBP::Extend}, - {0x0ABE9, 0x0ABEA, WBP::Extend}, - {0x0ABEC, 0x0ABEC, WBP::Extend}, - {0x0ABED, 0x0ABED, WBP::Extend}, + {0x0AB30, 0x0AB69, WBP::ALetter}, + {0x0AB70, 0x0ABE2, WBP::ALetter}, + {0x0ABE3, 0x0ABEA, WBP::Extend}, + {0x0ABEC, 0x0ABED, WBP::Extend}, {0x0ABF0, 0x0ABF9, WBP::Numeric}, {0x0AC00, 0x0D7A3, WBP::ALetter}, {0x0D7B0, 0x0D7C6, WBP::ALetter}, @@ -900,9 +691,7 @@ const std::array g_word_break_intervals = {{ {0x0FF21, 0x0FF3A, WBP::ALetter}, {0x0FF3F, 0x0FF3F, WBP::ExtendNumLet}, {0x0FF41, 0x0FF5A, WBP::ALetter}, - {0x0FF66, 0x0FF6F, WBP::Katakana}, - {0x0FF70, 0x0FF70, WBP::Katakana}, - {0x0FF71, 0x0FF9D, WBP::Katakana}, + {0x0FF66, 0x0FF9D, WBP::Katakana}, {0x0FF9E, 0x0FF9F, WBP::Extend}, {0x0FFA0, 0x0FFBE, WBP::ALetter}, {0x0FFC2, 0x0FFC7, WBP::ALetter}, @@ -923,18 +712,14 @@ const std::array g_word_break_intervals = {{ {0x102A0, 0x102D0, WBP::ALetter}, {0x102E0, 0x102E0, WBP::Extend}, {0x10300, 0x1031F, WBP::ALetter}, - {0x1032D, 0x10340, WBP::ALetter}, - {0x10341, 0x10341, WBP::ALetter}, - {0x10342, 0x10349, WBP::ALetter}, - {0x1034A, 0x1034A, WBP::ALetter}, + {0x1032D, 0x1034A, WBP::ALetter}, {0x10350, 
0x10375, WBP::ALetter}, {0x10376, 0x1037A, WBP::Extend}, {0x10380, 0x1039D, WBP::ALetter}, {0x103A0, 0x103C3, WBP::ALetter}, {0x103C8, 0x103CF, WBP::ALetter}, {0x103D1, 0x103D5, WBP::ALetter}, - {0x10400, 0x1044F, WBP::ALetter}, - {0x10450, 0x1049D, WBP::ALetter}, + {0x10400, 0x1049D, WBP::ALetter}, {0x104A0, 0x104A9, WBP::Numeric}, {0x104B0, 0x104D3, WBP::ALetter}, {0x104D8, 0x104FB, WBP::ALetter}, @@ -990,28 +775,20 @@ const std::array g_word_break_intervals = {{ {0x10F46, 0x10F50, WBP::Extend}, {0x10FB0, 0x10FC4, WBP::ALetter}, {0x10FE0, 0x10FF6, WBP::ALetter}, - {0x11000, 0x11000, WBP::Extend}, - {0x11001, 0x11001, WBP::Extend}, - {0x11002, 0x11002, WBP::Extend}, + {0x11000, 0x11002, WBP::Extend}, {0x11003, 0x11037, WBP::ALetter}, {0x11038, 0x11046, WBP::Extend}, {0x11066, 0x1106F, WBP::Numeric}, - {0x1107F, 0x11081, WBP::Extend}, - {0x11082, 0x11082, WBP::Extend}, + {0x1107F, 0x11082, WBP::Extend}, {0x11083, 0x110AF, WBP::ALetter}, - {0x110B0, 0x110B2, WBP::Extend}, - {0x110B3, 0x110B6, WBP::Extend}, - {0x110B7, 0x110B8, WBP::Extend}, - {0x110B9, 0x110BA, WBP::Extend}, + {0x110B0, 0x110BA, WBP::Extend}, {0x110BD, 0x110BD, WBP::Format}, {0x110CD, 0x110CD, WBP::Format}, {0x110D0, 0x110E8, WBP::ALetter}, {0x110F0, 0x110F9, WBP::Numeric}, {0x11100, 0x11102, WBP::Extend}, {0x11103, 0x11126, WBP::ALetter}, - {0x11127, 0x1112B, WBP::Extend}, - {0x1112C, 0x1112C, WBP::Extend}, - {0x1112D, 0x11134, WBP::Extend}, + {0x11127, 0x11134, WBP::Extend}, {0x11136, 0x1113F, WBP::Numeric}, {0x11144, 0x11144, WBP::ALetter}, {0x11145, 0x11146, WBP::Extend}, @@ -1019,27 +796,18 @@ const std::array g_word_break_intervals = {{ {0x11150, 0x11172, WBP::ALetter}, {0x11173, 0x11173, WBP::Extend}, {0x11176, 0x11176, WBP::ALetter}, - {0x11180, 0x11181, WBP::Extend}, - {0x11182, 0x11182, WBP::Extend}, + {0x11180, 0x11182, WBP::Extend}, {0x11183, 0x111B2, WBP::ALetter}, - {0x111B3, 0x111B5, WBP::Extend}, - {0x111B6, 0x111BE, WBP::Extend}, - {0x111BF, 0x111C0, WBP::Extend}, + {0x111B3, 
0x111C0, WBP::Extend}, {0x111C1, 0x111C4, WBP::ALetter}, {0x111C9, 0x111CC, WBP::Extend}, - {0x111CE, 0x111CE, WBP::Extend}, - {0x111CF, 0x111CF, WBP::Extend}, + {0x111CE, 0x111CF, WBP::Extend}, {0x111D0, 0x111D9, WBP::Numeric}, {0x111DA, 0x111DA, WBP::ALetter}, {0x111DC, 0x111DC, WBP::ALetter}, {0x11200, 0x11211, WBP::ALetter}, {0x11213, 0x1122B, WBP::ALetter}, - {0x1122C, 0x1122E, WBP::Extend}, - {0x1122F, 0x11231, WBP::Extend}, - {0x11232, 0x11233, WBP::Extend}, - {0x11234, 0x11234, WBP::Extend}, - {0x11235, 0x11235, WBP::Extend}, - {0x11236, 0x11237, WBP::Extend}, + {0x1122C, 0x11237, WBP::Extend}, {0x1123E, 0x1123E, WBP::Extend}, {0x11280, 0x11286, WBP::ALetter}, {0x11288, 0x11288, WBP::ALetter}, @@ -1047,12 +815,9 @@ const std::array g_word_break_intervals = {{ {0x1128F, 0x1129D, WBP::ALetter}, {0x1129F, 0x112A8, WBP::ALetter}, {0x112B0, 0x112DE, WBP::ALetter}, - {0x112DF, 0x112DF, WBP::Extend}, - {0x112E0, 0x112E2, WBP::Extend}, - {0x112E3, 0x112EA, WBP::Extend}, + {0x112DF, 0x112EA, WBP::Extend}, {0x112F0, 0x112F9, WBP::Numeric}, - {0x11300, 0x11301, WBP::Extend}, - {0x11302, 0x11303, WBP::Extend}, + {0x11300, 0x11303, WBP::Extend}, {0x11305, 0x1130C, WBP::ALetter}, {0x1130F, 0x11310, WBP::ALetter}, {0x11313, 0x11328, WBP::ALetter}, @@ -1061,9 +826,7 @@ const std::array g_word_break_intervals = {{ {0x11335, 0x11339, WBP::ALetter}, {0x1133B, 0x1133C, WBP::Extend}, {0x1133D, 0x1133D, WBP::ALetter}, - {0x1133E, 0x1133F, WBP::Extend}, - {0x11340, 0x11340, WBP::Extend}, - {0x11341, 0x11344, WBP::Extend}, + {0x1133E, 0x11344, WBP::Extend}, {0x11347, 0x11348, WBP::Extend}, {0x1134B, 0x1134D, WBP::Extend}, {0x11350, 0x11350, WBP::ALetter}, @@ -1073,67 +836,33 @@ const std::array g_word_break_intervals = {{ {0x11366, 0x1136C, WBP::Extend}, {0x11370, 0x11374, WBP::Extend}, {0x11400, 0x11434, WBP::ALetter}, - {0x11435, 0x11437, WBP::Extend}, - {0x11438, 0x1143F, WBP::Extend}, - {0x11440, 0x11441, WBP::Extend}, - {0x11442, 0x11444, WBP::Extend}, - {0x11445, 0x11445, 
WBP::Extend}, - {0x11446, 0x11446, WBP::Extend}, + {0x11435, 0x11446, WBP::Extend}, {0x11447, 0x1144A, WBP::ALetter}, {0x11450, 0x11459, WBP::Numeric}, {0x1145E, 0x1145E, WBP::Extend}, {0x1145F, 0x11461, WBP::ALetter}, {0x11480, 0x114AF, WBP::ALetter}, - {0x114B0, 0x114B2, WBP::Extend}, - {0x114B3, 0x114B8, WBP::Extend}, - {0x114B9, 0x114B9, WBP::Extend}, - {0x114BA, 0x114BA, WBP::Extend}, - {0x114BB, 0x114BE, WBP::Extend}, - {0x114BF, 0x114C0, WBP::Extend}, - {0x114C1, 0x114C1, WBP::Extend}, - {0x114C2, 0x114C3, WBP::Extend}, + {0x114B0, 0x114C3, WBP::Extend}, {0x114C4, 0x114C5, WBP::ALetter}, {0x114C7, 0x114C7, WBP::ALetter}, {0x114D0, 0x114D9, WBP::Numeric}, {0x11580, 0x115AE, WBP::ALetter}, - {0x115AF, 0x115B1, WBP::Extend}, - {0x115B2, 0x115B5, WBP::Extend}, - {0x115B8, 0x115BB, WBP::Extend}, - {0x115BC, 0x115BD, WBP::Extend}, - {0x115BE, 0x115BE, WBP::Extend}, - {0x115BF, 0x115C0, WBP::Extend}, + {0x115AF, 0x115B5, WBP::Extend}, + {0x115B8, 0x115C0, WBP::Extend}, {0x115D8, 0x115DB, WBP::ALetter}, {0x115DC, 0x115DD, WBP::Extend}, {0x11600, 0x1162F, WBP::ALetter}, - {0x11630, 0x11632, WBP::Extend}, - {0x11633, 0x1163A, WBP::Extend}, - {0x1163B, 0x1163C, WBP::Extend}, - {0x1163D, 0x1163D, WBP::Extend}, - {0x1163E, 0x1163E, WBP::Extend}, - {0x1163F, 0x11640, WBP::Extend}, + {0x11630, 0x11640, WBP::Extend}, {0x11644, 0x11644, WBP::ALetter}, {0x11650, 0x11659, WBP::Numeric}, {0x11680, 0x116AA, WBP::ALetter}, - {0x116AB, 0x116AB, WBP::Extend}, - {0x116AC, 0x116AC, WBP::Extend}, - {0x116AD, 0x116AD, WBP::Extend}, - {0x116AE, 0x116AF, WBP::Extend}, - {0x116B0, 0x116B5, WBP::Extend}, - {0x116B6, 0x116B6, WBP::Extend}, - {0x116B7, 0x116B7, WBP::Extend}, + {0x116AB, 0x116B7, WBP::Extend}, {0x116B8, 0x116B8, WBP::ALetter}, {0x116C0, 0x116C9, WBP::Numeric}, - {0x1171D, 0x1171F, WBP::Extend}, - {0x11720, 0x11721, WBP::Extend}, - {0x11722, 0x11725, WBP::Extend}, - {0x11726, 0x11726, WBP::Extend}, - {0x11727, 0x1172B, WBP::Extend}, + {0x1171D, 0x1172B, WBP::Extend}, {0x11730, 
0x11739, WBP::Numeric}, {0x11800, 0x1182B, WBP::ALetter}, - {0x1182C, 0x1182E, WBP::Extend}, - {0x1182F, 0x11837, WBP::Extend}, - {0x11838, 0x11838, WBP::Extend}, - {0x11839, 0x1183A, WBP::Extend}, + {0x1182C, 0x1183A, WBP::Extend}, {0x118A0, 0x118DF, WBP::ALetter}, {0x118E0, 0x118E9, WBP::Numeric}, {0x118FF, 0x11906, WBP::ALetter}, @@ -1143,60 +872,41 @@ const std::array g_word_break_intervals = {{ {0x11918, 0x1192F, WBP::ALetter}, {0x11930, 0x11935, WBP::Extend}, {0x11937, 0x11938, WBP::Extend}, - {0x1193B, 0x1193C, WBP::Extend}, - {0x1193D, 0x1193D, WBP::Extend}, - {0x1193E, 0x1193E, WBP::Extend}, + {0x1193B, 0x1193E, WBP::Extend}, {0x1193F, 0x1193F, WBP::ALetter}, {0x11940, 0x11940, WBP::Extend}, {0x11941, 0x11941, WBP::ALetter}, - {0x11942, 0x11942, WBP::Extend}, - {0x11943, 0x11943, WBP::Extend}, + {0x11942, 0x11943, WBP::Extend}, {0x11950, 0x11959, WBP::Numeric}, {0x119A0, 0x119A7, WBP::ALetter}, {0x119AA, 0x119D0, WBP::ALetter}, - {0x119D1, 0x119D3, WBP::Extend}, - {0x119D4, 0x119D7, WBP::Extend}, - {0x119DA, 0x119DB, WBP::Extend}, - {0x119DC, 0x119DF, WBP::Extend}, - {0x119E0, 0x119E0, WBP::Extend}, + {0x119D1, 0x119D7, WBP::Extend}, + {0x119DA, 0x119E0, WBP::Extend}, {0x119E1, 0x119E1, WBP::ALetter}, {0x119E3, 0x119E3, WBP::ALetter}, {0x119E4, 0x119E4, WBP::Extend}, {0x11A00, 0x11A00, WBP::ALetter}, {0x11A01, 0x11A0A, WBP::Extend}, {0x11A0B, 0x11A32, WBP::ALetter}, - {0x11A33, 0x11A38, WBP::Extend}, - {0x11A39, 0x11A39, WBP::Extend}, + {0x11A33, 0x11A39, WBP::Extend}, {0x11A3A, 0x11A3A, WBP::ALetter}, {0x11A3B, 0x11A3E, WBP::Extend}, {0x11A47, 0x11A47, WBP::Extend}, {0x11A50, 0x11A50, WBP::ALetter}, - {0x11A51, 0x11A56, WBP::Extend}, - {0x11A57, 0x11A58, WBP::Extend}, - {0x11A59, 0x11A5B, WBP::Extend}, + {0x11A51, 0x11A5B, WBP::Extend}, {0x11A5C, 0x11A89, WBP::ALetter}, - {0x11A8A, 0x11A96, WBP::Extend}, - {0x11A97, 0x11A97, WBP::Extend}, - {0x11A98, 0x11A99, WBP::Extend}, + {0x11A8A, 0x11A99, WBP::Extend}, {0x11A9D, 0x11A9D, WBP::ALetter}, {0x11AC0, 
0x11AF8, WBP::ALetter}, {0x11C00, 0x11C08, WBP::ALetter}, {0x11C0A, 0x11C2E, WBP::ALetter}, - {0x11C2F, 0x11C2F, WBP::Extend}, - {0x11C30, 0x11C36, WBP::Extend}, - {0x11C38, 0x11C3D, WBP::Extend}, - {0x11C3E, 0x11C3E, WBP::Extend}, - {0x11C3F, 0x11C3F, WBP::Extend}, + {0x11C2F, 0x11C36, WBP::Extend}, + {0x11C38, 0x11C3F, WBP::Extend}, {0x11C40, 0x11C40, WBP::ALetter}, {0x11C50, 0x11C59, WBP::Numeric}, {0x11C72, 0x11C8F, WBP::ALetter}, {0x11C92, 0x11CA7, WBP::Extend}, - {0x11CA9, 0x11CA9, WBP::Extend}, - {0x11CAA, 0x11CB0, WBP::Extend}, - {0x11CB1, 0x11CB1, WBP::Extend}, - {0x11CB2, 0x11CB3, WBP::Extend}, - {0x11CB4, 0x11CB4, WBP::Extend}, - {0x11CB5, 0x11CB6, WBP::Extend}, + {0x11CA9, 0x11CB6, WBP::Extend}, {0x11D00, 0x11D06, WBP::ALetter}, {0x11D08, 0x11D09, WBP::ALetter}, {0x11D0B, 0x11D30, WBP::ALetter}, @@ -1212,15 +922,11 @@ const std::array g_word_break_intervals = {{ {0x11D6A, 0x11D89, WBP::ALetter}, {0x11D8A, 0x11D8E, WBP::Extend}, {0x11D90, 0x11D91, WBP::Extend}, - {0x11D93, 0x11D94, WBP::Extend}, - {0x11D95, 0x11D95, WBP::Extend}, - {0x11D96, 0x11D96, WBP::Extend}, - {0x11D97, 0x11D97, WBP::Extend}, + {0x11D93, 0x11D97, WBP::Extend}, {0x11D98, 0x11D98, WBP::ALetter}, {0x11DA0, 0x11DA9, WBP::Numeric}, {0x11EE0, 0x11EF2, WBP::ALetter}, - {0x11EF3, 0x11EF4, WBP::Extend}, - {0x11EF5, 0x11EF6, WBP::Extend}, + {0x11EF3, 0x11EF6, WBP::Extend}, {0x11FB0, 0x11FB0, WBP::ALetter}, {0x12000, 0x12399, WBP::ALetter}, {0x12400, 0x1246E, WBP::ALetter}, @@ -1258,8 +964,7 @@ const std::array g_word_break_intervals = {{ {0x1BC90, 0x1BC99, WBP::ALetter}, {0x1BC9D, 0x1BC9E, WBP::Extend}, {0x1BCA0, 0x1BCA3, WBP::Format}, - {0x1D165, 0x1D166, WBP::Extend}, - {0x1D167, 0x1D169, WBP::Extend}, + {0x1D165, 0x1D169, WBP::Extend}, {0x1D16D, 0x1D172, WBP::Extend}, {0x1D173, 0x1D17A, WBP::Format}, {0x1D17B, 0x1D182, WBP::Extend}, @@ -1366,6 +1071,30 @@ const std::array g_word_break_intervals = {{ {0xE0100, 0xE01EF, WBP::Extend}, }}; +// Construct table of just WBP::Extend character 
intervals +constexpr auto g_extend_characters{[]() constexpr { + // Compute number of extend character intervals + constexpr size_t size = []() constexpr { + size_t count = 0; + for (auto interval : g_word_break_intervals) { + if (interval.property == WBP::Extend) { + count++; + } + } + return count; + }(); + + // Create array of extend character intervals + std::array result{}; + size_t index = 0; + for (auto interval : g_word_break_intervals) { + if (interval.property == WBP::Extend) { + result[index++] = {interval.first, interval.last}; + } + } + return result; +}()}; + // Find a codepoint inside a sorted list of Interval. template bool Bisearch(uint32_t ucs, const std::array& table) { @@ -1546,7 +1275,7 @@ bool EatCodePoint(const std::wstring& input, } bool IsCombining(uint32_t ucs) { - return ftxui::CodepointToWordBreakProperty(ucs) == WBP::Extend; + return Bisearch(ucs, g_extend_characters); } bool IsFullWidth(uint32_t ucs) {