LCOV - code coverage report
Current view: top level - tty/vt - ucs.c (source / functions) Coverage Total Hit
Test: TTY Combined Coverage Lines: 0.0 % 103 0
Test Date: 2025-08-26 15:45:50 Functions: 0.0 % 11 0

            Line data    Source code
       1              : // SPDX-License-Identifier: GPL-2.0
       2              : /*
       3              :  * ucs.c - Universal Character Set processing
       4              :  */
       5              : 
       6              : #include <linux/array_size.h>
       7              : #include <linux/bsearch.h>
       8              : #include <linux/consolemap.h>
       9              : #include <linux/minmax.h>
      10              : 
/*
 * Interval of code points with 16-bit bounds. interval16_cmp() reports a
 * match when first <= code point <= last, so both bounds are inclusive.
 */
struct ucs_interval16 {
        u16 first;      /* lowest code point in the interval */
        u16 last;       /* highest code point in the interval */
};
      15              : 
/*
 * Interval of code points with 32-bit bounds. interval32_cmp() reports a
 * match when first <= code point <= last, so both bounds are inclusive.
 */
struct ucs_interval32 {
        u32 first;      /* lowest code point in the interval */
        u32 last;       /* highest code point in the interval */
};
      20              : 
      21              : #include "ucs_width_table.h"
      22              : 
      23            0 : static int interval16_cmp(const void *key, const void *element)
      24              : {
      25            0 :         u16 cp = *(u16 *)key;
      26            0 :         const struct ucs_interval16 *entry = element;
      27              : 
      28            0 :         if (cp < entry->first)
      29            0 :                 return -1;
      30            0 :         if (cp > entry->last)
      31            0 :                 return 1;
      32            0 :         return 0;
      33            0 : }
      34              : 
      35            0 : static int interval32_cmp(const void *key, const void *element)
      36              : {
      37            0 :         u32 cp = *(u32 *)key;
      38            0 :         const struct ucs_interval32 *entry = element;
      39              : 
      40            0 :         if (cp < entry->first)
      41            0 :                 return -1;
      42            0 :         if (cp > entry->last)
      43            0 :                 return 1;
      44            0 :         return 0;
      45            0 : }
      46              : 
      47            0 : static bool cp_in_range16(u16 cp, const struct ucs_interval16 *ranges, size_t size)
      48              : {
      49            0 :         if (cp < ranges[0].first || cp > ranges[size - 1].last)
      50            0 :                 return false;
      51              : 
      52            0 :         return __inline_bsearch(&cp, ranges, size, sizeof(*ranges),
      53            0 :                                 interval16_cmp) != NULL;
      54            0 : }
      55              : 
      56            0 : static bool cp_in_range32(u32 cp, const struct ucs_interval32 *ranges, size_t size)
      57              : {
      58            0 :         if (cp < ranges[0].first || cp > ranges[size - 1].last)
      59            0 :                 return false;
      60              : 
      61            0 :         return __inline_bsearch(&cp, ranges, size, sizeof(*ranges),
      62            0 :                                 interval32_cmp) != NULL;
      63            0 : }
      64              : 
/*
 * True when @cp is at most 0xffff, i.e. representable in 16 bits. The
 * callers in this file use it to pick between the u16 and u32 tables.
 */
#define UCS_IS_BMP(cp)  ((cp) <= 0xffff)
      66              : 
      67              : /**
      68              :  * ucs_is_zero_width() - Determine if a Unicode code point is zero-width.
      69              :  * @cp: Unicode code point (UCS-4)
      70              :  *
      71              :  * Return: true if the character is zero-width, false otherwise
      72              :  */
      73            0 : bool ucs_is_zero_width(u32 cp)
      74              : {
      75            0 :         if (UCS_IS_BMP(cp))
      76            0 :                 return cp_in_range16(cp, ucs_zero_width_bmp_ranges,
      77              :                                      ARRAY_SIZE(ucs_zero_width_bmp_ranges));
      78              :         else
      79            0 :                 return cp_in_range32(cp, ucs_zero_width_non_bmp_ranges,
      80              :                                      ARRAY_SIZE(ucs_zero_width_non_bmp_ranges));
      81            0 : }
      82              : 
      83              : /**
      84              :  * ucs_is_double_width() - Determine if a Unicode code point is double-width.
      85              :  * @cp: Unicode code point (UCS-4)
      86              :  *
      87              :  * Return: true if the character is double-width, false otherwise
      88              :  */
      89            0 : bool ucs_is_double_width(u32 cp)
      90              : {
      91            0 :         if (UCS_IS_BMP(cp))
      92            0 :                 return cp_in_range16(cp, ucs_double_width_bmp_ranges,
      93              :                                      ARRAY_SIZE(ucs_double_width_bmp_ranges));
      94              :         else
      95            0 :                 return cp_in_range32(cp, ucs_double_width_non_bmp_ranges,
      96              :                                      ARRAY_SIZE(ucs_double_width_non_bmp_ranges));
      97            0 : }
      98              : 
/*
 * One recomposition rule: a base character followed by a combining mark,
 * and the single character that the pair recomposes into.
 * Using u16 to save space since all values are within BMP range.
 * recomposition_cmp() orders entries by @base first, then by @mark.
 */
struct ucs_recomposition {
        u16 base;       /* base character */
        u16 mark;       /* combining mark */
        u16 recomposed; /* corresponding recomposed character */
};
     108              : 
     109              : #include "ucs_recompose_table.h"
     110              : 
/*
 * Search key handed to recomposition_cmp(): the (base, mark) pair that
 * ucs_recompose() looks up in ucs_recomposition_table.
 */
struct compare_key {
        u16 base;       /* base character being searched for */
        u16 mark;       /* combining mark being searched for */
};
     115              : 
     116            0 : static int recomposition_cmp(const void *key, const void *element)
     117              : {
     118            0 :         const struct compare_key *search_key = key;
     119            0 :         const struct ucs_recomposition *entry = element;
     120              : 
     121              :         /* Compare base character first */
     122            0 :         if (search_key->base < entry->base)
     123            0 :                 return -1;
     124            0 :         if (search_key->base > entry->base)
     125            0 :                 return 1;
     126              : 
     127              :         /* Base characters match, now compare combining character */
     128            0 :         if (search_key->mark < entry->mark)
     129            0 :                 return -1;
     130            0 :         if (search_key->mark > entry->mark)
     131            0 :                 return 1;
     132              : 
     133              :         /* Both match */
     134            0 :         return 0;
     135            0 : }
     136              : 
     137              : /**
     138              :  * ucs_recompose() - Attempt to recompose two Unicode characters into a single character.
     139              :  * @base: Base Unicode code point (UCS-4)
     140              :  * @mark: Combining mark Unicode code point (UCS-4)
     141              :  *
     142              :  * Return: Recomposed Unicode code point, or 0 if no recomposition is possible
     143              :  */
     144            0 : u32 ucs_recompose(u32 base, u32 mark)
     145              : {
     146              :         /* Check if characters are within the range of our table */
     147            0 :         if (base < UCS_RECOMPOSE_MIN_BASE || base > UCS_RECOMPOSE_MAX_BASE ||
     148            0 :             mark < UCS_RECOMPOSE_MIN_MARK || mark > UCS_RECOMPOSE_MAX_MARK)
     149            0 :                 return 0;
     150              : 
     151            0 :         struct compare_key key = { base, mark };
     152            0 :         struct ucs_recomposition *result =
     153            0 :                 __inline_bsearch(&key, ucs_recomposition_table,
     154              :                                  ARRAY_SIZE(ucs_recomposition_table),
     155              :                                  sizeof(*ucs_recomposition_table),
     156              :                                  recomposition_cmp);
     157              : 
     158            0 :         return result ? result->recomposed : 0;
     159            0 : }
     160              : 
     161              : /*
     162              :  * The fallback table structures implement a 2-level lookup.
     163              :  */
     164              : 
/*
 * First-level descriptor. ucs_get_fallback() finds the descriptor whose
 * @page equals the code point's high byte, then searches the @count
 * entries beginning at index @start of the entries array.
 */
struct ucs_page_desc {
        u8 page;        /* Page index (high byte of code points) */
        u8 count;       /* Number of entries in this page */
        u16 start;      /* Start index in entries array */
};
     170              : 
/*
 * Second-level entry. When @fallback equals UCS_PAGE_ENTRY_RANGE_MARKER,
 * this entry opens a range starting at @offset; the entry that follows it
 * supplies the last offset of the range and the fallback character.
 */
struct ucs_page_entry {
        u8 offset;      /* Offset within page (0-255) */
        u8 fallback;    /* Fallback character or range start marker */
};
     176              : #include "ucs_fallback_table.h"
     177              : 
     178            0 : static int ucs_page_desc_cmp(const void *key, const void *element)
     179              : {
     180            0 :         u8 page = *(u8 *)key;
     181            0 :         const struct ucs_page_desc *entry = element;
     182              : 
     183            0 :         if (page < entry->page)
     184            0 :                 return -1;
     185            0 :         if (page > entry->page)
     186            0 :                 return 1;
     187            0 :         return 0;
     188            0 : }
     189              : 
     190            0 : static int ucs_page_entry_cmp(const void *key, const void *element)
     191              : {
     192            0 :         u8 offset = *(u8 *)key;
     193            0 :         const struct ucs_page_entry *entry = element;
     194              : 
     195            0 :         if (offset < entry->offset)
     196            0 :                 return -1;
     197            0 :         if (entry->fallback == UCS_PAGE_ENTRY_RANGE_MARKER) {
     198            0 :                 if (offset > entry[1].offset)
     199            0 :                         return 1;
     200            0 :         } else {
     201            0 :                 if (offset > entry->offset)
     202            0 :                         return 1;
     203              :         }
     204            0 :         return 0;
     205            0 : }
     206              : 
     207              : /**
     208              :  * ucs_get_fallback() - Get a substitution for the provided Unicode character
     209              :  * @cp: Unicode code point (UCS-4)
     210              :  *
     211              :  * Get a simpler fallback character for the provided Unicode character.
     212              :  * This is used for terminal display when corresponding glyph is unavailable.
     213              :  * The substitution may not be as good as the actual glyph for the original
     214              :  * character but still way more helpful than a squared question mark.
     215              :  *
     216              :  * Return: Fallback Unicode code point, or 0 if none is available
     217              :  */
     218            0 : u32 ucs_get_fallback(u32 cp)
     219              : {
     220            0 :         const struct ucs_page_desc *page;
     221            0 :         const struct ucs_page_entry *entry;
     222            0 :         u8 page_idx = cp >> 8, offset = cp;
     223              : 
     224            0 :         if (!UCS_IS_BMP(cp))
     225            0 :                 return 0;
     226              : 
     227              :         /*
     228              :          * Full-width to ASCII mapping (covering all printable ASCII 33-126)
     229              :          * 0xFF01 (!) to 0xFF5E (~) -> ASCII 33 (!) to 126 (~)
     230              :          * We process them programmatically to reduce the table size.
     231              :          */
     232            0 :         if (cp >= 0xFF01 && cp <= 0xFF5E)
     233            0 :                 return cp - 0xFF01 + 33;
     234              : 
     235            0 :         page = __inline_bsearch(&page_idx, ucs_fallback_pages,
     236              :                                 ARRAY_SIZE(ucs_fallback_pages),
     237              :                                 sizeof(*ucs_fallback_pages),
     238              :                                 ucs_page_desc_cmp);
     239            0 :         if (!page)
     240            0 :                 return 0;
     241              : 
     242            0 :         entry = __inline_bsearch(&offset, ucs_fallback_entries + page->start,
     243            0 :                                  page->count, sizeof(*ucs_fallback_entries),
     244              :                                  ucs_page_entry_cmp);
     245            0 :         if (!entry)
     246            0 :                 return 0;
     247              : 
     248            0 :         if (entry->fallback == UCS_PAGE_ENTRY_RANGE_MARKER)
     249            0 :                 entry++;
     250            0 :         return entry->fallback;
     251            0 : }
        

Generated by: LCOV version 2.0-1