xaizek / vifm (License: GPLv2+) (since 2018-12-07)
Vifm is a file manager with curses interface, which provides Vi[m]-like environment for managing objects within file systems, extended with some useful ideas from mutt.
Commit b128ec5bbf7978a12d2a9f52d80378392dbf341c

Update utf8proc to v2.11.2
v2.11.0:
* Unicode 17 support.
* Documentation improvements.
* Build fix for C90, silence ASAN warning, CMake modernization.

v2.11.1:
* Correct out-of-bounds memory access when calling utf8proc_map with
both UTF8PROC_CHARBOUND and UTF8PROC_DECOMPOSE

v2.11.2:
* Fix composition for Hangul character U+11a7
Author: xaizek
Author date (UTC): 2025-10-05 08:51
Committer name: xaizek
Committer date (UTC): 2025-12-21 10:43
Parent(s): a4d3e8ab4ed2cd1625436bcc5de1c45ce612ee65
Signing key: 99DC5E4DB05F6BE2
Tree: 26b615f5593756e609eda4a0e8648ca92a7d72e0
File Lines added Lines deleted
ChangeLog 1 1
src/utils/utf8proc.c 29 25
src/utils/utf8proc.h 16 5
src/utils/utf8proc_data.inc 7148 7113
File ChangeLog changed (mode: 100644) (index 64b7d42b0..571712028)
31 31 editor automatically when "path" value is included in the value. Patch by editor automatically when "path" value is included in the value. Patch by
32 32 Kaspars Vandans. Kaspars Vandans.
33 33
34 Updated utf8proc to v2.10.0.
34 Updated utf8proc to v2.11.2.
35 35
36 36 Made documentation on which :commands can have comments a bit more Made documentation on which :commands can have comments a bit more
37 37 verbose. verbose.
File src/utils/utf8proc.c changed (mode: 100644) (index 4f2d4d01d..eb25204c8)
... ... UTF8PROC_DLLEXPORT const char *utf8proc_version(void) {
101 101 } }
102 102
103 103 UTF8PROC_DLLEXPORT const char *utf8proc_unicode_version(void) { UTF8PROC_DLLEXPORT const char *utf8proc_unicode_version(void) {
104 return "16.0.0";
104 return "17.0.0";
105 105 } }
106 106
107 107 UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode) { UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode) {
 
... ... static utf8proc_ssize_t seqindex_write_char_decomposed(utf8proc_uint16_t seqinde
388 388 for (; len >= 0; entry++, len--) { for (; len >= 0; entry++, len--) {
389 389 utf8proc_int32_t entry_cp = seqindex_decode_entry(&entry); utf8proc_int32_t entry_cp = seqindex_decode_entry(&entry);
390 390
391 written += utf8proc_decompose_char(
392 entry_cp,
393 (dst != NULL) ? (dst + written) : NULL,
394 (bufsize > written) ? (bufsize - written) : 0,
395 options,
396 last_boundclass
397 );
391 written += utf8proc_decompose_char(entry_cp, dst ? dst+written : dst,
392 (bufsize > written) ? (bufsize - written) : 0, options,
393 last_boundclass);
398 394 if (written < 0) return UTF8PROC_ERROR_OVERFLOW; if (written < 0) return UTF8PROC_ERROR_OVERFLOW;
399 395 } }
400 396 return written; return written;
 
... ... UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t c) {
451 447
452 448 #define utf8proc_decompose_lump(replacement_uc) \ #define utf8proc_decompose_lump(replacement_uc) \
453 449 return utf8proc_decompose_char((replacement_uc), dst, bufsize, \ return utf8proc_decompose_char((replacement_uc), dst, bufsize, \
454 options & ~(unsigned int)UTF8PROC_LUMP, last_boundclass)
450 (utf8proc_option_t)(options & ~(unsigned int)UTF8PROC_LUMP), last_boundclass)
455 451
456 452 UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) { UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) {
457 453 const utf8proc_property_t *property; const utf8proc_property_t *property;
 
... ... UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_custom(
582 578 uc = custom_func(uc, custom_data); /* user-specified custom mapping */ uc = custom_func(uc, custom_data); /* user-specified custom mapping */
583 579 } }
584 580 decomp_result = utf8proc_decompose_char( decomp_result = utf8proc_decompose_char(
585 uc,
586 (buffer != NULL) ? (buffer + wpos) : NULL,
587 (bufsize > wpos) ? (bufsize - wpos) : 0,
588 options,
581 uc, buffer ? buffer+wpos : buffer, (bufsize > wpos) ? (bufsize - wpos) : 0, options,
589 582 &boundclass &boundclass
590 583 ); );
591 584 if (decomp_result < 0) return decomp_result; if (decomp_result < 0) return decomp_result;
 
... ... UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_custom(
602 595 utf8proc_int32_t uc1, uc2; utf8proc_int32_t uc1, uc2;
603 596 const utf8proc_property_t *property1, *property2; const utf8proc_property_t *property1, *property2;
604 597 uc1 = buffer[pos]; uc1 = buffer[pos];
598 if (uc1 < 0) {
599 /* skip grapheme break */
600 pos++;
601 continue;
602 }
605 603 uc2 = buffer[pos+1]; uc2 = buffer[pos+1];
604 if (uc2 < 0) {
605 /* cannot recombine; skip grapheme break */
606 pos+=2;
607 continue;
608 }
606 609 property1 = unsafe_get_property(uc1); property1 = unsafe_get_property(uc1);
607 610 property2 = unsafe_get_property(uc2); property2 = unsafe_get_property(uc2);
608 611 if (property1->combining_class > property2->combining_class && if (property1->combining_class > property2->combining_class &&
 
... ... UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_normalize_utf32(utf8proc_int32_t *b
681 684 (hangul_sindex % UTF8PROC_HANGUL_TCOUNT) == 0) { (hangul_sindex % UTF8PROC_HANGUL_TCOUNT) == 0) {
682 685 utf8proc_int32_t hangul_tindex; utf8proc_int32_t hangul_tindex;
683 686 hangul_tindex = current_char - UTF8PROC_HANGUL_TBASE; hangul_tindex = current_char - UTF8PROC_HANGUL_TBASE;
684 if (hangul_tindex >= 0 && hangul_tindex < UTF8PROC_HANGUL_TCOUNT) {
687 if (hangul_tindex > 0 && hangul_tindex < UTF8PROC_HANGUL_TCOUNT) {
685 688 *starter += hangul_tindex; *starter += hangul_tindex;
686 689 starter_property = NULL; starter_property = NULL;
687 690 continue; continue;
 
... ... UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_normalize_utf32(utf8proc_int32_t *b
695 698 int len = starter_property->comb_length; int len = starter_property->comb_length;
696 699 utf8proc_int32_t max_second = utf8proc_combinations_second[idx + len - 1]; utf8proc_int32_t max_second = utf8proc_combinations_second[idx + len - 1];
697 700 if (current_char <= max_second) { if (current_char <= max_second) {
701 int off;
698 702 // TODO: binary search? arithmetic search? // TODO: binary search? arithmetic search?
699 for (int off = 0; off < len; ++off) {
703 for (off = 0; off < len; ++off) {
700 704 utf8proc_int32_t second = utf8proc_combinations_second[idx + off]; utf8proc_int32_t second = utf8proc_combinations_second[idx + off];
701 705 if (current_char < second) { if (current_char < second) {
702 706 /* not found */ /* not found */
 
... ... UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map_custom(
796 800
797 801 UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFD(const utf8proc_uint8_t *str) { UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFD(const utf8proc_uint8_t *str) {
798 802 utf8proc_uint8_t *retval; utf8proc_uint8_t *retval;
799 utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
800 UTF8PROC_DECOMPOSE);
803 utf8proc_map(str, 0, &retval, (utf8proc_option_t)(UTF8PROC_NULLTERM | UTF8PROC_STABLE |
804 UTF8PROC_DECOMPOSE));
801 805 return retval; return retval;
802 806 } }
803 807
804 808 UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str) { UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str) {
805 809 utf8proc_uint8_t *retval; utf8proc_uint8_t *retval;
806 utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
807 UTF8PROC_COMPOSE);
810 utf8proc_map(str, 0, &retval, (utf8proc_option_t)(UTF8PROC_NULLTERM | UTF8PROC_STABLE |
811 UTF8PROC_COMPOSE));
808 812 return retval; return retval;
809 813 } }
810 814
811 815 UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str) { UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str) {
812 816 utf8proc_uint8_t *retval; utf8proc_uint8_t *retval;
813 utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
814 UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT);
817 utf8proc_map(str, 0, &retval, (utf8proc_option_t)(UTF8PROC_NULLTERM | UTF8PROC_STABLE |
818 UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT));
815 819 return retval; return retval;
816 820 } }
817 821
818 822 UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str) { UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str) {
819 823 utf8proc_uint8_t *retval; utf8proc_uint8_t *retval;
820 utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
821 UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
824 utf8proc_map(str, 0, &retval, (utf8proc_option_t)(UTF8PROC_NULLTERM | UTF8PROC_STABLE |
825 UTF8PROC_COMPOSE | UTF8PROC_COMPAT));
822 826 return retval; return retval;
823 827 } }
824 828
825 829 UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC_Casefold(const utf8proc_uint8_t *str) { UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC_Casefold(const utf8proc_uint8_t *str) {
826 830 utf8proc_uint8_t *retval; utf8proc_uint8_t *retval;
827 utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
828 UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD | UTF8PROC_IGNORE);
831 utf8proc_map(str, 0, &retval, (utf8proc_option_t)(UTF8PROC_NULLTERM | UTF8PROC_STABLE |
832 UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD | UTF8PROC_IGNORE));
829 833 return retval; return retval;
830 834 } }
File src/utils/utf8proc.h changed (mode: 100644) (index 039da7690..3893f6f91)
71 71 /** The MAJOR version number (increased when backwards API compatibility is broken). */ /** The MAJOR version number (increased when backwards API compatibility is broken). */
72 72 #define UTF8PROC_VERSION_MAJOR 2 #define UTF8PROC_VERSION_MAJOR 2
73 73 /** The MINOR version number (increased when new functionality is added in a backwards-compatible manner). */ /** The MINOR version number (increased when new functionality is added in a backwards-compatible manner). */
74 #define UTF8PROC_VERSION_MINOR 10
74 #define UTF8PROC_VERSION_MINOR 11
75 75 /** The PATCH version (increased for fixes that do not change the API). */ /** The PATCH version (increased for fixes that do not change the API). */
76 #define UTF8PROC_VERSION_PATCH 0
76 #define UTF8PROC_VERSION_PATCH 2
77 77 /** @} */ /** @} */
78 78
79 79 #include <stdlib.h> #include <stdlib.h>
 
... ... typedef bool utf8proc_bool;
121 121 #include <limits.h> #include <limits.h>
122 122
123 123 #ifdef UTF8PROC_STATIC #ifdef UTF8PROC_STATIC
124 # define UTF8PROC_DLLEXPORT
124 # ifndef UTF8PROC_DLLEXPORT
125 # define UTF8PROC_DLLEXPORT
126 # endif
125 127 #else #else
126 128 # ifdef _WIN32 # ifdef _WIN32
127 129 # ifdef UTF8PROC_EXPORTS # ifdef UTF8PROC_EXPORTS
 
... ... typedef enum {
150 152 UTF8PROC_STABLE = (1<<1), UTF8PROC_STABLE = (1<<1),
151 153 /** Compatibility decomposition (i.e. formatting information is lost). */ /** Compatibility decomposition (i.e. formatting information is lost). */
152 154 UTF8PROC_COMPAT = (1<<2), UTF8PROC_COMPAT = (1<<2),
153 /** Return a result with decomposed characters. */
155 /** Return a result with composed characters. */
154 156 UTF8PROC_COMPOSE = (1<<3), UTF8PROC_COMPOSE = (1<<3),
155 157 /** Return a result with decomposed characters. */ /** Return a result with decomposed characters. */
156 158 UTF8PROC_DECOMPOSE = (1<<4), UTF8PROC_DECOMPOSE = (1<<4),
 
... ... UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(utf8proc_int
517 519 * @param dst the destination buffer. * @param dst the destination buffer.
518 520 * @param bufsize the size of the destination buffer. * @param bufsize the size of the destination buffer.
519 521 * @param options one or more of the following flags: * @param options one or more of the following flags:
520 * - @ref UTF8PROC_REJECTNA - return an error `codepoint` is unassigned
522 * - @ref UTF8PROC_REJECTNA - return an error if `codepoint` is unassigned
521 523 * - @ref UTF8PROC_IGNORE - strip "default ignorable" codepoints * - @ref UTF8PROC_IGNORE - strip "default ignorable" codepoints
522 524 * - @ref UTF8PROC_CASEFOLD - apply Unicode casefolding * - @ref UTF8PROC_CASEFOLD - apply Unicode casefolding
523 525 * - @ref UTF8PROC_COMPAT - replace certain codepoints with their * - @ref UTF8PROC_COMPAT - replace certain codepoints with their
 
... ... UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(utf8proc_int
532 534 * option is used. If the string is being processed in order, this can be initialized to 0 for * option is used. If the string is being processed in order, this can be initialized to 0 for
533 535 * the beginning of the string, and is thereafter updated automatically. Otherwise, this parameter is ignored. * the beginning of the string, and is thereafter updated automatically. Otherwise, this parameter is ignored.
534 536 * *
537 * In the current version of utf8proc, the maximum destination buffer with the @ref UTF8PROC_DECOMPOSE
538 * option is 4 elements (or double that with @ref UTF8PROC_CHARBOUND), so this is a good default size.
539 * However, this may increase in future Unicode versions, so you should always check the return value
540 * as described below.
541 *
535 542 * @return * @return
536 543 * In case of success, the number of codepoints written is returned; in case * In case of success, the number of codepoints written is returned; in case
537 544 * of an error, a negative error code is returned (utf8proc_errmsg()). * of an error, a negative error code is returned (utf8proc_errmsg()).
 
... ... UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t codepoi
743 750 * *
744 751 * @note The memory of the new UTF-8 string will have been allocated * @note The memory of the new UTF-8 string will have been allocated
745 752 * with `malloc`, and should therefore be deallocated with `free`. * with `malloc`, and should therefore be deallocated with `free`.
753 *
754 * @note `utf8proc_map` simply calls `utf8proc_decompose` followed by `utf8proc_reencode`,
755 * and applications requiring greater control over memory allocation should instead call
756 * those two functions directly.
746 757 */ */
747 758 UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map( UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map(
748 759 const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options
The diff for file src/utils/utf8proc_data.inc is too big (14261 changes) and cannot be shown.
Hints

Before your first commit, do not forget to set up your git environment:
git config --global user.name "your_name_here"
git config --global user.email "your@email_here"

Clone this repository using HTTP(S):
git clone https://code.reversed.top/user/xaizek/vifm

Clone this repository using SSH (do not forget to upload a key first):
git clone ssh://rocketgit@code.reversed.top/user/xaizek/vifm

You are allowed to anonymously push to this repository.
This means that your pushed commits will automatically be transformed into a pull request:
... clone the repository ...
... make some changes and some commits ...
git push origin master