From: Ahmad Fatoum <a.fatoum@pengutronix.de>
To: barebox@lists.infradead.org
Cc: Ahmad Fatoum <a.fatoum@pengutronix.de>
Subject: [PATCH 1/2] lib: import Linux UCS2 library functions
Date: Thu, 11 Dec 2025 21:45:51 +0100 [thread overview]
Message-ID: <20251211204555.2694174-1-a.fatoum@pengutronix.de> (raw)
Our wide character functions are not actually capable of dealing with an
extended character set. Import the UCS2 (basically UTF-16 without
surrogate pairs) support from Linux for this purpose.
This will be used later for the EFI support.
Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
---
include/linux/ucs2_string.h | 22 +++++
lib/Makefile | 2 +-
lib/ucs2_string.c | 181 ++++++++++++++++++++++++++++++++++++
3 files changed, 204 insertions(+), 1 deletion(-)
create mode 100644 include/linux/ucs2_string.h
create mode 100644 lib/ucs2_string.c
diff --git a/include/linux/ucs2_string.h b/include/linux/ucs2_string.h
new file mode 100644
index 000000000000..5bce9d697fa2
--- /dev/null
+++ b/include/linux/ucs2_string.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-Comment: Origin-URL: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/linux/ucs2_string.h?id=e4c89f9380017b6b2e63836e2de1af8eb4535384 */
+#ifndef _LINUX_UCS2_STRING_H_
+#define _LINUX_UCS2_STRING_H_
+
+#include <linux/types.h> /* for size_t */
+#include <linux/stddef.h> /* for NULL */
+
+typedef u16 ucs2_char_t;
+
+unsigned long ucs2_strnlen(const ucs2_char_t *s, size_t maxlength);
+unsigned long ucs2_strlen(const ucs2_char_t *s);
+unsigned long ucs2_strsize(const ucs2_char_t *data, unsigned long maxlength);
+ssize_t ucs2_strscpy(ucs2_char_t *dst, const ucs2_char_t *src, size_t count);
+int ucs2_strncmp(const ucs2_char_t *a, const ucs2_char_t *b, size_t len);
+int ucs2_strcmp(const ucs2_char_t *a, const ucs2_char_t *b);
+
+unsigned long ucs2_utf8size(const ucs2_char_t *src);
+unsigned long ucs2_as_utf8(u8 *dest, const ucs2_char_t *src,
+ unsigned long maxlength);
+
+#endif /* _LINUX_UCS2_STRING_H_ */
diff --git a/lib/Makefile b/lib/Makefile
index 9ab4cad0359c..38343fdbafcc 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -68,7 +68,7 @@ obj-y += gui/
obj-$(CONFIG_XYMODEM) += xymodem.o
obj-y += unlink-recursive.o
obj-$(CONFIG_STMP_DEVICE) += stmp-device.o
-obj-y += wchar.o
+obj-y += ucs2_string.o wchar.o
obj-$(CONFIG_FUZZ) += fuzz.o
obj-y += libfile.o
obj-y += bitmap.o
diff --git a/lib/ucs2_string.c b/lib/ucs2_string.c
new file mode 100644
index 000000000000..db3f34f6b70c
--- /dev/null
+++ b/lib/ucs2_string.c
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: GPL-2.0
+// SPDX-Comment: Origin-URL: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/lib/ucs2_string.c?id=91640531b92ec63e260b9d5c681d387676ec462c
+
+#include <linux/ucs2_string.h>
+#include <linux/module.h>
+#include <linux/bug.h>
+#include <linux/errno.h>
+#include <linux/limits.h>
+
+/* Return the number of unicode characters in data */
+unsigned long
+ucs2_strnlen(const ucs2_char_t *s, size_t maxlength)
+{
+ unsigned long length = 0;
+
+ while (*s++ != 0 && length < maxlength)
+ length++;
+ return length;
+}
+EXPORT_SYMBOL(ucs2_strnlen);
+
+unsigned long
+ucs2_strlen(const ucs2_char_t *s)
+{
+ return ucs2_strnlen(s, ~0UL);
+}
+EXPORT_SYMBOL(ucs2_strlen);
+
+/*
+ * Return the number of bytes is the length of this string
+ * Note: this is NOT the same as the number of unicode characters
+ */
+unsigned long
+ucs2_strsize(const ucs2_char_t *data, unsigned long maxlength)
+{
+ return ucs2_strnlen(data, maxlength/sizeof(ucs2_char_t)) * sizeof(ucs2_char_t);
+}
+EXPORT_SYMBOL(ucs2_strsize);
+
+/**
+ * ucs2_strscpy() - Copy a UCS2 string into a sized buffer.
+ *
+ * @dst: Pointer to the destination buffer where to copy the string to.
+ * @src: Pointer to the source buffer where to copy the string from.
+ * @count: Size of the destination buffer, in UCS2 (16-bit) characters.
+ *
+ * Like strscpy(), only for UCS2 strings.
+ *
+ * Copy the source string @src, or as much of it as fits, into the destination
+ * buffer @dst. The behavior is undefined if the string buffers overlap. The
+ * destination buffer @dst is always NUL-terminated, unless it's zero-sized.
+ *
+ * Return: The number of characters copied into @dst (excluding the trailing
+ * %NUL terminator) or -E2BIG if @count is 0 or @src was truncated due to the
+ * destination buffer being too small.
+ */
+ssize_t ucs2_strscpy(ucs2_char_t *dst, const ucs2_char_t *src, size_t count)
+{
+ long res;
+
+ /*
+ * Ensure that we have a valid amount of space. We need to store at
+ * least one NUL-character.
+ */
+ if (count == 0 || WARN_ON_ONCE(count > INT_MAX / sizeof(*dst)))
+ return -E2BIG;
+
+ /*
+ * Copy at most 'count' characters, return early if we find a
+ * NUL-terminator.
+ */
+ for (res = 0; res < count; res++) {
+ ucs2_char_t c;
+
+ c = src[res];
+ dst[res] = c;
+
+ if (!c)
+ return res;
+ }
+
+ /*
+ * The loop above terminated without finding a NUL-terminator,
+ * exceeding the 'count': Enforce proper NUL-termination and return
+ * error.
+ */
+ dst[count - 1] = 0;
+ return -E2BIG;
+}
+EXPORT_SYMBOL(ucs2_strscpy);
+
+int
+ucs2_strncmp(const ucs2_char_t *a, const ucs2_char_t *b, size_t len)
+{
+ while (1) {
+ if (len == 0)
+ return 0;
+ if (*a < *b)
+ return -1;
+ if (*a > *b)
+ return 1;
+ if (*a == 0) /* implies *b == 0 */
+ return 0;
+ a++;
+ b++;
+ len--;
+ }
+}
+EXPORT_SYMBOL(ucs2_strncmp);
+
+int
+ucs2_strcmp(const ucs2_char_t *a, const ucs2_char_t *b)
+{
+ return ucs2_strncmp(a, b, ~0UL);
+}
+EXPORT_SYMBOL(ucs2_strncmp);
+
+unsigned long
+ucs2_utf8size(const ucs2_char_t *src)
+{
+ unsigned long i;
+ unsigned long j = 0;
+
+ for (i = 0; src[i]; i++) {
+ u16 c = src[i];
+
+ if (c >= 0x800)
+ j += 3;
+ else if (c >= 0x80)
+ j += 2;
+ else
+ j += 1;
+ }
+
+ return j;
+}
+EXPORT_SYMBOL(ucs2_utf8size);
+
+/*
+ * copy at most maxlength bytes of whole utf8 characters to dest from the
+ * ucs2 string src.
+ *
+ * The return value is the number of characters copied, not including the
+ * final NUL character.
+ */
+unsigned long
+ucs2_as_utf8(u8 *dest, const ucs2_char_t *src, unsigned long maxlength)
+{
+ unsigned int i;
+ unsigned long j = 0;
+ unsigned long limit = ucs2_strnlen(src, maxlength);
+
+ for (i = 0; maxlength && i < limit; i++) {
+ u16 c = src[i];
+
+ if (c >= 0x800) {
+ if (maxlength < 3)
+ break;
+ maxlength -= 3;
+ dest[j++] = 0xe0 | (c & 0xf000) >> 12;
+ dest[j++] = 0x80 | (c & 0x0fc0) >> 6;
+ dest[j++] = 0x80 | (c & 0x003f);
+ } else if (c >= 0x80) {
+ if (maxlength < 2)
+ break;
+ maxlength -= 2;
+ dest[j++] = 0xc0 | (c & 0x7c0) >> 6;
+ dest[j++] = 0x80 | (c & 0x03f);
+ } else {
+ maxlength -= 1;
+ dest[j++] = c & 0x7f;
+ }
+ }
+ if (maxlength)
+ dest[j] = '\0';
+ return j;
+}
+EXPORT_SYMBOL(ucs2_as_utf8);
+
+MODULE_DESCRIPTION("UCS2 string handling");
+MODULE_LICENSE("GPL v2");
--
2.47.3
next reply other threads:[~2025-12-11 20:46 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-12-11 20:45 Ahmad Fatoum [this message]
2025-12-11 20:45 ` [PATCH 2/2] lib: wchar: reimplement in terms of Linux UCS2 helpers Ahmad Fatoum
2025-12-15 7:10 ` [PATCH 1/2] lib: import Linux UCS2 library functions Sascha Hauer
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251211204555.2694174-1-a.fatoum@pengutronix.de \
--to=a.fatoum@pengutronix.de \
--cc=barebox@lists.infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox