[PATCH 1/2] lib: import Linux UCS2 library functions

mail archive of the barebox mailing list
 help / color / mirror / Atom feed

* [PATCH 1/2] lib: import Linux UCS2 library functions
@ 2025-12-11 20:45 Ahmad Fatoum
  2025-12-11 20:45 ` [PATCH 2/2] lib: wchar: reimplement in terms of Linux UCS2 helpers Ahmad Fatoum
  2025-12-15  7:10 ` [PATCH 1/2] lib: import Linux UCS2 library functions Sascha Hauer
  0 siblings, 2 replies; 3+ messages in thread
From: Ahmad Fatoum @ 2025-12-11 20:45 UTC (permalink / raw)
  To: barebox; +Cc: Ahmad Fatoum

Our wide character functions are not actually capable of dealing with an
extended character set. Import the UCS2 (basically UTF-16 without
surrogate pairs) support from Linux for this purpose.

This will be used later for the EFI support.

Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
---
 include/linux/ucs2_string.h |  22 +++++
 lib/Makefile                |   2 +-
 lib/ucs2_string.c           | 181 ++++++++++++++++++++++++++++++++++++
 3 files changed, 204 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/ucs2_string.h
 create mode 100644 lib/ucs2_string.c

diff --git a/include/linux/ucs2_string.h b/include/linux/ucs2_string.h
new file mode 100644
index 000000000000..5bce9d697fa2
--- /dev/null
+++ b/include/linux/ucs2_string.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-Comment: Origin-URL: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/linux/ucs2_string.h?id=e4c89f9380017b6b2e63836e2de1af8eb4535384 */
+#ifndef _LINUX_UCS2_STRING_H_
+#define _LINUX_UCS2_STRING_H_
+
+#include <linux/types.h>	/* for size_t */
+#include <linux/stddef.h>	/* for NULL */
+
+typedef u16 ucs2_char_t;
+
+unsigned long ucs2_strnlen(const ucs2_char_t *s, size_t maxlength);
+unsigned long ucs2_strlen(const ucs2_char_t *s);
+unsigned long ucs2_strsize(const ucs2_char_t *data, unsigned long maxlength);
+ssize_t ucs2_strscpy(ucs2_char_t *dst, const ucs2_char_t *src, size_t count);
+int ucs2_strncmp(const ucs2_char_t *a, const ucs2_char_t *b, size_t len);
+int ucs2_strcmp(const ucs2_char_t *a, const ucs2_char_t *b);
+
+unsigned long ucs2_utf8size(const ucs2_char_t *src);
+unsigned long ucs2_as_utf8(u8 *dest, const ucs2_char_t *src,
+			   unsigned long maxlength);
+
+#endif /* _LINUX_UCS2_STRING_H_ */
diff --git a/lib/Makefile b/lib/Makefile
index 9ab4cad0359c..38343fdbafcc 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -68,7 +68,7 @@ obj-y			+= gui/
 obj-$(CONFIG_XYMODEM)	+= xymodem.o
 obj-y			+= unlink-recursive.o
 obj-$(CONFIG_STMP_DEVICE) += stmp-device.o
-obj-y			+= wchar.o
+obj-y			+= ucs2_string.o wchar.o
 obj-$(CONFIG_FUZZ)	+= fuzz.o
 obj-y			+= libfile.o
 obj-y			+= bitmap.o
diff --git a/lib/ucs2_string.c b/lib/ucs2_string.c
new file mode 100644
index 000000000000..db3f34f6b70c
--- /dev/null
+++ b/lib/ucs2_string.c
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: GPL-2.0
+// SPDX-Comment: Origin-URL: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/lib/ucs2_string.c?id=91640531b92ec63e260b9d5c681d387676ec462c
+
+#include <linux/ucs2_string.h>
+#include <linux/module.h>
+#include <linux/bug.h>
+#include <linux/errno.h>
+#include <linux/limits.h>
+
+/* Return the length of s in UCS2 characters, examining at most maxlength */
+unsigned long ucs2_strnlen(const ucs2_char_t *s, size_t maxlength)
+{
+	unsigned long length = 0;
+
+	/* Check the bound before dereferencing so s[maxlength] is never read */
+	while (length < maxlength && s[length] != 0)
+		length++;
+	return length;
+}
+EXPORT_SYMBOL(ucs2_strnlen);
+
+/* Unbounded variant of ucs2_strnlen(): count UCS2 characters up to the NUL */
+unsigned long ucs2_strlen(const ucs2_char_t *s)
+{
+	return ucs2_strnlen(s, ~0UL);
+}
+EXPORT_SYMBOL(ucs2_strlen);
+
+/*
+ * Return the size of this string in bytes, not counting the terminating NUL.
+ * maxlength limits how far the string is scanned and is given in bytes too.
+ * Note: this is NOT the same as the number of unicode characters
+ */
+unsigned long ucs2_strsize(const ucs2_char_t *data, unsigned long maxlength)
+{
+	return ucs2_strnlen(data, maxlength / sizeof(*data)) * sizeof(*data);
+}
+EXPORT_SYMBOL(ucs2_strsize);
+
+/**
+ * ucs2_strscpy() - Bounded copy of a NUL-terminated UCS2 string.
+ *
+ * @dst: Destination buffer that receives the copy.
+ * @src: NUL-terminated UCS2 string to copy from.
+ * @count: Capacity of @dst, counted in UCS2 (16-bit) characters.
+ *
+ * UCS2 counterpart of strscpy().
+ *
+ * As much of @src as fits is copied into @dst. Unless @count is 0, @dst is
+ * always left NUL-terminated, even when @src had to be truncated. The
+ * behavior is undefined if the two buffers overlap.
+ *
+ * Return: Number of characters stored in @dst, not counting the trailing
+ * %NUL terminator, or -E2BIG if @count is 0 or above INT_MAX / sizeof(*dst),
+ * or if @src did not fit into @dst and was truncated.
+ */
+ssize_t ucs2_strscpy(ucs2_char_t *dst, const ucs2_char_t *src, size_t count)
+{
+	size_t pos = 0;
+
+	/* A zero-sized buffer cannot even hold the terminating NUL. */
+	if (count == 0)
+		return -E2BIG;
+
+	/* Warn about and reject counts above INT_MAX / sizeof(*dst). */
+	if (WARN_ON_ONCE(count > INT_MAX / sizeof(*dst)))
+		return -E2BIG;
+
+	/*
+	 * Copy one character at a time; copying the NUL terminator means the
+	 * whole string fit, so report its length.
+	 */
+	while (pos < count) {
+		const ucs2_char_t c = src[pos];
+
+		dst[pos] = c;
+		if (c == 0)
+			return pos;
+
+		pos++;
+	}
+
+	/*
+	 * All 'count' slots were used without reaching the end of @src:
+	 * overwrite the last one with NUL and signal the truncation.
+	 */
+	dst[count - 1] = 0;
+	return -E2BIG;
+}
+EXPORT_SYMBOL(ucs2_strscpy);
+
+/*
+ * Compare at most len UCS2 characters of a and b, stopping early at a NUL.
+ *
+ * Return -1 if a sorts before b, 1 if it sorts after b and 0 if both are
+ * equal within the compared range.
+ */
+int ucs2_strncmp(const ucs2_char_t *a, const ucs2_char_t *b, size_t len)
+{
+	for (; len != 0; a++, b++, len--) {
+		if (*a != *b)
+			return *a < *b ? -1 : 1;
+		/* Equal and NUL: both strings end here */
+		if (*a == 0)
+			break;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(ucs2_strncmp);
+
+/* Compare two NUL-terminated UCS2 strings; result is as for ucs2_strncmp() */
+int ucs2_strcmp(const ucs2_char_t *a, const ucs2_char_t *b)
+{
+	return ucs2_strncmp(a, b, ~0UL);
+}
+EXPORT_SYMBOL(ucs2_strcmp);
+
+/*
+ * Return the number of bytes the UTF-8 encoding of src occupies, not
+ * counting a terminating NUL.
+ */
+unsigned long ucs2_utf8size(const ucs2_char_t *src)
+{
+	unsigned long bytes = 0;
+
+	for (; *src; src++) {
+		if (*src < 0x80)
+			bytes += 1;	/* 0x01..0x7f */
+		else if (*src < 0x800)
+			bytes += 2;	/* 0x80..0x7ff */
+		else
+			bytes += 3;	/* 0x800..0xffff */
+	}
+
+	return bytes;
+}
+EXPORT_SYMBOL(ucs2_utf8size);
+
+/*
+ * Convert the ucs2 string src to utf8, storing at most maxlength bytes of
+ * whole utf8 characters in dest. dest is only NUL-terminated if fewer than
+ * maxlength bytes were needed for the characters.
+ *
+ * Return the number of bytes stored in dest, not including the final NUL.
+ */
+unsigned long
+ucs2_as_utf8(u8 *dest, const ucs2_char_t *src, unsigned long maxlength)
+{
+	unsigned long i;	/* same width as limit, so it cannot wrap first */
+	unsigned long j = 0;
+	unsigned long limit = ucs2_strnlen(src, maxlength);
+
+	for (i = 0; maxlength && i < limit; i++) {
+		u16 c = src[i];
+
+		if (c >= 0x800) {	/* 1110xxxx 10xxxxxx 10xxxxxx */
+			if (maxlength < 3)
+				break;	/* never store a partial sequence */
+			maxlength -= 3;
+			dest[j++] = 0xe0 | (c & 0xf000) >> 12;
+			dest[j++] = 0x80 | (c & 0x0fc0) >> 6;
+			dest[j++] = 0x80 | (c & 0x003f);
+		} else if (c >= 0x80) {	/* 110xxxxx 10xxxxxx */
+			if (maxlength < 2)
+				break;	/* never store a partial sequence */
+			maxlength -= 2;
+			dest[j++] = 0xc0 | (c & 0x7c0) >> 6;
+			dest[j++] = 0x80 | (c & 0x03f);
+		} else {		/* plain 7-bit character */
+			maxlength -= 1;
+			dest[j++] = c & 0x7f;
+		}
+	}
+	if (maxlength)	/* terminate only while room is left */
+		dest[j] = '\0';
+	return j;
+}
+EXPORT_SYMBOL(ucs2_as_utf8);
+
+MODULE_DESCRIPTION("UCS2 string handling");
+MODULE_LICENSE("GPL v2");
-- 
2.47.3




^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH 2/2] lib: wchar: reimplement in terms of Linux UCS2 helpers
  2025-12-11 20:45 [PATCH 1/2] lib: import Linux UCS2 library functions Ahmad Fatoum
@ 2025-12-11 20:45 ` Ahmad Fatoum
  2025-12-15  7:10 ` [PATCH 1/2] lib: import Linux UCS2 library functions Sascha Hauer
  1 sibling, 0 replies; 3+ messages in thread
From: Ahmad Fatoum @ 2025-12-11 20:45 UTC (permalink / raw)
  To: barebox; +Cc: Ahmad Fatoum

Let's make use of the Linux functions for those that have direct
counterparts.

Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
---
 include/wchar.h | 11 ++++-----
 lib/wchar.c     | 62 ++-----------------------------------------------
 2 files changed, 7 insertions(+), 66 deletions(-)

diff --git a/include/wchar.h b/include/wchar.h
index 02818815e183..5377aa27c08a 100644
--- a/include/wchar.h
+++ b/include/wchar.h
@@ -4,6 +4,7 @@
 
 #include <linux/types.h>
 #include <linux/stddef.h>
+#include <linux/ucs2_string.h>
 
 wchar_t *strdup_wchar(const wchar_t *src);
 
@@ -15,16 +16,14 @@ wchar_t *strdup_char_to_wchar(const char *src);
 
 char *strdup_wchar_to_char(const wchar_t *src);
 
-size_t wcslen(const wchar_t *s);
-
-size_t wcsnlen(const wchar_t *s, size_t maxlen);
+#define wcsnlen		ucs2_strnlen
+#define wcslen		ucs2_strlen
+#define wcsncmp		ucs2_strncmp
+#define wcscmp(s1, s2)	wcsncmp((s1), (s2), ~0UL)
 
 #define MB_CUR_MAX 4
 
 int mbtowc(wchar_t *pwc, const char *s, size_t n);
 int wctomb(char *s, wchar_t wc);
 
-int wcscmp (const wchar_t *s1, const wchar_t *s2);
-int wcsncmp (const wchar_t *s1, const wchar_t *s2, size_t n);
-
 #endif /* __WCHAR_H */
diff --git a/lib/wchar.c b/lib/wchar.c
index 96db8116286a..05961ce1bd52 100644
--- a/lib/wchar.c
+++ b/lib/wchar.c
@@ -1,42 +1,10 @@
-/*
- * wchar.c - wide character support
- *
- * Copyright (c) 2014 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- */
+// SPDX-License-Identifier: GPL-2.0
+// SPDX-FileCopyrightText: 2014 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
 
 #include <wchar.h>
 #include <malloc.h>
 #include <string.h>
 
-size_t wcslen(const wchar_t *s)
-{
-	size_t len = 0;
-
-	while (*s++)
-		len++;
-
-	return len;
-}
-
-size_t wcsnlen(const wchar_t * s, size_t count)
-{
-	const wchar_t *sc;
-
-	for (sc = s; count-- && *sc != L'\0'; ++sc)
-		/* nothing */;
-	return sc - s;
-}
-
 wchar_t *strdup_wchar(const wchar_t *src)
 {
 	int len;
@@ -124,29 +92,3 @@ char *strdup_wchar_to_char(const wchar_t *src)
 
 	return dst;
 }
-
-int wcscmp(const wchar_t *s1, const wchar_t *s2)
-{
-	while (*s1 == *s2++) {
-		if (*s1++ == 0)
-			return 0;
-	}
-
-	return *s1 - *--s2;
-}
-
-int wcsncmp (const wchar_t *s1, const wchar_t *s2, size_t n)
-{
-	if (n == 0)
-		return 0;
-
-	do {
-		if (*s1 != *s2++)
-			return *s1 - *--s2;
-
-		if (*s1++ == 0)
-			break;
-	} while (--n != 0);
-
-	return 0;
-}
-- 
2.47.3




^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH 1/2] lib: import Linux UCS2 library functions
  2025-12-11 20:45 [PATCH 1/2] lib: import Linux UCS2 library functions Ahmad Fatoum
  2025-12-11 20:45 ` [PATCH 2/2] lib: wchar: reimplement in terms of Linux UCS2 helpers Ahmad Fatoum
@ 2025-12-15  7:10 ` Sascha Hauer
  1 sibling, 0 replies; 3+ messages in thread
From: Sascha Hauer @ 2025-12-15  7:10 UTC (permalink / raw)
  To: barebox, Ahmad Fatoum


On Thu, 11 Dec 2025 21:45:51 +0100, Ahmad Fatoum wrote:
> Our wide character functions are not actually capable of dealing with an
> extended character set. Import the UCS2 (basically UTF-16 without
> surrogate pairs) support from Linux for this purpose.
> 
> This will be used later for the EFI support.
> 
> 
> [...]

Applied, thanks!

[1/2] lib: import Linux UCS2 library functions
      https://git.pengutronix.de/cgit/barebox/commit/?id=20d08806edb1 (link may not be stable)
[2/2] lib: wchar: reimplement in terms of Linux UCS2 helpers
      https://git.pengutronix.de/cgit/barebox/commit/?id=5687286f202b (link may not be stable)

Best regards,
-- 
Sascha Hauer <s.hauer@pengutronix.de>




^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2025-12-15  7:11 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-12-11 20:45 [PATCH 1/2] lib: import Linux UCS2 library functions Ahmad Fatoum
2025-12-11 20:45 ` [PATCH 2/2] lib: wchar: reimplement in terms of Linux UCS2 helpers Ahmad Fatoum
2025-12-15  7:10 ` [PATCH 1/2] lib: import Linux UCS2 library functions Sascha Hauer

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox