Added implementation of a 'natural' string compare function originally contributed to tacentview by github user ClangPan.

bluescan · bluescan · commit a7b34b78a94e · 2024-07-30T23:57:56.000-07:00
diff --git a/Modules/Foundation/Inc/Foundation/tStandard.h b/Modules/Foundation/Inc/Foundation/tStandard.h
@@ -53,23 +53,24 @@ inline const void* tMemsrch(const void* haystack, int haystackNumBytes, const vo
 // straight UTF conversions (not null-terminated). String termination is not part of UTF, but it's common to support it.
 // Null-terminated versions of the functions have an 's' appended. The 'c' versions are for dealing with individual
 // codepoints. Note these functions return exactly -1 if a < b, 0 if equal, and 1 if a > b. This is in contrast to the
-// standard strcmp functions that only guarantee returning < 0, 0, or > 0. That is, implementations are free to return
-// either the ASCII difference of the strings or normalize the returns to -1, 0, 1.
+// standard strcmp functions that only guarantee returning < 0, 0, or > 0. That is, standard implementations are free to
+// return either the difference of the first mismatching characters or normalize the returns to -1, 0, 1, which is not
+// helpful as you can't be sure of which choice an implementation may have made.
 const int tCharInvalid																									= 0xFF;
 inline int tStrcmp(const char* a, const char* b)																		{ tAssert(a && b); int r = strcmp(a, b); return (r < 0) ? -1 : ((r > 0) ? 1 : 0); }
+inline int tStrcmp(const char8_t* a, const char8_t* b)																	{ return tStrcmp((const char*)a, (const char*)b); }
 inline int tStrncmp(const char* a, const char* b, int n)																{ tAssert(a && b && n >= 0); int r = strncmp(a, b, n); return (r < 0) ? -1 : ((r > 0) ? 1 : 0); }
-inline int tStrcmp(const char8_t* a, const char8_t* b)																	{ tAssert(a && b); int r = strcmp((const char*)a, (const char*)b); return (r < 0) ? -1 : ((r > 0) ? 1 : 0); }
-inline int tStrncmp(const char8_t* a, const char8_t* b, int n)															{ tAssert(a && b && n >= 0); int r = strncmp((const char*)a, (const char*)b, n); return (r < 0) ? -1 : ((r > 0) ? 1 : 0); }
+inline int tStrncmp(const char8_t* a, const char8_t* b, int n)															{ return tStrncmp((const char*)a, (const char*)b, n); }
 #if defined(PLATFORM_WINDOWS)
 inline int tStricmp(const char* a, const char* b)																		{ tAssert(a && b); int r = stricmp(a, b); return (r < 0) ? -1 : ((r > 0) ? 1 : 0); }
+inline int tStricmp(const char8_t* a, const char8_t* b)																	{ return tStricmp((const char*)a, (const char*)b); }
 inline int tStrnicmp(const char* a, const char* b, int n)																{ tAssert(a && b && n >= 0); int r = strnicmp(a, b, n); return (r < 0) ? -1 : ((r > 0) ? 1 : 0); }
-inline int tStricmp(const char8_t* a, const char8_t* b)																	{ tAssert(a && b); int r = stricmp((const char*)a, (const char*)b); return (r < 0) ? -1 : ((r > 0) ? 1 : 0); }
-inline int tStrnicmp(const char8_t* a, const char8_t* b, int n)															{ tAssert(a && b && n >= 0); int r = strnicmp((const char*)a, (const char*)b, n); return (r < 0) ? -1 : ((r > 0) ? 1 : 0); }
+inline int tStrnicmp(const char8_t* a, const char8_t* b, int n)															{ return tStrnicmp((const char*)a, (const char*)b, n); }
 #else
 inline int tStricmp(const char* a, const char* b)																		{ tAssert(a && b); int r = strcasecmp(a, b); return (r < 0) ? -1 : ((r > 0) ? 1 : 0); }
+inline int tStricmp(const char8_t* a, const char8_t* b)																	{ return tStricmp((const char*)a, (const char*)b); }
 inline int tStrnicmp(const char* a, const char* b, int n)																{ tAssert(a && b && n >= 0); int r = strncasecmp(a, b, n); return (r < 0) ? -1 : ((r > 0) ? 1 : 0); }
-inline int tStricmp(const char8_t* a, const char8_t* b)																	{ tAssert(a && b); int r = strcasecmp((const char*)a, (const char*)b); return (r < 0) ? -1 : ((r > 0) ? 1 : 0); }
-inline int tStrnicmp(const char8_t* a, const char8_t* b, int n)															{ tAssert(a && b && n >= 0); int r = strncasecmp((const char*)a, (const char*)b, n); return (r < 0) ? -1 : ((r > 0) ? 1 : 0); }
+inline int tStrnicmp(const char8_t* a, const char8_t* b, int n)															{ return tStrnicmp((const char*)a, (const char*)b, n); }
 #endif
 
 // These are similar to the above but assume the strings represent filesystem paths and so choose between a case
@@ -80,6 +81,12 @@ int tPstrncmp(const char* a, const char* b, int n);
 int tPstrcmp(const char8_t* a, const char8_t* b);
 int tPstrncmp(const char8_t* a, const char8_t* b, int n);
 
+// These do a 'natural' string compare by treating groups of base 10 digits as separate objects to be compared by
+// numeric value rather than alpha-numerically based on the encoding. This results in strings like "page10" coming
+// after "page2" because 10 > 2. The compare is case-insensitive, and a digit orders before any non-digit character.
+int tNstrcmp(const char* a, const char* b);
+inline int tNstrcmp(const char8_t* a, const char8_t* b)																	{ return tNstrcmp((const char*)a, (const char*)b); }
+
 inline int tStrlen(const char* s)																						{ tAssert(s); return int(strlen(s)); }
 inline constexpr int tStrlenCT(const char* s)																			{ return *s ? 1 + tStrlenCT(s + 1) : 0; }
 inline char* tStrcpy(char* dst, const char* src)																		{ tAssert(dst && src); return strcpy(dst, src); }
diff --git a/Modules/Foundation/Src/tStandard.cpp b/Modules/Foundation/Src/tStandard.cpp
@@ -3,7 +3,7 @@
 // Tacent functions and types that are standard across all platforms. Includes global functions like itoa which are not
 // available on some platforms, but are common enough that they should be.
 //
-// Copyright (c) 2004-2006, 2015, 2023 Tristan Grimmer.
+// Copyright (c) 2004-2006, 2015, 2023, 2024 Tristan Grimmer.
 // Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby
 // granted, provided that the above copyright notice and this permission notice appear in all copies.
 //
@@ -62,6 +62,77 @@ void* tStd::tMemsrch(void* haystack, int haystackNumBytes, void* needle, int nee
 }
 
 
+int tStd::tNstrcmp(const char* a, const char* b)
+{
+	// This implementation of tNstrcmp was written by GitHub user ClangPan.
+	// Case-insensitive 'natural' compare of two null-terminated strings. Runs of base 10 digits found at the same
+	// position in both strings are compared by numeric value. Everything else is compared character by character
+	// after lowercasing. A digit orders before any non-digit. Returns exactly -1 if a < b, 0 if equal, 1 if a > b.
+	tAssert(a && b);
+
+	enum class Mode
+	{
+		String,
+		Number
+	};
+	Mode mode = Mode::String;
+
+	while (*a && *b)
+	{
+		if (mode == Mode::String)
+		{
+			while (*a && *b)
+			{
+				// The ctype functions have undefined behaviour for negative values, which a plain char may hold for
+				// bytes >= 0x80 (eg. UTF-8 multi-byte sequences), so convert to unsigned char first.
+				const int aChar = tolower((unsigned char)*a);
+				const int bChar = tolower((unsigned char)*b);
+				const bool aDigit = isdigit(aChar) != 0;
+				const bool bDigit = isdigit(bChar) != 0;
+
+				// If both chars are digits, we continue in Number mode.
+				if (aDigit && bDigit)
+				{
+					mode = Mode::Number;
+					break;
+				}
+
+				// If only one side is a digit we have a result. Digits order first.
+				if (aDigit) return -1;
+				if (bDigit) return +1;
+
+				// If the chars differ we have a result. Normalized to -1 or 1 like tStrcmp and friends.
+				if (aChar != bChar) return (aChar < bChar) ? -1 : +1;
+
+				++a; ++b;
+			}
+		}
+		else
+		{
+			// Both strings are at a digit here. Parse each number and advance past it. Values that do not fit in an
+			// unsigned long saturate to ULONG_MAX in strtoul and therefore compare equal.
+			char* end = nullptr;
+			const unsigned long aNum = strtoul(a, &end, 10);
+			a = end;
+			const unsigned long bNum = strtoul(b, &end, 10);
+			b = end;
+
+			// Compare directly. Subtracting and narrowing to int can give 0 or the wrong sign for big differences.
+			if (aNum != bNum) return (aNum < bNum) ? -1 : +1;
+
+			// Equal numbers. Process the next substring in String mode.
+			mode = Mode::String;
+		}
+	}
+
+	// At least one string is exhausted. The shorter one orders first.
+	if (*b) return -1;
+	if (*a) return +1;
+
+	return 0;
+}
+
+
 bool tStd::tStrtob(const char* str)
 {
 	tString lower(str);
diff --git a/README.md b/README.md
@@ -168,6 +168,7 @@ Credits are found directly in the code where appropriate. Here is a list of some
 * Xiaolin Wu for the Wu colour quantizer.
 * Derrick Coetzee for the Scolorq spatial colour quantizer.
 * Khronos Group and Mark Callow for KTX-Software.
+* GitHub user ClangPan for the implementation of tNstrcmp.
 
 
 ### Legal