Tile SIMD implementation of SCMemcmp and SCMemcmpLowercase

Based on the SSE3 implementation, it checks 8 bytes at a time.
12 years ago · 22225a7e99
parent e68d44b051
commit 22225a7e99
1 changed files with 118 additions and 2 deletions
--- a/src/util-memcmp.h
+++ b/src/util-memcmp.h
@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2010 Open Information Security Foundation
+/* Copyright (C) 2007-2013 Open Information Security Foundation
 *
 * You can copy, redistribute or modify this Program under the terms of
 * the GNU General Public License version 2 as published by the Free
@ -20,7 +20,7 @@
 *
 * \author Victor Julien <victor@inliniac.net>
 *
- * Memcmp implementations for SSE3, SSE4.1 and SSE4.2.
+ * Memcmp implementations for SSE3, SSE4.1, SSE4.2 and TILE-Gx SIMD.
 *
 * Both SCMemcmp and SCMemcmpLowercase return 0 on a exact match,
 * 1 on a failed match.
@ -342,6 +342,122 @@ static inline int SCMemcmpLowercase(void *s1, void *s2, size_t len) {
    return 0;
 }

+#elif defined(__tile__)
+
+#include <ctype.h>
+
+/** \brief Compare two buffers for equality, one 64-bit word per step.
+ *  \param s1 first buffer
+ *  \param s2 second buffer
+ *  \param len number of bytes to compare
+ *  \retval 0 all len bytes are equal (also returned when len is 0)
+ *  \retval 1 at least one byte differs
+ */
+static inline int SCMemcmp(void *s1, void *s2, size_t len)
+{
+    const char *p1 = s1;
+    const char *p2 = s2;
+    uint64_t cur1, cur2;
+
+    if (len == 0)
+        return 0;
+
+    /* Fetch the aligned word holding the first byte of each buffer.
+     * These loads don't trigger unaligned events. */
+    cur1 = __insn_ldna(p1);
+    cur2 = __insn_ldna(p2);
+
+    /* Reading 8 raw bytes from an arbitrary address could run past the
+     * end of a page, so the word loop only runs while more than 8 bytes
+     * remain: byte 8 is then still inside the buffer, which makes the
+     * aligned load of the word containing it safe. */
+    for (; len > 8; len -= 8, p1 += 8, p2 += 8) {
+        const uint64_t next1 = __insn_ldna(p1 + 8);
+        const uint64_t next2 = __insn_ldna(p2 + 8);
+
+        /* Stitch each pair of neighbouring aligned words into the 8
+         * bytes that start at the current position and compare them. */
+        if (__insn_dblalign(cur1, next1, p1) !=
+            __insn_dblalign(cur2, next2, p2))
+            return 1;
+
+        /* The word just loaded becomes the low word of the next step. */
+        cur1 = next1;
+        cur2 = next2;
+    }
+
+    /* Between 1 and 8 bytes are left here; compare them one by one. */
+    for (; len != 0; len--, p1++, p2++) {
+        if (*p1 != *p2)
+            return 1;
+    }
+
+    return 0;
+}
+
+/** \brief Lowercase the 8 bytes packed in a 64-bit word using SIMD.
+ *
+ *  Only bytes in the range 'A'..'Z' are modified; every other byte is
+ *  returned unchanged.
+ *
+ *  \param cc Word containing the 8 bytes.
+ *  \return Word containing the 8 bytes, each converted to lowercase.
+ */
+static inline uint64_t
+vec_tolower(uint64_t cc)
+{
+    /* Per-byte flags: "below 'A'" and "at most 'Z'". */
+    const uint64_t below_A = __insn_v1cmpltui(cc, 'A');
+    const uint64_t upto_Z = __insn_v1cmpltui(cc, 'Z' + 1);
+
+    /* The two flags differ only for bytes inside 'A'..'Z'. */
+    const uint64_t upper_flags = __insn_v1cmpne(upto_Z, below_A);
+
+    /* Shifting each flag left by 5 gives the per-byte amount to add:
+     * 32 for uppercase letters (presumably the flag is 1 in those
+     * lanes), 0 for everything else. */
+    const uint64_t delta = __insn_v1shli(upper_flags, 5);
+
+    return __insn_v1add(cc, delta);
+}
+
+/** \brief compare two buffers in a case insensitive way
+ *  \param s1 buffer already in lowercase
+ *  \param s2 buffer with mixed upper and lowercase
+ *  \param len number of bytes to compare
+ *  \retval 0 s1 equals the lowercased s2 over len bytes (also when len is 0)
+ *  \retval 1 at least one byte differs
+ */
+static inline int SCMemcmpLowercase(void *s1, void *s2, size_t len)
+{
+    /* Typed byte cursors: avoids arithmetic on void pointers and makes
+     * every byte read below an unsigned value. */
+    const uint8_t *p1 = s1;
+    const uint8_t *p2 = s2;
+    uint64_t b1, w1, aligned1;
+    uint64_t b2, w2, aligned2;
+
+    if (len == 0)
+        return 0;
+
+    /* TODO Check for already aligned cases. To optimize. */
+
+    /* Load word containing the beginning of each string.
+     * These loads don't trigger unaligned events.
+     */
+    w1 = __insn_ldna(p1);
+    w2 = __insn_ldna(p2);
+    /* Can't just read next 8 bytes because it might go past the end
+     * of a page. */
+    while (len > 8) {
+        /* Here, the buffer extends into the next word by at least one
+         * byte, so it is safe to read the next word.  Do aligned
+         * loads on the next word.  Then use the two words to create an
+         * aligned word from each string.  Only s2 is lowercased; s1 is
+         * required to be lowercase already. */
+        b1 = __insn_ldna(p1 + 8);
+        b2 = __insn_ldna(p2 + 8);
+        aligned1 = __insn_dblalign(w1, b1, p1);
+        aligned2 = vec_tolower(__insn_dblalign(w2, b2, p2));
+        if (aligned1 != aligned2)
+            return 1;
+
+        /* Move forward one word (8 bytes) */
+        w1 = b1;
+        w2 = b2;
+        len -= 8;
+        p1 += 8;
+        p2 += 8;
+    }
+
+    /* Process the last up-to 8 bytes one at a time.  Both sides are
+     * read as unsigned bytes: handing a negative plain char to
+     * tolower() is undefined behaviour, and a sign-extended s1 byte
+     * would not compare equal to the same byte coming back from
+     * tolower(), so bytes >= 0x80 could mismatch here while matching
+     * in the word loop above.
+     * NOTE(review): tolower() follows the current locale whereas
+     * vec_tolower() only maps 'A'..'Z' - confirm the program stays in
+     * the "C" locale. */
+    do {
+        if (*p1 != tolower(*p2))
+            return 1;
+        p1++;
+        p2++;
+        len--;
+    } while (len);
+
+    return 0;
+}
+
 #else

 /* No SIMD support, fall back to plain memcmp and a home grown lowercase one */