/*
 * Copyright (C) 1991, 1993 Free Software Foundation, Inc.
 * Contributed by Torbjorn Granlund (tege@sics.se).
 *
 * NOTE: The canonical source of this file is maintained with the GNU C Library.
 * Bugs can be reported to bug-glibc@prep.ai.mit.edu.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2, or (at your option) any
 * later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * $Id: memcmp.c,v 1.1 1995/03/14 12:20:54 tom Exp $
 * $Log: memcmp.c,v $
 * Revision 1.1  1995/03/14 12:20:54  tom
 * Initial revision
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

/*
 * __ptr_t is the generic pointer type: `void *' for C++ and ANSI C,
 * `char *' otherwise (in which case `const' is also defined away).
 */
#undef __ptr_t
#if defined (__cplusplus) || (defined (__STDC__) && __STDC__)
#define __ptr_t void *
#else /* Not C++ or ANSI C.  */
#undef const
#define const
#define __ptr_t char *
#endif /* C++ or ANSI C.  */

#if defined (HAVE_STRING_H) || defined (_LIBC)
#include <string.h>
#endif

#ifdef _LIBC

#include <memcopy.h>

#else /* Not in the GNU C library.  */

#include <sys/types.h>          /* for size_t */

/*
 * Type to use for aligned memory operations.
 * This should normally be the biggest type supported by a single load
 * and store.  Must be an unsigned type.
 */
#define op_t unsigned long int
#define OPSIZ (sizeof(op_t))

/*
 * Threshold value for when to enter the unrolled loops.
 */
#define OP_T_THRES 16

/*
 * Type to use for unaligned operations.
 */
typedef unsigned char byte;

/*
 * MERGE combines two adjacent aligned words W0 and W1 into the single
 * word that straddles them; the shift directions depend on byte order.
 */
#ifndef WORDS_BIGENDIAN
#define MERGE(w0, sh_1, w1, sh_2) (((w0) >> (sh_1)) | ((w1) << (sh_2)))
#else
#define MERGE(w0, sh_1, w1, sh_2) (((w0) << (sh_1)) | ((w1) >> (sh_2)))
#endif

#endif /* In the GNU C library.  */

/*
 * CMP_LT_OR_GT orders two words that are already known to differ.
 * A plain integer comparison is used when WORDS_BIGENDIAN is defined;
 * otherwise the words are re-examined byte by byte in memory order.
 */
#ifdef WORDS_BIGENDIAN
#define CMP_LT_OR_GT(a, b) ((a) > (b) ? 1 : -1)
#else
#define CMP_LT_OR_GT(a, b) memcmp_bytes ((a), (b))
#endif

/* BE VERY CAREFUL IF YOU CHANGE THIS CODE!  */

/*
 * The strategy of this memcmp is:
 *
 * 1. Compare bytes until one of the block pointers is aligned.
 *
 * 2. Compare using memcmp_common_alignment or
 *    memcmp_not_common_alignment, regarding the alignment of the other
 *    block after the initial byte operations.  The maximum number of
 *    full words (of type op_t) are compared in this way.
 *
 * 3. Compare the few remaining bytes.
 *
 * NOTE: block addresses are carried around as `long int' values, so the
 * code assumes a pointer survives a round trip through `long int'.
 */

#ifndef WORDS_BIGENDIAN
/*
 * memcmp_bytes -- Compare A and B bytewise in the byte order of the machine.
 * A and B are known to be different (the loop has no other terminator).
 * This is needed only on little-endian machines.
 *
 * Returns the difference of the first pair of differing bytes, narrowed
 * from op_t to int.
 */
#ifdef __GNUC__
__inline
#endif
static int
memcmp_bytes (a, b)
     op_t a, b;
{
  long int srcp1 = (long int) &a;
  long int srcp2 = (long int) &b;
  op_t a0, b0;

  do
    {
      a0 = ((byte *) srcp1)[0];
      b0 = ((byte *) srcp2)[0];
      srcp1 += 1;
      srcp2 += 1;
    }
  while (a0 == b0);
  return a0 - b0;
}
#endif

/*
 * memcmp_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN `op_t'
 * objects (not LEN bytes!).  Both SRCP1 and SRCP2 should be aligned for
 * memory operations on `op_t's.
 *
 * Returns 0 when all LEN words are equal, otherwise the result of
 * CMP_LT_OR_GT on the first differing pair of words.
 */
#ifdef __GNUC__
__inline
#endif
static int
memcmp_common_alignment (srcp1, srcp2, len)
     long int srcp1;
     long int srcp2;
     size_t len;
{
  op_t a0, a1;
  op_t b0, b1;

  /*
   * Enter the four-way unrolled loop at the point matching LEN % 4.
   * The pointers and LEN are biased so that the fixed indices [0]..[3]
   * used inside the loop address the right words.
   */
  switch (len % 4)
    {
    case 2:
      a0 = ((op_t *) srcp1)[0];
      b0 = ((op_t *) srcp2)[0];
      srcp1 -= 2 * OPSIZ;
      srcp2 -= 2 * OPSIZ;
      len += 2;
      goto do1;
    case 3:
      a1 = ((op_t *) srcp1)[0];
      b1 = ((op_t *) srcp2)[0];
      srcp1 -= OPSIZ;
      srcp2 -= OPSIZ;
      len += 1;
      goto do2;
    case 0:
      if (OP_T_THRES <= 3 * OPSIZ && len == 0)
        return 0;
      a0 = ((op_t *) srcp1)[0];
      b0 = ((op_t *) srcp2)[0];
      goto do3;
    case 1:
      a1 = ((op_t *) srcp1)[0];
      b1 = ((op_t *) srcp2)[0];
      srcp1 += OPSIZ;
      srcp2 += OPSIZ;
      len -= 1;
      if (OP_T_THRES <= 3 * OPSIZ && len == 0)
        goto do0;
      /* Fall through.  */
    }

  /* Each step loads the next pair before testing the previous one.  */
  do
    {
      a0 = ((op_t *) srcp1)[0];
      b0 = ((op_t *) srcp2)[0];
      if (a1 != b1)
        return CMP_LT_OR_GT (a1, b1);

    do3:
      a1 = ((op_t *) srcp1)[1];
      b1 = ((op_t *) srcp2)[1];
      if (a0 != b0)
        return CMP_LT_OR_GT (a0, b0);

    do2:
      a0 = ((op_t *) srcp1)[2];
      b0 = ((op_t *) srcp2)[2];
      if (a1 != b1)
        return CMP_LT_OR_GT (a1, b1);

    do1:
      a1 = ((op_t *) srcp1)[3];
      b1 = ((op_t *) srcp2)[3];
      if (a0 != b0)
        return CMP_LT_OR_GT (a0, b0);

      srcp1 += 4 * OPSIZ;
      srcp2 += 4 * OPSIZ;
      len -= 4;
    }
  while (len != 0);

  /*
   * This is the right position for do0.  Please don't move
   * it into the loop.
   */
do0:
  if (a1 != b1)
    return CMP_LT_OR_GT (a1, b1);
  return 0;
}

/*
 * memcmp_not_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN
 * `op_t' objects (not LEN bytes!).  SRCP2 should be aligned for memory
 * operations on `op_t', but SRCP1 *should be unaligned*.
 *
 * SRCP1 is rounded down to a word boundary and every compared word of the
 * first block is assembled with MERGE from two neighbouring aligned loads.
 *
 * Returns 0 when all LEN words are equal, otherwise the result of
 * CMP_LT_OR_GT on the first differing pair of words.
 */
#ifdef __GNUC__
__inline
#endif
static int
memcmp_not_common_alignment (srcp1, srcp2, len)
     long int srcp1;
     long int srcp2;
     size_t len;
{
  op_t a0, a1, a2, a3;
  op_t b0, b1, b2, b3;
  op_t x;
  int shl, shr;

  /*
   * Calculate how to shift a word read at the memory operation
   * aligned srcp1 to make it aligned for comparison.
   */
  shl = 8 * (srcp1 % OPSIZ);
  shr = 8 * OPSIZ - shl;

  /*
   * Make SRCP1 aligned by rounding it down to the beginning of the `op_t'
   * it points in the middle of.
   */
  srcp1 &= -OPSIZ;

  /* Same entry-point dispatch on LEN % 4 as the common-alignment case.  */
  switch (len % 4)
    {
    case 2:
      a1 = ((op_t *) srcp1)[0];
      a2 = ((op_t *) srcp1)[1];
      b2 = ((op_t *) srcp2)[0];
      srcp1 -= 1 * OPSIZ;
      srcp2 -= 2 * OPSIZ;
      len += 2;
      goto do1;
    case 3:
      a0 = ((op_t *) srcp1)[0];
      a1 = ((op_t *) srcp1)[1];
      b1 = ((op_t *) srcp2)[0];
      srcp2 -= 1 * OPSIZ;
      len += 1;
      goto do2;
    case 0:
      if (OP_T_THRES <= 3 * OPSIZ && len == 0)
        return 0;
      a3 = ((op_t *) srcp1)[0];
      a0 = ((op_t *) srcp1)[1];
      b0 = ((op_t *) srcp2)[0];
      srcp1 += 1 * OPSIZ;
      goto do3;
    case 1:
      a2 = ((op_t *) srcp1)[0];
      a3 = ((op_t *) srcp1)[1];
      b3 = ((op_t *) srcp2)[0];
      srcp1 += 2 * OPSIZ;
      srcp2 += 1 * OPSIZ;
      len -= 1;
      if (OP_T_THRES <= 3 * OPSIZ && len == 0)
        goto do0;
      /* Fall through.  */
    }

  do
    {
      a0 = ((op_t *) srcp1)[0];
      b0 = ((op_t *) srcp2)[0];
      x = MERGE(a2, shl, a3, shr);
      if (x != b3)
        return CMP_LT_OR_GT (x, b3);

    do3:
      a1 = ((op_t *) srcp1)[1];
      b1 = ((op_t *) srcp2)[1];
      x = MERGE(a3, shl, a0, shr);
      if (x != b0)
        return CMP_LT_OR_GT (x, b0);

    do2:
      a2 = ((op_t *) srcp1)[2];
      b2 = ((op_t *) srcp2)[2];
      x = MERGE(a0, shl, a1, shr);
      if (x != b1)
        return CMP_LT_OR_GT (x, b1);

    do1:
      a3 = ((op_t *) srcp1)[3];
      b3 = ((op_t *) srcp2)[3];
      x = MERGE(a1, shl, a2, shr);
      if (x != b2)
        return CMP_LT_OR_GT (x, b2);

      srcp1 += 4 * OPSIZ;
      srcp2 += 4 * OPSIZ;
      len -= 4;
    }
  while (len != 0);

  /*
   * This is the right position for do0.  Please don't move
   * it into the loop.
   */
do0:
  x = MERGE(a2, shl, a3, shr);
  if (x != b3)
    return CMP_LT_OR_GT (x, b3);
  return 0;
}

/*
 * memcmp -- Compare the first LEN bytes of the blocks at S1 and S2.
 *
 * Returns 0 when the blocks are equal over LEN bytes (always for LEN == 0).
 * Otherwise the result is derived from the first differing position: a
 * byte difference `a0 - b0' in the byte loops, or the value produced by
 * CMP_LT_OR_GT in the word loops.  The byte difference is computed in
 * op_t and narrowed to int on return.
 */
int
memcmp (s1, s2, len)
     const __ptr_t s1;
     const __ptr_t s2;
     size_t len;
{
  op_t a0;
  op_t b0;
  long int srcp1 = (long int) s1;
  long int srcp2 = (long int) s2;
  op_t res;

  if (len >= OP_T_THRES)
    {
      /*
       * There are at least some bytes to compare.  No need to test
       * for LEN == 0 in this alignment loop.
       */
      while (srcp2 % OPSIZ != 0)
        {
          a0 = ((byte *) srcp1)[0];
          b0 = ((byte *) srcp2)[0];
          srcp1 += 1;
          srcp2 += 1;
          res = a0 - b0;
          if (res != 0)
            return res;
          len -= 1;
        }

      /*
       * SRCP2 is now aligned for memory operations on `op_t'.
       * SRCP1 alignment determines if we can do a simple,
       * aligned compare or need to shuffle bits.
       */
      if (srcp1 % OPSIZ == 0)
        res = memcmp_common_alignment (srcp1, srcp2, len / OPSIZ);
      else
        res = memcmp_not_common_alignment (srcp1, srcp2, len / OPSIZ);
      if (res != 0)
        return res;

      /* Number of bytes remaining in the interval [0..OPSIZ-1].  */
      srcp1 += len & -OPSIZ;
      srcp2 += len & -OPSIZ;
      len %= OPSIZ;
    }

  /* There are just a few bytes to compare.  Use byte memory operations.  */
  while (len != 0)
    {
      a0 = ((byte *) srcp1)[0];
      b0 = ((byte *) srcp2)[0];
      srcp1 += 1;
      srcp2 += 1;
      res = a0 - b0;
      if (res != 0)
        return res;
      len -= 1;
    }

  return 0;
}