X-Original-To: alpine-aports@lists.alpinelinux.org Received: from mx12.valuehost.ru (mx12.valuehost.ru [217.112.42.215]) by lists.alpinelinux.org (Postfix) with ESMTP id 87076F84F32 for ; Thu, 13 Dec 2018 07:09:26 +0000 (UTC) Received: from mx7.valuehost.ru (unknown [127.0.0.255]) by mx12.valuehost.ru (Postfix) with ESMTP id B9F0F60482 for ; Thu, 13 Dec 2018 10:09:25 +0300 (MSK) From: alpine-mips-patches Date: Thu, 13 Dec 2018 05:51:24 +0000 Subject: [alpine-aports] [PATCH] main/snappy: optimize on mips*, s390x To: alpine-aports@lists.alpinelinux.org Message-Id: <20181213070925.B9F0F60482@mx12.valuehost.ru> X-Mailinglist: alpine-aports Precedence: list List-Id: Alpine Development List-Unsubscribe: List-Post: List-Help: List-Subscribe: 1) Enable generation of LWL/LWR/etc. instructions on pre-R6 mips*. This gives a 5x (yes, five times) compression speed-up on P5600 (and an additional ~10% for decompression). 2) Recognize s390x as an unaligned-access-capable architecture. See "Integral Boundaries" in SA22-7832-xx. 3) Force use of the gcc/clang __builtin_bswapXX builtins for the big-endian code path in order to avoid the suboptimal generic functions from <byteswap.h>. The test suite still passes on x86_64, mipseln8hf (hardware) and under qemu-s390x. 
--- main/snappy/APKBUILD | 9 ++- main/snappy/optimize-mips-s390x.patch | 80 +++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 3 deletions(-) create mode 100644 main/snappy/optimize-mips-s390x.patch diff --git a/main/snappy/APKBUILD b/main/snappy/APKBUILD index 9b1333ae38..3386863626 100644 --- a/main/snappy/APKBUILD +++ b/main/snappy/APKBUILD @@ -2,14 +2,16 @@ # Maintainer: Natanael Copa pkgname=snappy pkgver=1.1.7 -pkgrel=0 +pkgrel=1 pkgdesc="Fast compression and decompression library" url="https://google.github.io/snappy/" arch="all" license="BSD-3-Clause" makedepends="cmake" subpackages="$pkgname-dbg $pkgname-dev $pkgname-doc" -source="$pkgname-$pkgver.tar.gz::https://github.com/google/snappy/archive/$pkgver.tar.gz" +source="$pkgname-$pkgver.tar.gz::https://github.com/google/snappy/archive/$pkgver.tar.gz + optimize-mips-s390x.patch + " build() { cd "$builddir" @@ -36,4 +38,5 @@ package() { done } -sha512sums="32046f532606ba545a4e4825c0c66a19be449f2ca2ff760a6fa170a3603731479a7deadb683546e5f8b5033414c50f4a9a29f6d23b7a41f047e566e69eca7caf snappy-1.1.7.tar.gz" +sha512sums="32046f532606ba545a4e4825c0c66a19be449f2ca2ff760a6fa170a3603731479a7deadb683546e5f8b5033414c50f4a9a29f6d23b7a41f047e566e69eca7caf snappy-1.1.7.tar.gz +a14b0159631beac628cf99cf9ad970631dfdbf607ca2c3911a64124d1133694689dc76a70f25d4f780ce7093584249905aec2926ef7a3d9350952f7648938392 optimize-mips-s390x.patch" diff --git a/main/snappy/optimize-mips-s390x.patch b/main/snappy/optimize-mips-s390x.patch new file mode 100644 index 0000000000..07c95096ae --- /dev/null +++ b/main/snappy/optimize-mips-s390x.patch @@ -0,0 +1,80 @@ +diff --git a/snappy-stubs-internal.h b/snappy-stubs-internal.h +index f834bdb..22407ef 100644 +--- a/snappy-stubs-internal.h ++++ b/snappy-stubs-internal.h +@@ -123,7 +123,7 @@ static const int64 kint64max = static_cast(0x7FFFFFFFFFFFFFFFLL); + // x86, PowerPC, and ARM64 can simply do these loads and stores native. 
+ + #if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || \ +- defined(__aarch64__) ++ defined(__aarch64__) || defined(__s390x__) + + #define UNALIGNED_LOAD16(_p) (*reinterpret_cast(_p)) + #define UNALIGNED_LOAD32(_p) (*reinterpret_cast(_p)) +@@ -150,6 +150,8 @@ static const int64 kint64max = static_cast(0x7FFFFFFFFFFFFFFFLL); + // (it ignores __attribute__((packed)) on individual variables). However, + // we can tell it that a _struct_ is unaligned, which has the same effect, + // so we do that. ++// ++// On pre-R6 MIPS just let the compiler use LWL/LWR, SWL/SWR etc. + + #elif defined(__arm__) && \ + !defined(__ARM_ARCH_4__) && \ +@@ -163,7 +165,8 @@ static const int64 kint64max = static_cast(0x7FFFFFFFFFFFFFFFLL); + !defined(__ARM_ARCH_6K__) && \ + !defined(__ARM_ARCH_6Z__) && \ + !defined(__ARM_ARCH_6ZK__) && \ +- !defined(__ARM_ARCH_6T2__) ++ !defined(__ARM_ARCH_6T2__) || \ ++ (defined(__mips__) && (!defined(__mips_isa_rev) || __mips_isa_rev < 6)) + + #if __GNUC__ + #define ATTRIBUTE_PACKED __attribute__((__packed__)) +@@ -184,6 +187,11 @@ struct Unaligned32Struct { + uint8 dummy; // To make the size non-power-of-two. + } ATTRIBUTE_PACKED; + ++struct Unaligned64Struct { ++ uint64 value; ++ uint8 dummy; // To make the size non-power-of-two. ++} ATTRIBUTE_PACKED; ++ + } // namespace internal + } // namespace base + +@@ -203,6 +211,7 @@ struct Unaligned32Struct { + // See if that would be more efficient on platforms supporting it, + // at least for copies. 
+ ++#ifndef __mips__ + inline uint64 UNALIGNED_LOAD64(const void *p) { + uint64 t; + memcpy(&t, p, sizeof t); +@@ -212,6 +221,13 @@ inline uint64 UNALIGNED_LOAD64(const void *p) { + inline void UNALIGNED_STORE64(void *p, uint64 v) { + memcpy(p, &v, sizeof v); + } ++#else ++#define UNALIGNED_LOAD64(_p) \ ++ ((reinterpret_cast(_p))->value) ++#define UNALIGNED_STORE64(_p, _val) \ ++ ((reinterpret_cast< ::snappy::base::internal::Unaligned64Struct *>(_p))->value = \ ++ (_val)) ++#endif + + #else + +@@ -274,6 +290,13 @@ inline void UNALIGNED_STORE64(void *p, uint64 v) { + #define bswap_32(x) OSSwapInt32(x) + #define bswap_64(x) OSSwapInt64(x) + ++#elif defined(__GNUC__) ++/* musl provides suboptimal (generic) bswap_xx implementations in */ ++ ++#define bswap_16 __builtin_bswap16 ++#define bswap_32 __builtin_bswap32 ++#define bswap_64 __builtin_bswap64 ++ + #elif defined(HAVE_BYTESWAP_H) + #include + -- 2.19.2 --- Unsubscribe: alpine-aports+unsubscribe@lists.alpinelinux.org Help: alpine-aports+help@lists.alpinelinux.org ---