Mail archive
alpine-aports

[alpine-aports] [PATCH] main/snappy: optimize on mips*, s390x

From: alpine-mips-patches <info_at_mobile-stream.com>
Date: Thu, 13 Dec 2018 05:51:24 +0000

1) Enable generation of LWL/LWR/etc. instructions on pre-R6 mips*.
   This gives a 5x (yes, five times) compression speed-up on P5600
   (and an additional ~10% for decompression).
2) Recognize s390x as an architecture capable of unaligned access.
   See "Integral Boundaries" in SA22-7832-xx (z/Architecture Principles
   of Operation).
3) Force the use of gcc/clang __builtin_bswapXX for the big-endian code path
   in order to avoid the suboptimal generic functions from <byteswap.h>.

The test suite still passes on x86_64, on mipseln8hf (hardware), and under qemu-s390x.
---
 main/snappy/APKBUILD                  |  9 ++-
 main/snappy/optimize-mips-s390x.patch | 80 +++++++++++++++++++++++++++
 2 files changed, 86 insertions(+), 3 deletions(-)
 create mode 100644 main/snappy/optimize-mips-s390x.patch
diff --git a/main/snappy/APKBUILD b/main/snappy/APKBUILD
index 9b1333ae38..3386863626 100644
--- a/main/snappy/APKBUILD
+++ b/main/snappy/APKBUILD
@@ -2,14 +2,16 @@
 # Maintainer: Natanael Copa <ncopa_at_alpinelinux.org>
 pkgname=snappy
 pkgver=1.1.7
-pkgrel=0
+pkgrel=1
 pkgdesc="Fast compression and decompression library"
 url="https://google.github.io/snappy/"
 arch="all"
 license="BSD-3-Clause"
 makedepends="cmake"
 subpackages="$pkgname-dbg $pkgname-dev $pkgname-doc"
-source="$pkgname-$pkgver.tar.gz::https://github.com/google/snappy/archive/$pkgver.tar.gz"
+source="$pkgname-$pkgver.tar.gz::https://github.com/google/snappy/archive/$pkgver.tar.gz
+	optimize-mips-s390x.patch
+	"
 
 build() {
 	cd "$builddir"
@@ -36,4 +38,5 @@ package() {
 	done
 }
 
-sha512sums="32046f532606ba545a4e4825c0c66a19be449f2ca2ff760a6fa170a3603731479a7deadb683546e5f8b5033414c50f4a9a29f6d23b7a41f047e566e69eca7caf  snappy-1.1.7.tar.gz"
+sha512sums="32046f532606ba545a4e4825c0c66a19be449f2ca2ff760a6fa170a3603731479a7deadb683546e5f8b5033414c50f4a9a29f6d23b7a41f047e566e69eca7caf  snappy-1.1.7.tar.gz
+a14b0159631beac628cf99cf9ad970631dfdbf607ca2c3911a64124d1133694689dc76a70f25d4f780ce7093584249905aec2926ef7a3d9350952f7648938392  optimize-mips-s390x.patch"
diff --git a/main/snappy/optimize-mips-s390x.patch b/main/snappy/optimize-mips-s390x.patch
new file mode 100644
index 0000000000..07c95096ae
--- /dev/null
+++ b/main/snappy/optimize-mips-s390x.patch
@@ -0,0 +1,80 @@
+diff --git a/snappy-stubs-internal.h b/snappy-stubs-internal.h
+index f834bdb..22407ef 100644
+--- a/snappy-stubs-internal.h
++++ b/snappy-stubs-internal.h
+@@ -123,7 +123,7 @@ static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL);
+ // x86, PowerPC, and ARM64 can simply do these loads and stores native.
+ 
+ #if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || \
+-    defined(__aarch64__)
++    defined(__aarch64__) || defined(__s390x__)
+ 
+ #define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p))
+ #define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p))
+@@ -150,6 +150,8 @@ static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL);
+ // (it ignores __attribute__((packed)) on individual variables). However,
+ // we can tell it that a _struct_ is unaligned, which has the same effect,
+ // so we do that.
++//
++// On pre-R6 MIPS just let the compiler use LWL/LWR, SWL/SWR etc.
+ 
+ #elif defined(__arm__) && \
+       !defined(__ARM_ARCH_4__) && \
+@@ -163,7 +165,8 @@ static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL);
+       !defined(__ARM_ARCH_6K__) && \
+       !defined(__ARM_ARCH_6Z__) && \
+       !defined(__ARM_ARCH_6ZK__) && \
+-      !defined(__ARM_ARCH_6T2__)
++      !defined(__ARM_ARCH_6T2__) || \
++      (defined(__mips__) && (!defined(__mips_isa_rev) || __mips_isa_rev < 6))
+ 
+ #if __GNUC__
+ #define ATTRIBUTE_PACKED __attribute__((__packed__))
+@@ -184,6 +187,11 @@ struct Unaligned32Struct {
+   uint8 dummy;  // To make the size non-power-of-two.
+ } ATTRIBUTE_PACKED;
+ 
++struct Unaligned64Struct {
++  uint64 value;
++  uint8 dummy;  // To make the size non-power-of-two.
++} ATTRIBUTE_PACKED;
++
+ }  // namespace internal
+ }  // namespace base
+ 
+@@ -203,6 +211,7 @@ struct Unaligned32Struct {
+ // See if that would be more efficient on platforms supporting it,
+ // at least for copies.
+ 
++#ifndef __mips__
+ inline uint64 UNALIGNED_LOAD64(const void *p) {
+   uint64 t;
+   memcpy(&t, p, sizeof t);
+@@ -212,6 +221,13 @@ inline uint64 UNALIGNED_LOAD64(const void *p) {
+ inline void UNALIGNED_STORE64(void *p, uint64 v) {
+   memcpy(p, &v, sizeof v);
+ }
++#else
++#define UNALIGNED_LOAD64(_p) \
++    ((reinterpret_cast<const ::snappy::base::internal::Unaligned64Struct *>(_p))->value)
++#define UNALIGNED_STORE64(_p, _val) \
++    ((reinterpret_cast< ::snappy::base::internal::Unaligned64Struct *>(_p))->value = \
++         (_val))
++#endif
+ 
+ #else
+ 
+@@ -274,6 +290,13 @@ inline void UNALIGNED_STORE64(void *p, uint64 v) {
+ #define bswap_32(x) OSSwapInt32(x)
+ #define bswap_64(x) OSSwapInt64(x)
+ 
++#elif defined(__GNUC__)
++/* musl provides suboptimal (generic) bswap_xx implementations in <byteswap.h> */
++
++#define bswap_16	__builtin_bswap16
++#define bswap_32	__builtin_bswap32
++#define bswap_64	__builtin_bswap64
++
+ #elif defined(HAVE_BYTESWAP_H)
+ #include <byteswap.h>
+ 
-- 
2.19.2
---
Unsubscribe:  alpine-aports+unsubscribe_at_lists.alpinelinux.org
Help:         alpine-aports+help_at_lists.alpinelinux.org
---
Received on Thu Dec 13 2018 - 05:51:24 UTC