~alpine/aports

[alpine-aports] [PATCH] main/snappy: optimize on mips*, s390x

alpine-mips-patches <info@mobile-stream.com>
Details
Message ID
<20181213070925.B9F0F60482@mx12.valuehost.ru>
Sender timestamp
1544680284
DKIM signature
missing
Download raw message
Patch: +86 -3
1) Enable generation of LWL/LWR/etc. instructions on pre-R6 mips*.
   This gives a 5x (yes, five times) compression speed-up on P5600
   (and an additional ~10% for decompression).
2) Recognize s390x as an architecture capable of unaligned access.
   See "Integral Boundaries" in SA22-7832-xx (z/Architecture Principles
   of Operation).
3) Force the use of gcc/clang __builtin_bswapXX for the big-endian code path
   in order to avoid the suboptimal generic functions from musl's <byteswap.h>.

The test suite still passes on x86_64, on mipseln8hf (hardware), and under qemu-s390x.
---
 main/snappy/APKBUILD                  |  9 ++-
 main/snappy/optimize-mips-s390x.patch | 80 +++++++++++++++++++++++++++
 2 files changed, 86 insertions(+), 3 deletions(-)
 create mode 100644 main/snappy/optimize-mips-s390x.patch

diff --git a/main/snappy/APKBUILD b/main/snappy/APKBUILD
index 9b1333ae38..3386863626 100644
--- a/main/snappy/APKBUILD
+++ b/main/snappy/APKBUILD
@@ -2,14 +2,16 @@
 # Maintainer: Natanael Copa <ncopa@alpinelinux.org>
 pkgname=snappy
 pkgver=1.1.7
-pkgrel=0
+pkgrel=1
 pkgdesc="Fast compression and decompression library"
 url="https://google.github.io/snappy/"
 arch="all"
 license="BSD-3-Clause"
 makedepends="cmake"
 subpackages="$pkgname-dbg $pkgname-dev $pkgname-doc"
-source="$pkgname-$pkgver.tar.gz::https://github.com/google/snappy/archive/$pkgver.tar.gz"
+source="$pkgname-$pkgver.tar.gz::https://github.com/google/snappy/archive/$pkgver.tar.gz
+	optimize-mips-s390x.patch
+	"
 
 build() {
 	cd "$builddir"
@@ -36,4 +38,5 @@ package() {
 	done
 }
 
-sha512sums="32046f532606ba545a4e4825c0c66a19be449f2ca2ff760a6fa170a3603731479a7deadb683546e5f8b5033414c50f4a9a29f6d23b7a41f047e566e69eca7caf  snappy-1.1.7.tar.gz"
+sha512sums="32046f532606ba545a4e4825c0c66a19be449f2ca2ff760a6fa170a3603731479a7deadb683546e5f8b5033414c50f4a9a29f6d23b7a41f047e566e69eca7caf  snappy-1.1.7.tar.gz
+a14b0159631beac628cf99cf9ad970631dfdbf607ca2c3911a64124d1133694689dc76a70f25d4f780ce7093584249905aec2926ef7a3d9350952f7648938392  optimize-mips-s390x.patch"
diff --git a/main/snappy/optimize-mips-s390x.patch b/main/snappy/optimize-mips-s390x.patch
new file mode 100644
index 0000000000..07c95096ae
--- /dev/null
+++ b/main/snappy/optimize-mips-s390x.patch
@@ -0,0 +1,80 @@
diff --git a/snappy-stubs-internal.h b/snappy-stubs-internal.h
index f834bdb..22407ef 100644
--- a/snappy-stubs-internal.h
+++ b/snappy-stubs-internal.h
@@ -123,7 +123,7 @@ static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL);
 // x86, PowerPC, and ARM64 can simply do these loads and stores native.
 
 #if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || \
-    defined(__aarch64__)
+    defined(__aarch64__) || defined(__s390x__)
 
 #define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p))
 #define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p))
@@ -150,6 +150,8 @@ static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL);
 // (it ignores __attribute__((packed)) on individual variables). However,
 // we can tell it that a _struct_ is unaligned, which has the same effect,
 // so we do that.
+//
+// On pre-R6 MIPS just let the compiler use LWL/LWR, SWL/SWR etc.
 
 #elif defined(__arm__) && \
       !defined(__ARM_ARCH_4__) && \
@@ -163,7 +165,8 @@ static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL);
       !defined(__ARM_ARCH_6K__) && \
       !defined(__ARM_ARCH_6Z__) && \
       !defined(__ARM_ARCH_6ZK__) && \
-      !defined(__ARM_ARCH_6T2__)
+      !defined(__ARM_ARCH_6T2__) || \
+      (defined(__mips__) && (!defined(__mips_isa_rev) || __mips_isa_rev < 6))
 
 #if __GNUC__
 #define ATTRIBUTE_PACKED __attribute__((__packed__))
@@ -184,6 +187,11 @@ struct Unaligned32Struct {
   uint8 dummy;  // To make the size non-power-of-two.
 } ATTRIBUTE_PACKED;
 
+struct Unaligned64Struct {
+  uint64 value;
+  uint8 dummy;  // To make the size non-power-of-two.
+} ATTRIBUTE_PACKED;
+
 }  // namespace internal
 }  // namespace base
 
@@ -203,6 +211,7 @@ struct Unaligned32Struct {
 // See if that would be more efficient on platforms supporting it,
 // at least for copies.
 
+#ifndef __mips__
 inline uint64 UNALIGNED_LOAD64(const void *p) {
   uint64 t;
   memcpy(&t, p, sizeof t);
@@ -212,6 +221,13 @@ inline uint64 UNALIGNED_LOAD64(const void *p) {
 inline void UNALIGNED_STORE64(void *p, uint64 v) {
   memcpy(p, &v, sizeof v);
 }
+#else
+#define UNALIGNED_LOAD64(_p) \
+    ((reinterpret_cast<const ::snappy::base::internal::Unaligned64Struct *>(_p))->value)
+#define UNALIGNED_STORE64(_p, _val) \
+    ((reinterpret_cast< ::snappy::base::internal::Unaligned64Struct *>(_p))->value = \
+         (_val))
+#endif
 
 #else
 
@@ -274,6 +290,13 @@ inline void UNALIGNED_STORE64(void *p, uint64 v) {
 #define bswap_32(x) OSSwapInt32(x)
 #define bswap_64(x) OSSwapInt64(x)
 
+#elif defined(__GNUC__)
+/* musl provides suboptimal (generic) bswap_xx implementations in <byteswap.h> */
+
+#define bswap_16	__builtin_bswap16
+#define bswap_32	__builtin_bswap32
+#define bswap_64	__builtin_bswap64
+
 #elif defined(HAVE_BYTESWAP_H)
 #include <byteswap.h>
 
-- 
2.19.2




---
Unsubscribe:  alpine-aports+unsubscribe@lists.alpinelinux.org
Help:         alpine-aports+help@lists.alpinelinux.org
---
Reply to thread Export thread (mbox)