+
+#include "attributes.h"
+#include "version.h"
+#include "libavutil/avconfig.h"
+
+#if AV_HAVE_BIGENDIAN
+# define AV_NE(be, le) (be)
+#else
+# define AV_NE(be, le) (le)
+#endif
+
+//rounded division & shift
+#define RSHIFT(a,b) ((a) > 0 ? ((a) + ((1<<(b))>>1))>>(b) : ((a) + ((1<<(b))>>1)-1)>>(b))
+/* assume b>0 */
+#define ROUNDED_DIV(a,b) (((a)>0 ? (a) + ((b)>>1) : (a) - ((b)>>1))/(b))
+/* assume a>0 and b>0 */
+#define FF_CEIL_RSHIFT(a,b) (!av_builtin_constant_p(b) ? -((-(a)) >> (b)) \
+ : ((a) + (1<<(b)) - 1) >> (b))
+#define FFUDIV(a,b) (((a)>0 ?(a):(a)-(b)+1) / (b))
+#define FFUMOD(a,b) ((a)-(b)*FFUDIV(a,b))
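+
+/*
+ * Editor's usage sketch: ROUNDED_DIV rounds the quotient to the nearest
+ * integer (ties away from zero), while plain C integer division truncates
+ * toward zero; RSHIFT is the shift analogue.
+ *
+ * @code
+ * int a = ROUNDED_DIV(7, 2);  //  (7 + 1) / 2  -> 4  (plain 7/2 gives 3)
+ * int b = ROUNDED_DIV(-7, 2); // (-7 - 1) / 2  -> -4 (plain -7/2 gives -3)
+ * int c = RSHIFT(7, 1);       //  (7 + 1) >> 1 -> 4  (plain 7>>1 gives 3)
+ * @endcode
+ */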
+
+/**
+ * Absolute value. Note: INT_MIN / INT64_MIN result in undefined behavior as they
+ * are not representable as absolute values of their type. This is the same
+ * as with the standard *abs() functions.
+ * @see FFNABS()
+ */
+#define FFABS(a) ((a) >= 0 ? (a) : (-(a)))
+#define FFSIGN(a) ((a) > 0 ? 1 : -1)
+
+/**
+ * Negative absolute value.
+ * This works for all integers of all types, since the negative of any
+ * representable value is also representable.
+ * As with many macros, this evaluates its argument twice; it thus must not
+ * have a side effect, that is FFNABS(x++) has undefined behavior.
+ */
+#define FFNABS(a) ((a) <= 0 ? (a) : (-(a)))
+
+/**
+ * Comparator.
+ * For two numerical expressions x and y, gives 1 if x > y, -1 if x < y, and 0
+ * if x == y. This is useful for instance in a qsort comparator callback.
+ * Furthermore, compilers are able to optimize this to branchless code, and
+ * there is no risk of overflow with signed types.
+ * As with many macros, this evaluates its arguments multiple times; they
+ * thus must not have side effects.
+ */
+#define FFDIFFSIGN(x,y) (((x)>(y)) - ((x)<(y)))
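+
+/*
+ * A minimal qsort comparator built on FFDIFFSIGN (editor's sketch; the
+ * MyItem struct and its pts field are hypothetical):
+ *
+ * @code
+ * static int cmp_pts(const void *a, const void *b)
+ * {
+ *     const int64_t x = ((const MyItem *)a)->pts;
+ *     const int64_t y = ((const MyItem *)b)->pts;
+ *     return FFDIFFSIGN(x, y); // avoids the overflow risk of x - y
+ * }
+ * @endcode
+ */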
+
+#define FFMAX(a,b) ((a) > (b) ? (a) : (b))
+#define FFMAX3(a,b,c) FFMAX(FFMAX(a,b),c)
+#define FFMIN(a,b) ((a) > (b) ? (b) : (a))
+#define FFMIN3(a,b,c) FFMIN(FFMIN(a,b),c)
+
+#define FFSWAP(type,a,b) do{type SWAP_tmp= b; b= a; a= SWAP_tmp;}while(0)
+#define FF_ARRAY_ELEMS(a) (sizeof(a) / sizeof((a)[0]))
+#define FFALIGN(x, a) (((x)+(a)-1)&~((a)-1))
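+
+/*
+ * Editor's note: FFALIGN assumes the alignment a is a power of two.
+ *
+ * @code
+ * size_t padded = FFALIGN(1021, 32); // rounds up to 1024
+ * @endcode
+ */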
+
+/* misc math functions */
+
+#ifdef HAVE_AV_CONFIG_H
+# include "config.h"
+# include "intmath.h"
+#endif
+
+/* Pull in unguarded fallback defines at the end of this file. */
+#include "common.h"
+
+#ifndef av_log2
+av_const int av_log2(unsigned v);
+#endif
+
+#ifndef av_log2_16bit
+av_const int av_log2_16bit(unsigned v);
+#endif
+
+/**
+ * Clip a signed integer value into the amin-amax range.
+ * @param a value to clip
+ * @param amin minimum value of the clip range
+ * @param amax maximum value of the clip range
+ * @return clipped value
+ */
+static av_always_inline av_const int av_clip_c(int a, int amin, int amax)
+{
+#if defined(HAVE_AV_CONFIG_H) && defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
+ if (amin > amax) abort();
+#endif
+ if (a < amin) return amin;
+ else if (a > amax) return amax;
+ else return a;
+}
+
+/**
+ * Clip a signed 64bit integer value into the amin-amax range.
+ * @param a value to clip
+ * @param amin minimum value of the clip range
+ * @param amax maximum value of the clip range
+ * @return clipped value
+ */
+static av_always_inline av_const int64_t av_clip64_c(int64_t a, int64_t amin, int64_t amax)
+{
+#if defined(HAVE_AV_CONFIG_H) && defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
+ if (amin > amax) abort();
+#endif
+ if (a < amin) return amin;
+ else if (a > amax) return amax;
+ else return a;
+}
+
+/**
+ * Clip a signed integer value into the 0-255 range.
+ * @param a value to clip
+ * @return clipped value
+ */
+static av_always_inline av_const uint8_t av_clip_uint8_c(int a)
+{
+ if (a&(~0xFF)) return (-a)>>31;
+ else return a;
+}
+
+/**
+ * Clip a signed integer value into the -128,127 range.
+ * @param a value to clip
+ * @return clipped value
+ */
+static av_always_inline av_const int8_t av_clip_int8_c(int a)
+{
+ if ((a+0x80U) & ~0xFF) return (a>>31) ^ 0x7F;
+ else return a;
+}
+
+/**
+ * Clip a signed integer value into the 0-65535 range.
+ * @param a value to clip
+ * @return clipped value
+ */
+static av_always_inline av_const uint16_t av_clip_uint16_c(int a)
+{
+ if (a&(~0xFFFF)) return (-a)>>31;
+ else return a;
+}
+
+/**
+ * Clip a signed integer value into the -32768,32767 range.
+ * @param a value to clip
+ * @return clipped value
+ */
+static av_always_inline av_const int16_t av_clip_int16_c(int a)
+{
+ if ((a+0x8000U) & ~0xFFFF) return (a>>31) ^ 0x7FFF;
+ else return a;
+}
+
+/**
+ * Clip a signed 64-bit integer value into the -2147483648,2147483647 range.
+ * @param a value to clip
+ * @return clipped value
+ */
+static av_always_inline av_const int32_t av_clipl_int32_c(int64_t a)
+{
+ if ((a+0x80000000u) & ~UINT64_C(0xFFFFFFFF)) return (int32_t)((a>>63) ^ 0x7FFFFFFF);
+ else return (int32_t)a;
+}
+
+/**
+ * Clip a signed integer into the -(2^p),(2^p-1) range.
+ * @param a value to clip
+ * @param p bit position to clip at
+ * @return clipped value
+ */
+static av_always_inline av_const int av_clip_intp2_c(int a, int p)
+{
+ if ((a + (1 << p)) & ~((2 << p) - 1))
+ return (a >> 31) ^ ((1 << p) - 1);
+ else
+ return a;
+}
+
+/**
+ * Clip a signed integer to an unsigned power of two range.
+ * @param a value to clip
+ * @param p bit position to clip at
+ * @return clipped value
+ */
+static av_always_inline av_const unsigned av_clip_uintp2_c(int a, int p)
+{
+ if (a & ~((1<<p) - 1)) return -a >> 31 & ((1<<p) - 1);
+ else return a;
+}
+
+/**
+ * Clear high bits from an unsigned integer starting with specific bit position
+ * @param a value to clip
+ * @param p bit position to clip at
+ * @return clipped value
+ */
+static av_always_inline av_const unsigned av_mod_uintp2_c(unsigned a, unsigned p)
+{
+ return a & ((1 << p) - 1);
+}
+
+/**
+ * Add two signed 32-bit values with saturation.
+ *
+ * @param a one value
+ * @param b another value
+ * @return sum with signed saturation
+ */
+static av_always_inline int av_sat_add32_c(int a, int b)
+{
+ return av_clipl_int32((int64_t)a + b);
+}
+
+/**
+ * Add a doubled value to another value with saturation at both stages.
+ *
+ * @param a first value
+ * @param b value doubled and added to a
+ * @return sum with signed saturation
+ */
+static av_always_inline int av_sat_dadd32_c(int a, int b)
+{
+ return av_sat_add32(a, av_sat_add32(b, b));
+}
+
+/**
+ * Clip a float value into the amin-amax range.
+ * @param a value to clip
+ * @param amin minimum value of the clip range
+ * @param amax maximum value of the clip range
+ * @return clipped value
+ */
+static av_always_inline av_const float av_clipf_c(float a, float amin, float amax)
+{
+#if defined(HAVE_AV_CONFIG_H) && defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
+ if (amin > amax) abort();
+#endif
+ if (a < amin) return amin;
+ else if (a > amax) return amax;
+ else return a;
+}
+
+/**
+ * Clip a double value into the amin-amax range.
+ * @param a value to clip
+ * @param amin minimum value of the clip range
+ * @param amax maximum value of the clip range
+ * @return clipped value
+ */
+static av_always_inline av_const double av_clipd_c(double a, double amin, double amax)
+{
+#if defined(HAVE_AV_CONFIG_H) && defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
+ if (amin > amax) abort();
+#endif
+ if (a < amin) return amin;
+ else if (a > amax) return amax;
+ else return a;
+}
+
+/** Compute ceil(log2(x)).
+ * @param x value used to compute ceil(log2(x))
+ * @return computed ceiling of log2(x)
+ */
+static av_always_inline av_const int av_ceil_log2_c(int x)
+{
+ return av_log2((x - 1) << 1);
+}
+
+/**
+ * Count number of bits set to one in x
+ * @param x value to count bits of
+ * @return the number of bits set to one in x
+ */
+static av_always_inline av_const int av_popcount_c(uint32_t x)
+{
+ x -= (x >> 1) & 0x55555555; // sum of each pair of bits, in 2-bit fields
+ x = (x & 0x33333333) + ((x >> 2) & 0x33333333); // 4-bit partial sums
+ x = (x + (x >> 4)) & 0x0F0F0F0F; // 8-bit partial sums
+ x += x >> 8; // fold high half of 16-bit sums
+ return (x + (x >> 16)) & 0x3F; // total is at most 32, fits in 6 bits
+}
+
+/**
+ * Count number of bits set to one in x
+ * @param x value to count bits of
+ * @return the number of bits set to one in x
+ */
+static av_always_inline av_const int av_popcount64_c(uint64_t x)
+{
+ return av_popcount((uint32_t)x) + av_popcount((uint32_t)(x >> 32));
+}
+
+#define MKTAG(a,b,c,d) ((a) | ((b) << 8) | ((c) << 16) | ((unsigned)(d) << 24))
+#define MKBETAG(a,b,c,d) ((d) | ((c) << 8) | ((b) << 16) | ((unsigned)(a) << 24))
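+
+/*
+ * Editor's illustration: MKTAG packs four characters into a 32-bit tag with
+ * the first character in the least significant byte (little-endian fourcc);
+ * MKBETAG is the big-endian counterpart.
+ *
+ * @code
+ * uint32_t tag = MKTAG('a', 'v', 'c', '1'); // 0x31637661
+ * @endcode
+ */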
+
+/**
+ * Convert a UTF-8 character (up to 4 bytes) to its 32-bit UCS-4 encoded form.
+ *
+ * @param val Output value, must be an lvalue of type uint32_t.
+ * @param GET_BYTE Expression reading one byte from the input.
+ * Evaluated up to 7 times (4 for the currently
+ * assigned Unicode range). With a memory buffer
+ * input, this could be *ptr++.
+ * @param ERROR Expression to be evaluated on invalid input,
+ * typically a goto statement.
+ *
+ * @warning ERROR should not contain a loop control statement which
+ * could interact with the internal while loop, and should force an
+ * exit from the macro code (e.g. through a goto or a return) in order
+ * to prevent undefined results.
+ */
+#define GET_UTF8(val, GET_BYTE, ERROR)\
+ val= GET_BYTE;\
+ {\
+ uint32_t top = (val & 128) >> 1;\
+ if ((val & 0xc0) == 0x80 || val >= 0xFE)\
+ ERROR\
+ while (val & top) {\
+ int tmp= GET_BYTE - 128;\
+ if(tmp>>6)\
+ ERROR\
+ val= (val<<6) + tmp;\
+ top <<= 5;\
+ }\
+ val &= (top << 1) - 1;\
+ }
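+
+/*
+ * Decoding sketch (editor's illustration; buf, len and handle_codepoint()
+ * are hypothetical and buf is assumed to hold only complete UTF-8
+ * sequences — only GET_UTF8 comes from this header):
+ *
+ * @code
+ * const uint8_t *p = buf;
+ * while (p < buf + len) {
+ *     uint32_t cp;
+ *     GET_UTF8(cp, *p++, goto fail;)
+ *     handle_codepoint(cp);
+ * }
+ * return 0;
+ * fail:
+ * return AVERROR_INVALIDDATA;
+ * @endcode
+ */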
+
+/**
+ * Convert a UTF-16 character (2 or 4 bytes) to its 32-bit UCS-4 encoded form.
+ *
+ * @param val Output value, must be an lvalue of type uint32_t.
+ * @param GET_16BIT Expression returning two bytes of UTF-16 data converted
+ * to native byte order. Evaluated one or two times.
+ * @param ERROR Expression to be evaluated on invalid input,
+ * typically a goto statement.
+ */
+#define GET_UTF16(val, GET_16BIT, ERROR)\
+ val = GET_16BIT;\
+ {\
+ unsigned int hi = val - 0xD800;\
+ if (hi < 0x800) {\
+ val = GET_16BIT - 0xDC00;\
+ if (val > 0x3FFU || hi > 0x3FFU)\
+ ERROR\
+ val += (hi<<10) + 0x10000;\
+ }\
+ }\
+
+/**
+ * @def PUT_UTF8(val, tmp, PUT_BYTE)
+ * Convert a 32-bit Unicode character to its UTF-8 encoded form (up to 4 bytes long).
+ * @param val is an input-only argument and should be of type uint32_t. It holds
+ * a UCS-4 encoded Unicode character that is to be converted to UTF-8. If
+ * val is given as a function it is executed only once.
+ * @param tmp is a temporary variable and should be of type uint8_t. It
+ * represents an intermediate value during conversion that is to be
+ * output by PUT_BYTE.
+ * @param PUT_BYTE writes the converted UTF-8 bytes to any proper destination.
+ * It could be a function or a statement, and uses tmp as the input byte.
+ * For example, PUT_BYTE could be "*output++ = tmp;" PUT_BYTE will be
+ * executed up to 4 times for values in the valid UTF-8 range and up to
+ * 7 times in the general case, depending on the length of the converted
+ * Unicode character.
+ */
+#define PUT_UTF8(val, tmp, PUT_BYTE)\
+ {\
+ int bytes, shift;\
+ uint32_t in = val;\
+ if (in < 0x80) {\
+ tmp = in;\
+ PUT_BYTE\
+ } else {\
+ bytes = (av_log2(in) + 4) / 5;\
+ shift = (bytes - 1) * 6;\
+ tmp = (256 - (256 >> bytes)) | (in >> shift);\
+ PUT_BYTE\
+ while (shift >= 6) {\
+ shift -= 6;\
+ tmp = 0x80 | ((in >> shift) & 0x3f);\
+ PUT_BYTE\
+ }\
+ }\
+ }
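+
+/*
+ * Encoding sketch (editor's illustration): write one code point into a
+ * buffer assumed to have at least 4 free bytes.
+ *
+ * @code
+ * uint8_t out[4], *q = out, tmp;
+ * PUT_UTF8(0x20AC, tmp, *q++ = tmp;) // U+20AC (euro sign) -> E2 82 AC
+ * @endcode
+ */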
+
+/**
+ * @def PUT_UTF16(val, tmp, PUT_16BIT)
+ * Convert a 32-bit Unicode character to its UTF-16 encoded form (2 or 4 bytes).
+ * @param val is an input-only argument and should be of type uint32_t. It holds
+ * a UCS-4 encoded Unicode character that is to be converted to UTF-16. If
+ * val is given as a function it is executed only once.
+ * @param tmp is a temporary variable and should be of type uint16_t. It
+ * represents an intermediate value during conversion that is to be
+ * output by PUT_16BIT.
+ * @param PUT_16BIT writes the converted UTF-16 data to any proper destination
+ * in desired endianness. It could be a function or a statement, and uses tmp
+ * as the input byte. For example, PUT_16BIT could be "*output++ = tmp;"
+ * PUT_16BIT will be executed 1 or 2 times depending on the input character.
+ */
+#define PUT_UTF16(val, tmp, PUT_16BIT)\
+ {\
+ uint32_t in = val;\
+ if (in < 0x10000) {\
+ tmp = in;\
+ PUT_16BIT\
+ } else {\
+ tmp = 0xD800 | ((in - 0x10000) >> 10);\
+ PUT_16BIT\
+ tmp = 0xDC00 | ((in - 0x10000) & 0x3FF);\
+ PUT_16BIT\
+ }\
+ }\
+
+
+
+#include "mem.h"
+
+#ifdef HAVE_AV_CONFIG_H
+# include "internal.h"
+#endif /* HAVE_AV_CONFIG_H */
+
+#endif /* AVUTIL_COMMON_H */
+
+/*
+ * The following definitions are outside the multiple inclusion guard
+ * to ensure they are immediately available in intmath.h.
+ */
+
+#ifndef av_ceil_log2
+# define av_ceil_log2 av_ceil_log2_c
+#endif
+#ifndef av_clip
+# define av_clip av_clip_c
+#endif
+#ifndef av_clip64
+# define av_clip64 av_clip64_c
+#endif
+#ifndef av_clip_uint8
+# define av_clip_uint8 av_clip_uint8_c
+#endif
+#ifndef av_clip_int8
+# define av_clip_int8 av_clip_int8_c
+#endif
+#ifndef av_clip_uint16
+# define av_clip_uint16 av_clip_uint16_c
+#endif
+#ifndef av_clip_int16
+# define av_clip_int16 av_clip_int16_c
+#endif
+#ifndef av_clipl_int32
+# define av_clipl_int32 av_clipl_int32_c
+#endif
+#ifndef av_clip_intp2
+# define av_clip_intp2 av_clip_intp2_c
+#endif
+#ifndef av_clip_uintp2
+# define av_clip_uintp2 av_clip_uintp2_c
+#endif
+#ifndef av_mod_uintp2
+# define av_mod_uintp2 av_mod_uintp2_c
+#endif
+#ifndef av_sat_add32
+# define av_sat_add32 av_sat_add32_c
+#endif
+#ifndef av_sat_dadd32
+# define av_sat_dadd32 av_sat_dadd32_c
+#endif
+#ifndef av_clipf
+# define av_clipf av_clipf_c
+#endif
+#ifndef av_clipd
+# define av_clipd av_clipd_c
+#endif
+#ifndef av_popcount
+# define av_popcount av_popcount_c
+#endif
+#ifndef av_popcount64
+# define av_popcount64 av_popcount64_c
+#endif
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/cpu.h b/TMessagesProj/jni/ffmpeg/include/libavutil/cpu.h
new file mode 100644
index 000000000..bff8518cc
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/cpu.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_CPU_H
+#define AVUTIL_CPU_H
+
+#include "attributes.h"
+
+#define AV_CPU_FLAG_FORCE 0x80000000 /* force usage of selected flags (OR) */
+
+ /* lower 16 bits - CPU features */
+#define AV_CPU_FLAG_MMX 0x0001 ///< standard MMX
+#define AV_CPU_FLAG_MMXEXT 0x0002 ///< SSE integer functions or AMD MMX ext
+#define AV_CPU_FLAG_MMX2 0x0002 ///< SSE integer functions or AMD MMX ext
+#define AV_CPU_FLAG_3DNOW 0x0004 ///< AMD 3DNOW
+#define AV_CPU_FLAG_SSE 0x0008 ///< SSE functions
+#define AV_CPU_FLAG_SSE2 0x0010 ///< PIV SSE2 functions
+#define AV_CPU_FLAG_SSE2SLOW 0x40000000 ///< SSE2 supported, but usually not faster
+ ///< than regular MMX/SSE (e.g. Core1)
+#define AV_CPU_FLAG_3DNOWEXT 0x0020 ///< AMD 3DNowExt
+#define AV_CPU_FLAG_SSE3 0x0040 ///< Prescott SSE3 functions
+#define AV_CPU_FLAG_SSE3SLOW 0x20000000 ///< SSE3 supported, but usually not faster
+ ///< than regular MMX/SSE (e.g. Core1)
+#define AV_CPU_FLAG_SSSE3 0x0080 ///< Conroe SSSE3 functions
+#define AV_CPU_FLAG_ATOM 0x10000000 ///< Atom processor, some SSSE3 instructions are slower
+#define AV_CPU_FLAG_SSE4 0x0100 ///< Penryn SSE4.1 functions
+#define AV_CPU_FLAG_SSE42 0x0200 ///< Nehalem SSE4.2 functions
+#define AV_CPU_FLAG_AESNI 0x80000 ///< Advanced Encryption Standard functions
+#define AV_CPU_FLAG_AVX 0x4000 ///< AVX functions: requires OS support even if YMM registers aren't used
+#define AV_CPU_FLAG_AVXSLOW 0x8000000 ///< AVX supported, but slow when using YMM registers (e.g. Bulldozer)
+#define AV_CPU_FLAG_XOP 0x0400 ///< Bulldozer XOP functions
+#define AV_CPU_FLAG_FMA4 0x0800 ///< Bulldozer FMA4 functions
+#define AV_CPU_FLAG_CMOV 0x1000 ///< supports cmov instruction
+#define AV_CPU_FLAG_AVX2 0x8000 ///< AVX2 functions: requires OS support even if YMM registers aren't used
+#define AV_CPU_FLAG_FMA3 0x10000 ///< Haswell FMA3 functions
+#define AV_CPU_FLAG_BMI1 0x20000 ///< Bit Manipulation Instruction Set 1
+#define AV_CPU_FLAG_BMI2 0x40000 ///< Bit Manipulation Instruction Set 2
+
+#define AV_CPU_FLAG_ALTIVEC 0x0001 ///< standard
+#define AV_CPU_FLAG_VSX 0x0002 ///< ISA 2.06
+#define AV_CPU_FLAG_POWER8 0x0004 ///< ISA 2.07
+
+#define AV_CPU_FLAG_ARMV5TE (1 << 0)
+#define AV_CPU_FLAG_ARMV6 (1 << 1)
+#define AV_CPU_FLAG_ARMV6T2 (1 << 2)
+#define AV_CPU_FLAG_VFP (1 << 3)
+#define AV_CPU_FLAG_VFPV3 (1 << 4)
+#define AV_CPU_FLAG_NEON (1 << 5)
+#define AV_CPU_FLAG_ARMV8 (1 << 6)
+#define AV_CPU_FLAG_SETEND (1 <<16)
+
+/**
+ * Return the flags which specify extensions supported by the CPU.
+ * The returned value is affected by av_force_cpu_flags() if that was used
+ * before. So av_get_cpu_flags() can easily be used in an application to
+ * detect the enabled CPU flags.
+ */
+int av_get_cpu_flags(void);
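+
+/*
+ * Runtime dispatch sketch (editor's illustration; use_neon_path() and
+ * use_c_path() are hypothetical routines):
+ *
+ * @code
+ * if (av_get_cpu_flags() & AV_CPU_FLAG_NEON)
+ *     use_neon_path();
+ * else
+ *     use_c_path();
+ * @endcode
+ */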
+
+/**
+ * Disable CPU detection and force the specified flags.
+ * -1 is a special case that disables forcing of specific flags.
+ */
+void av_force_cpu_flags(int flags);
+
+/**
+ * Set a mask on flags returned by av_get_cpu_flags().
+ * This function is mainly useful for testing.
+ * Please use av_force_cpu_flags() and av_get_cpu_flags() instead, which are more flexible.
+ *
+ * @warning this function is not thread safe.
+ */
+attribute_deprecated void av_set_cpu_flags_mask(int mask);
+
+/**
+ * Parse CPU flags from a string.
+ *
+ * The returned flags contain the specified flags as well as related unspecified flags.
+ *
+ * This function exists only for compatibility with libav.
+ * Please use av_parse_cpu_caps() when possible.
+ * @return a combination of AV_CPU_* flags, negative on error.
+ */
+attribute_deprecated
+int av_parse_cpu_flags(const char *s);
+
+/**
+ * Parse CPU caps from a string and update the given AV_CPU_* flags based on that.
+ *
+ * @return negative on error.
+ */
+int av_parse_cpu_caps(unsigned *flags, const char *s);
+
+/**
+ * @return the number of logical CPU cores present.
+ */
+int av_cpu_count(void);
+
+#endif /* AVUTIL_CPU_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/crc.h b/TMessagesProj/jni/ffmpeg/include/libavutil/crc.h
new file mode 100644
index 000000000..ef8a7137e
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/crc.h
@@ -0,0 +1,91 @@
+/*
+ * copyright (c) 2006 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_CRC_H
+#define AVUTIL_CRC_H
+
+#include <stdint.h>
+#include <stddef.h>
+#include "attributes.h"
+#include "version.h"
+
+/**
+ * @defgroup lavu_crc32 CRC32
+ * @ingroup lavu_crypto
+ * @{
+ */
+
+typedef uint32_t AVCRC;
+
+typedef enum {
+ AV_CRC_8_ATM,
+ AV_CRC_16_ANSI,
+ AV_CRC_16_CCITT,
+ AV_CRC_32_IEEE,
+ AV_CRC_32_IEEE_LE, /*< reversed bitorder version of AV_CRC_32_IEEE */
+ AV_CRC_16_ANSI_LE, /*< reversed bitorder version of AV_CRC_16_ANSI */
+#if FF_API_CRC_BIG_TABLE
+ AV_CRC_24_IEEE = 12,
+#else
+ AV_CRC_24_IEEE,
+#endif /* FF_API_CRC_BIG_TABLE */
+ AV_CRC_MAX, /*< Not part of public API! Do not use outside libavutil. */
+}AVCRCId;
+
+/**
+ * Initialize a CRC table.
+ * @param ctx must be an array of size sizeof(AVCRC)*257 or sizeof(AVCRC)*1024
+ * @param le If 1, the lowest bit represents the coefficient for the highest
+ * exponent of the corresponding polynomial (both for poly and
+ * actual CRC).
+ * If 0, you must swap the CRC parameter and the result of av_crc
+ * if you need the standard representation (can be simplified in
+ * most cases to e.g. bswap16):
+ * av_bswap32(crc << (32-bits))
+ * @param bits number of bits for the CRC
+ * @param poly generator polynomial without the x**bits coefficient, in the
+ * representation as specified by le
+ * @param ctx_size size of ctx in bytes
+ * @return <0 on failure
+ */
+int av_crc_init(AVCRC *ctx, int le, int bits, uint32_t poly, int ctx_size);
+
+/**
+ * Get an initialized standard CRC table.
+ * @param crc_id ID of a standard CRC
+ * @return a pointer to the CRC table or NULL on failure
+ */
+const AVCRC *av_crc_get_table(AVCRCId crc_id);
+
+/**
+ * Calculate the CRC of a block.
+ * @param crc CRC of previous blocks if any or initial value for CRC
+ * @return CRC updated with the data from the given block
+ *
+ * @see av_crc_init() "le" parameter
+ */
+uint32_t av_crc(const AVCRC *ctx, uint32_t crc,
+ const uint8_t *buffer, size_t length) av_pure;
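+
+/*
+ * Sketch of computing a zlib-style CRC-32 over a buffer (editor's
+ * illustration; data and len are assumptions, and the 0xFFFFFFFF pre- and
+ * post-conditioning is the usual convention for this CRC variant):
+ *
+ * @code
+ * const AVCRC *tab = av_crc_get_table(AV_CRC_32_IEEE_LE);
+ * uint32_t crc = av_crc(tab, 0xFFFFFFFF, data, len) ^ 0xFFFFFFFF;
+ * @endcode
+ */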
+
+/**
+ * @}
+ */
+
+#endif /* AVUTIL_CRC_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/des.h b/TMessagesProj/jni/ffmpeg/include/libavutil/des.h
new file mode 100644
index 000000000..4cf11f5bc
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/des.h
@@ -0,0 +1,77 @@
+/*
+ * DES encryption/decryption
+ * Copyright (c) 2007 Reimar Doeffinger
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_DES_H
+#define AVUTIL_DES_H
+
+#include <stdint.h>
+
+/**
+ * @defgroup lavu_des DES
+ * @ingroup lavu_crypto
+ * @{
+ */
+
+typedef struct AVDES {
+ uint64_t round_keys[3][16];
+ int triple_des;
+} AVDES;
+
+/**
+ * Allocate an AVDES context.
+ */
+AVDES *av_des_alloc(void);
+
+/**
+ * @brief Initializes an AVDES context.
+ *
+ * @param key_bits must be 64 or 192
+ * @param decrypt 0 for encryption/CBC-MAC, 1 for decryption
+ * @return zero on success, negative value otherwise
+ */
+int av_des_init(struct AVDES *d, const uint8_t *key, int key_bits, int decrypt);
+
+/**
+ * @brief Encrypts / decrypts using the DES algorithm.
+ *
+ * @param count number of 8 byte blocks
+ * @param dst destination array, can be equal to src, must be 8-byte aligned
+ * @param src source array, can be equal to dst, must be 8-byte aligned, may be NULL
+ * @param iv initialization vector for CBC mode, if NULL then ECB will be used,
+ * must be 8-byte aligned
+ * @param decrypt 0 for encryption, 1 for decryption
+ */
+void av_des_crypt(struct AVDES *d, uint8_t *dst, const uint8_t *src, int count, uint8_t *iv, int decrypt);
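+
+/*
+ * Encryption sketch (editor's illustration; key, src, dst and nblocks are
+ * assumptions, av_free() comes from mem.h):
+ *
+ * @code
+ * AVDES *d = av_des_alloc();
+ * if (d && av_des_init(d, key, 64, 0) >= 0)
+ *     av_des_crypt(d, dst, src, nblocks, NULL, 0); // ECB mode, encrypt
+ * av_free(d);
+ * @endcode
+ */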
+
+/**
+ * @brief Calculates CBC-MAC using the DES algorithm.
+ *
+ * @param count number of 8 byte blocks
+ * @param dst destination array, can be equal to src, must be 8-byte aligned
+ * @param src source array, can be equal to dst, must be 8-byte aligned, may be NULL
+ */
+void av_des_mac(struct AVDES *d, uint8_t *dst, const uint8_t *src, int count);
+
+/**
+ * @}
+ */
+
+#endif /* AVUTIL_DES_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/dict.h b/TMessagesProj/jni/ffmpeg/include/libavutil/dict.h
new file mode 100644
index 000000000..5b8d00339
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/dict.h
@@ -0,0 +1,198 @@
+/*
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Public dictionary API.
+ * @deprecated
+ * AVDictionary is provided for compatibility with libav. It is inefficient
+ * both in its implementation and in its API. It does not scale and is
+ * extremely slow with large dictionaries.
+ * It is recommended that new code uses our tree container from tree.c/h
+ * where applicable, which uses AVL trees to achieve O(log n) performance.
+ */
+
+#ifndef AVUTIL_DICT_H
+#define AVUTIL_DICT_H
+
+#include <stdint.h>
+
+#include "version.h"
+
+/**
+ * @addtogroup lavu_dict AVDictionary
+ * @ingroup lavu_data
+ *
+ * @brief Simple key:value store
+ *
+ * @{
+ * Dictionaries are used for storing key:value pairs. To create
+ * an AVDictionary, simply pass an address of a NULL pointer to
+ * av_dict_set(). NULL can be used as an empty dictionary wherever
+ * a pointer to an AVDictionary is required.
+ * Use av_dict_get() to retrieve an entry or iterate over all
+ * entries and finally av_dict_free() to free the dictionary
+ * and all its contents.
+ *
+ @code
+ AVDictionary *d = NULL; // "create" an empty dictionary
+ AVDictionaryEntry *t = NULL;
+
+ av_dict_set(&d, "foo", "bar", 0); // add an entry
+
+ char *k = av_strdup("key"); // if your strings are already allocated,
+ char *v = av_strdup("value"); // you can avoid copying them like this
+ av_dict_set(&d, k, v, AV_DICT_DONT_STRDUP_KEY | AV_DICT_DONT_STRDUP_VAL);
+
+ while (t = av_dict_get(d, "", t, AV_DICT_IGNORE_SUFFIX)) {
+ <....> // iterate over all entries in d
+ }
+ av_dict_free(&d);
+ @endcode
+ *
+ */
+
+#define AV_DICT_MATCH_CASE 1 /**< Only get an entry with exact-case key match. Only relevant in av_dict_get(). */
+#define AV_DICT_IGNORE_SUFFIX 2 /**< Return first entry in a dictionary whose first part corresponds to the search key,
+ ignoring the suffix of the found key string. Only relevant in av_dict_get(). */
+#define AV_DICT_DONT_STRDUP_KEY 4 /**< Take ownership of a key that's been
+ allocated with av_malloc() or another memory allocation function. */
+#define AV_DICT_DONT_STRDUP_VAL 8 /**< Take ownership of a value that's been
+ allocated with av_malloc() or another memory allocation function. */
+#define AV_DICT_DONT_OVERWRITE 16 ///< Don't overwrite existing entries.
+#define AV_DICT_APPEND 32 /**< If the entry already exists, append to it. Note that no
+ delimiter is added, the strings are simply concatenated. */
+
+typedef struct AVDictionaryEntry {
+ char *key;
+ char *value;
+} AVDictionaryEntry;
+
+typedef struct AVDictionary AVDictionary;
+
+/**
+ * Get a dictionary entry with matching key.
+ *
+ * The returned entry key or value must not be changed, or it will
+ * cause undefined behavior.
+ *
+ * To iterate through all the dictionary entries, you can set the matching key
+ * to the null string "" and set the AV_DICT_IGNORE_SUFFIX flag.
+ *
+ * @param prev Set to the previous matching element to find the next.
+ * If set to NULL the first matching element is returned.
+ * @param key matching key
+ * @param flags a collection of AV_DICT_* flags controlling how the entry is retrieved
+ * @return found entry or NULL in case no matching entry was found in the dictionary
+ */
+AVDictionaryEntry *av_dict_get(const AVDictionary *m, const char *key,
+ const AVDictionaryEntry *prev, int flags);
+
+/**
+ * Get number of entries in dictionary.
+ *
+ * @param m dictionary
+ * @return number of entries in dictionary
+ */
+int av_dict_count(const AVDictionary *m);
+
+/**
+ * Set the given entry in *pm, overwriting an existing entry.
+ *
+ * Note: If AV_DICT_DONT_STRDUP_KEY or AV_DICT_DONT_STRDUP_VAL is set,
+ * these arguments will be freed on error.
+ *
+ * @param pm pointer to a pointer to a dictionary struct. If *pm is NULL
+ * a dictionary struct is allocated and put in *pm.
+ * @param key entry key to add to *pm (will be av_strduped depending on flags)
+ * @param value entry value to add to *pm (will be av_strduped depending on flags).
+ * Passing a NULL value will cause an existing entry to be deleted.
+ * @return >= 0 on success, otherwise a negative error code
+ */
+int av_dict_set(AVDictionary **pm, const char *key, const char *value, int flags);
+
+/**
+ * Convenience wrapper for av_dict_set that converts the value to a string
+ * and stores it.
+ *
+ * Note: If AV_DICT_DONT_STRDUP_KEY is set, key will be freed on error.
+ */
+int av_dict_set_int(AVDictionary **pm, const char *key, int64_t value, int flags);
+
+/**
+ * Parse the key/value pairs list and add the parsed entries to a dictionary.
+ *
+ * In case of failure, all the successfully set entries are stored in
+ * *pm. You may need to manually free the created dictionary.
+ *
+ * @param key_val_sep a 0-terminated list of characters used to separate
+ * key from value
+ * @param pairs_sep a 0-terminated list of characters used to separate
+ * two pairs from each other
+ * @param flags flags to use when adding to dictionary.
+ * AV_DICT_DONT_STRDUP_KEY and AV_DICT_DONT_STRDUP_VAL
+ * are ignored since the key/value tokens will always
+ * be duplicated.
+ * @return 0 on success, negative AVERROR code on failure
+ */
+int av_dict_parse_string(AVDictionary **pm, const char *str,
+ const char *key_val_sep, const char *pairs_sep,
+ int flags);
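+
+/*
+ * Parsing sketch (editor's illustration):
+ *
+ * @code
+ * AVDictionary *d = NULL;
+ * if (av_dict_parse_string(&d, "bitrate=128k:threads=4", "=", ":", 0) < 0)
+ *     av_dict_free(&d); // entries set before the failure are still in *pm
+ * @endcode
+ */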
+
+/**
+ * Copy entries from one AVDictionary struct into another.
+ * @param dst pointer to a pointer to a AVDictionary struct. If *dst is NULL,
+ * this function will allocate a struct for you and put it in *dst
+ * @param src pointer to source AVDictionary struct
+ * @param flags flags to use when setting entries in *dst
+ * @note metadata is read using the AV_DICT_IGNORE_SUFFIX flag
+ * @return 0 on success, negative AVERROR code on failure. If dst was allocated
+ * by this function, callers should free the associated memory.
+ */
+int av_dict_copy(AVDictionary **dst, const AVDictionary *src, int flags);
+
+/**
+ * Free all the memory allocated for an AVDictionary struct
+ * and all keys and values.
+ */
+void av_dict_free(AVDictionary **m);
+
+/**
+ * Get dictionary entries as a string.
+ *
+ * Create a string containing dictionary's entries.
+ * Such string may be passed back to av_dict_parse_string().
+ * @note String is escaped with backslashes ('\').
+ *
+ * @param[in] m dictionary
+ * @param[out] buffer Pointer to buffer that will be allocated with string containing entries.
+ * Buffer must be freed by the caller when it is no longer needed.
+ * @param[in] key_val_sep character used to separate key from value
+ * @param[in] pairs_sep character used to separate two pairs from each other
+ * @return >= 0 on success, negative on error
+ * @warning Separators cannot be '\\' or '\0'. They also cannot be the same.
+ */
+int av_dict_get_string(const AVDictionary *m, char **buffer,
+ const char key_val_sep, const char pairs_sep);
+
+/**
+ * @}
+ */
+
+#endif /* AVUTIL_DICT_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/display.h b/TMessagesProj/jni/ffmpeg/include/libavutil/display.h
new file mode 100644
index 000000000..c0cfee326
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/display.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2014 Vittorio Giovara
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_DISPLAY_H
+#define AVUTIL_DISPLAY_H
+
+#include <stdint.h>
+
+/**
+ * The display transformation matrix specifies an affine transformation that
+ * should be applied to video frames for correct presentation. It is compatible
+ * with the matrices stored in the ISO/IEC 14496-12 container format.
+ *
+ * The data is a 3x3 matrix represented as a 9-element array:
+ *
+ * | a b u |
+ * (a, b, u, c, d, v, x, y, w) -> | c d v |
+ * | x y w |
+ *
+ * All numbers are stored in native endianness, as 16.16 fixed-point values,
+ * except for u, v and w, which are stored as 2.30 fixed-point values.
+ *
+ * The transformation maps a point (p, q) in the source (pre-transformation)
+ * frame to the point (p', q') in the destination (post-transformation) frame as
+ * follows:
+ * | a b u |
+ * (p, q, 1) . | c d v | = z * (p', q', 1)
+ * | x y w |
+ *
+ * The transformation can also be more explicitly written in components as
+ * follows:
+ * p' = (a * p + c * q + x) / z;
+ * q' = (b * p + d * q + y) / z;
+ * z = u * p + v * q + w
+ */
+
+/**
+ * Extract the rotation component of the transformation matrix.
+ *
+ * @param matrix the transformation matrix
+ * @return the angle (in degrees) by which the transformation rotates the frame
+ * counterclockwise. The angle will be in range [-180.0, 180.0],
+ * or NaN if the matrix is singular.
+ *
+ * @note floating point numbers are inherently inexact, so callers are
+ * recommended to round the return value to nearest integer before use.
+ */
+double av_display_rotation_get(const int32_t matrix[9]);
+
+/**
+ * Initialize a transformation matrix describing a pure counterclockwise
+ * rotation by the specified angle (in degrees).
+ *
+ * @param matrix an allocated transformation matrix (will be fully overwritten
+ * by this function)
+ * @param angle rotation angle in degrees.
+ */
+void av_display_rotation_set(int32_t matrix[9], double angle);
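+
+/*
+ * Round-trip sketch (editor's illustration):
+ *
+ * @code
+ * int32_t m[9];
+ * av_display_rotation_set(m, 90.0);          // 90 degrees counterclockwise
+ * double angle = av_display_rotation_get(m); // ~90.0
+ * @endcode
+ */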
+
+/**
+ * Flip the input matrix horizontally and/or vertically.
+ *
+ * @param matrix an allocated transformation matrix
+ * @param hflip whether the matrix should be flipped horizontally
+ * @param vflip whether the matrix should be flipped vertically
+ */
+void av_display_matrix_flip(int32_t matrix[9], int hflip, int vflip);
+
+#endif /* AVUTIL_DISPLAY_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/downmix_info.h b/TMessagesProj/jni/ffmpeg/include/libavutil/downmix_info.h
new file mode 100644
index 000000000..221cf5bf9
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/downmix_info.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2014 Tim Walker
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_DOWNMIX_INFO_H
+#define AVUTIL_DOWNMIX_INFO_H
+
+#include "frame.h"
+
+/**
+ * @file
+ * audio downmix metadata
+ */
+
+/**
+ * @addtogroup lavu_audio
+ * @{
+ */
+
+/**
+ * @defgroup downmix_info Audio downmix metadata
+ * @{
+ */
+
+/**
+ * Possible downmix types.
+ */
+enum AVDownmixType {
+ AV_DOWNMIX_TYPE_UNKNOWN, /**< Not indicated. */
+ AV_DOWNMIX_TYPE_LORO, /**< Lo/Ro 2-channel downmix (Stereo). */
+ AV_DOWNMIX_TYPE_LTRT, /**< Lt/Rt 2-channel downmix, Dolby Surround compatible. */
+ AV_DOWNMIX_TYPE_DPLII, /**< Lt/Rt 2-channel downmix, Dolby Pro Logic II compatible. */
+ AV_DOWNMIX_TYPE_NB /**< Number of downmix types. Not part of ABI. */
+};
+
+/**
+ * This structure describes optional metadata relevant to a downmix procedure.
+ *
+ * All fields are set by the decoder to the value indicated in the audio
+ * bitstream (if present), or to a "sane" default otherwise.
+ */
+typedef struct AVDownmixInfo {
+ /**
+ * Type of downmix preferred by the mastering engineer.
+ */
+ enum AVDownmixType preferred_downmix_type;
+
+ /**
+ * Absolute scale factor representing the nominal level of the center
+ * channel during a regular downmix.
+ */
+ double center_mix_level;
+
+ /**
+ * Absolute scale factor representing the nominal level of the center
+ * channel during an Lt/Rt compatible downmix.
+ */
+ double center_mix_level_ltrt;
+
+ /**
+ * Absolute scale factor representing the nominal level of the surround
+ * channels during a regular downmix.
+ */
+ double surround_mix_level;
+
+ /**
+ * Absolute scale factor representing the nominal level of the surround
+ * channels during an Lt/Rt compatible downmix.
+ */
+ double surround_mix_level_ltrt;
+
+ /**
+ * Absolute scale factor representing the level at which the LFE data is
+ * mixed into L/R channels during downmixing.
+ */
+ double lfe_mix_level;
+} AVDownmixInfo;
+
+/**
+ * Get a frame's AV_FRAME_DATA_DOWNMIX_INFO side data for editing.
+ *
+ * If the side data is absent, it is created and added to the frame.
+ *
+ * @param frame the frame for which the side data is to be obtained or created
+ *
+ * @return the AVDownmixInfo structure to be edited by the caller, or NULL if
+ * the structure cannot be allocated.
+ */
+AVDownmixInfo *av_downmix_info_update_side_data(AVFrame *frame);
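+
+/*
+ * Editing sketch (editor's illustration; the level value is arbitrary):
+ *
+ * @code
+ * AVDownmixInfo *di = av_downmix_info_update_side_data(frame);
+ * if (di)
+ *     di->center_mix_level = 0.707;
+ * @endcode
+ */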
+
+/**
+ * @}
+ */
+
+/**
+ * @}
+ */
+
+#endif /* AVUTIL_DOWNMIX_INFO_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/error.h b/TMessagesProj/jni/ffmpeg/include/libavutil/error.h
new file mode 100644
index 000000000..71df4da35
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/error.h
@@ -0,0 +1,126 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * error code definitions
+ */
+
+#ifndef AVUTIL_ERROR_H
+#define AVUTIL_ERROR_H
+
+#include <errno.h>
+#include <stddef.h>
+
+/**
+ * @addtogroup lavu_error
+ *
+ * @{
+ */
+
+
+/* error handling */
+#if EDOM > 0
+#define AVERROR(e) (-(e)) ///< Returns a negative error code from a POSIX error code, to return from library functions.
+#define AVUNERROR(e) (-(e)) ///< Returns a POSIX error code from a library function error return value.
+#else
+/* Some platforms have E* and errno already negated. */
+#define AVERROR(e) (e)
+#define AVUNERROR(e) (e)
+#endif
+
+#define FFERRTAG(a, b, c, d) (-(int)MKTAG(a, b, c, d))
+
+#define AVERROR_BSF_NOT_FOUND FFERRTAG(0xF8,'B','S','F') ///< Bitstream filter not found
+#define AVERROR_BUG FFERRTAG( 'B','U','G','!') ///< Internal bug, also see AVERROR_BUG2
+#define AVERROR_BUFFER_TOO_SMALL FFERRTAG( 'B','U','F','S') ///< Buffer too small
+#define AVERROR_DECODER_NOT_FOUND FFERRTAG(0xF8,'D','E','C') ///< Decoder not found
+#define AVERROR_DEMUXER_NOT_FOUND FFERRTAG(0xF8,'D','E','M') ///< Demuxer not found
+#define AVERROR_ENCODER_NOT_FOUND FFERRTAG(0xF8,'E','N','C') ///< Encoder not found
+#define AVERROR_EOF FFERRTAG( 'E','O','F',' ') ///< End of file
+#define AVERROR_EXIT FFERRTAG( 'E','X','I','T') ///< Immediate exit was requested; the called function should not be restarted
+#define AVERROR_EXTERNAL FFERRTAG( 'E','X','T',' ') ///< Generic error in an external library
+#define AVERROR_FILTER_NOT_FOUND FFERRTAG(0xF8,'F','I','L') ///< Filter not found
+#define AVERROR_INVALIDDATA FFERRTAG( 'I','N','D','A') ///< Invalid data found when processing input
+#define AVERROR_MUXER_NOT_FOUND FFERRTAG(0xF8,'M','U','X') ///< Muxer not found
+#define AVERROR_OPTION_NOT_FOUND FFERRTAG(0xF8,'O','P','T') ///< Option not found
+#define AVERROR_PATCHWELCOME FFERRTAG( 'P','A','W','E') ///< Not yet implemented in FFmpeg, patches welcome
+#define AVERROR_PROTOCOL_NOT_FOUND FFERRTAG(0xF8,'P','R','O') ///< Protocol not found
+
+#define AVERROR_STREAM_NOT_FOUND FFERRTAG(0xF8,'S','T','R') ///< Stream not found
+/**
+ * This is semantically identical to AVERROR_BUG
+ * it has been introduced in Libav after our AVERROR_BUG and with a modified value.
+ */
+#define AVERROR_BUG2 FFERRTAG( 'B','U','G',' ')
+#define AVERROR_UNKNOWN FFERRTAG( 'U','N','K','N') ///< Unknown error, typically from an external library
+#define AVERROR_EXPERIMENTAL (-0x2bb2afa8) ///< Requested feature is flagged experimental. Set strict_std_compliance if you really want to use it.
+#define AVERROR_INPUT_CHANGED (-0x636e6701) ///< Input changed between calls. Reconfiguration is required. (can be OR-ed with AVERROR_OUTPUT_CHANGED)
+#define AVERROR_OUTPUT_CHANGED (-0x636e6702) ///< Output changed between calls. Reconfiguration is required. (can be OR-ed with AVERROR_INPUT_CHANGED)
+/* HTTP & RTSP errors */
+#define AVERROR_HTTP_BAD_REQUEST FFERRTAG(0xF8,'4','0','0')
+#define AVERROR_HTTP_UNAUTHORIZED FFERRTAG(0xF8,'4','0','1')
+#define AVERROR_HTTP_FORBIDDEN FFERRTAG(0xF8,'4','0','3')
+#define AVERROR_HTTP_NOT_FOUND FFERRTAG(0xF8,'4','0','4')
+#define AVERROR_HTTP_OTHER_4XX FFERRTAG(0xF8,'4','X','X')
+#define AVERROR_HTTP_SERVER_ERROR FFERRTAG(0xF8,'5','X','X')
+
+#define AV_ERROR_MAX_STRING_SIZE 64
+
+/**
+ * Put a description of the AVERROR code errnum in errbuf.
+ * In case of failure the global variable errno is set to indicate the
+ * error. Even in case of failure av_strerror() will print a generic
+ * error message indicating the errnum provided to errbuf.
+ *
+ * @param errnum error code to describe
+ * @param errbuf buffer to which description is written
+ * @param errbuf_size the size in bytes of errbuf
+ * @return 0 on success, a negative value if a description for errnum
+ * cannot be found
+ */
+int av_strerror(int errnum, char *errbuf, size_t errbuf_size);
+
+/**
+ * Fill the provided buffer with a string containing an error string
+ * corresponding to the AVERROR code errnum.
+ *
+ * @param errbuf a buffer
+ * @param errbuf_size size in bytes of errbuf
+ * @param errnum error code to describe
+ * @return the buffer in input, filled with the error description
+ * @see av_strerror()
+ */
+static inline char *av_make_error_string(char *errbuf, size_t errbuf_size, int errnum)
+{
+ av_strerror(errnum, errbuf, errbuf_size);
+ return errbuf;
+}
+
+/**
+ * Convenience macro, the return value should be used only directly in
+ * function arguments but never stand-alone.
+ */
+#define av_err2str(errnum) \
+ av_make_error_string((char[AV_ERROR_MAX_STRING_SIZE]){0}, AV_ERROR_MAX_STRING_SIZE, errnum)
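+
+/*
+ * Usage sketch (editor's illustration):
+ *
+ * @code
+ * char buf[AV_ERROR_MAX_STRING_SIZE];
+ * av_strerror(AVERROR(ENOMEM), buf, sizeof(buf));
+ * // or inline, via the convenience macro:
+ * printf("error: %s\n", av_err2str(AVERROR_EOF));
+ * @endcode
+ */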
+
+/**
+ * @}
+ */
+
+#endif /* AVUTIL_ERROR_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/eval.h b/TMessagesProj/jni/ffmpeg/include/libavutil/eval.h
new file mode 100644
index 000000000..dacd22b96
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/eval.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2002 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * simple arithmetic expression evaluator
+ */
+
+#ifndef AVUTIL_EVAL_H
+#define AVUTIL_EVAL_H
+
+#include "avutil.h"
+
+typedef struct AVExpr AVExpr;
+
+/**
+ * Parse and evaluate an expression.
+ * Note, this is significantly slower than av_expr_eval().
+ *
+ * @param res a pointer to a double where is put the result value of
+ * the expression, or NAN in case of error
+ * @param s expression as a zero terminated string, for example "1+2^3+5*5+sin(2/3)"
+ * @param const_names NULL terminated array of zero terminated strings of constant identifiers, for example {"PI", "E", 0}
+ * @param const_values a zero terminated array of values for the identifiers from const_names
+ * @param func1_names NULL terminated array of zero terminated strings of funcs1 identifiers
+ * @param funcs1 NULL terminated array of function pointers for functions which take 1 argument
+ * @param func2_names NULL terminated array of zero terminated strings of funcs2 identifiers
+ * @param funcs2 NULL terminated array of function pointers for functions which take 2 arguments
+ * @param opaque a pointer which will be passed to all functions from funcs1 and funcs2
+ * @param log_ctx parent logging context
+ * @return >= 0 in case of success, a negative value corresponding to an
+ * AVERROR code otherwise
+ */
+int av_expr_parse_and_eval(double *res, const char *s,
+ const char * const *const_names, const double *const_values,
+ const char * const *func1_names, double (* const *funcs1)(void *, double),
+ const char * const *func2_names, double (* const *funcs2)(void *, double, double),
+ void *opaque, int log_offset, void *log_ctx);
+
+/**
+ * Parse an expression.
+ *
+ * @param expr a pointer where is put an AVExpr containing the parsed
+ * value in case of successful parsing, or NULL otherwise.
+ * The pointed to AVExpr must be freed with av_expr_free() by the user
+ * when it is not needed anymore.
+ * @param s expression as a zero terminated string, for example "1+2^3+5*5+sin(2/3)"
+ * @param const_names NULL terminated array of zero terminated strings of constant identifiers, for example {"PI", "E", 0}
+ * @param func1_names NULL terminated array of zero terminated strings of funcs1 identifiers
+ * @param funcs1 NULL terminated array of function pointers for functions which take 1 argument
+ * @param func2_names NULL terminated array of zero terminated strings of funcs2 identifiers
+ * @param funcs2 NULL terminated array of function pointers for functions which take 2 arguments
+ * @param log_ctx parent logging context
+ * @return >= 0 in case of success, a negative value corresponding to an
+ * AVERROR code otherwise
+ */
+int av_expr_parse(AVExpr **expr, const char *s,
+ const char * const *const_names,
+ const char * const *func1_names, double (* const *funcs1)(void *, double),
+ const char * const *func2_names, double (* const *funcs2)(void *, double, double),
+ int log_offset, void *log_ctx);
+
+/**
+ * Evaluate a previously parsed expression.
+ *
+ * @param const_values a zero terminated array of values for the identifiers from av_expr_parse() const_names
+ * @param opaque a pointer which will be passed to all functions from funcs1 and funcs2
+ * @return the value of the expression
+ */
+double av_expr_eval(AVExpr *e, const double *const_values, void *opaque);
+
+/**
+ * Free a parsed expression previously created with av_expr_parse().
+ */
+void av_expr_free(AVExpr *e);
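+
+/*
+ * Parse/evaluate/free lifecycle sketch (editor's illustration; the constant
+ * names and values are arbitrary):
+ *
+ * @code
+ * static const char * const names[] = {"PI", "r", NULL};
+ * double vals[] = {3.14159265358979, 2.0}, v = 0;
+ * AVExpr *e = NULL;
+ * if (av_expr_parse(&e, "2*PI*r", names, NULL, NULL, NULL, NULL, 0, NULL) >= 0)
+ *     v = av_expr_eval(e, vals, NULL); // ~12.566
+ * av_expr_free(e);
+ * @endcode
+ */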
+
+/**
+ * Parse the string in numstr and return its value as a double. If
+ * the string is empty, contains only whitespaces, or does not contain
+ * an initial substring that has the expected syntax for a
+ * floating-point number, no conversion is performed. In this case,
+ * returns a value of zero and the value returned in tail is the value
+ * of numstr.
+ *
+ * @param numstr a string representing a number, may contain one of
+ * the International System number postfixes, for example 'K', 'M',
+ * 'G'. If 'i' is appended after the postfix, powers of 2 are used
+ * instead of powers of 10. The 'B' postfix multiplies the value by
+ * 8, and can be appended after another postfix or used alone. This
+ * allows using for example 'KB', 'MiB', 'G' and 'B' as postfix.
+ * @param tail if non-NULL puts here the pointer to the char next
+ * after the last parsed character
+ */
+double av_strtod(const char *numstr, char **tail);
+
+#endif /* AVUTIL_EVAL_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/ffversion.h b/TMessagesProj/jni/ffmpeg/include/libavutil/ffversion.h
new file mode 100644
index 000000000..e84489812
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/ffversion.h
@@ -0,0 +1,5 @@
+/* Automatically generated by version.sh, do not manually edit! */
+#ifndef AVUTIL_FFVERSION_H
+#define AVUTIL_FFVERSION_H
+#define FFMPEG_VERSION "2.8.git"
+#endif /* AVUTIL_FFVERSION_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/fifo.h b/TMessagesProj/jni/ffmpeg/include/libavutil/fifo.h
new file mode 100644
index 000000000..dc7bc6f0d
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/fifo.h
@@ -0,0 +1,179 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * a very simple circular buffer FIFO implementation
+ */
+
+#ifndef AVUTIL_FIFO_H
+#define AVUTIL_FIFO_H
+
+#include <stdint.h>
+#include "avutil.h"
+#include "attributes.h"
+
+typedef struct AVFifoBuffer {
+ uint8_t *buffer;
+ uint8_t *rptr, *wptr, *end;
+ uint32_t rndx, wndx;
+} AVFifoBuffer;
+
+/**
+ * Initialize an AVFifoBuffer.
+ * @param size size of the FIFO in bytes
+ * @return AVFifoBuffer or NULL in case of memory allocation failure
+ */
+AVFifoBuffer *av_fifo_alloc(unsigned int size);
+
+/**
+ * Initialize an AVFifoBuffer.
+ * @param nmemb number of elements
+ * @param size size of the single element
+ * @return AVFifoBuffer or NULL in case of memory allocation failure
+ */
+AVFifoBuffer *av_fifo_alloc_array(size_t nmemb, size_t size);
+
+/**
+ * Free an AVFifoBuffer.
+ * @param f AVFifoBuffer to free
+ */
+void av_fifo_free(AVFifoBuffer *f);
+
+/**
+ * Free an AVFifoBuffer and reset pointer to NULL.
+ * @param f AVFifoBuffer to free
+ */
+void av_fifo_freep(AVFifoBuffer **f);
+
+/**
+ * Reset the AVFifoBuffer to the state right after av_fifo_alloc, in particular it is emptied.
+ * @param f AVFifoBuffer to reset
+ */
+void av_fifo_reset(AVFifoBuffer *f);
+
+/**
+ * Return the amount of data in bytes in the AVFifoBuffer, that is the
+ * amount of data you can read from it.
+ * @param f AVFifoBuffer to read from
+ * @return size
+ */
+int av_fifo_size(const AVFifoBuffer *f);
+
+/**
+ * Return the amount of space in bytes in the AVFifoBuffer, that is the
+ * amount of data you can write into it.
+ * @param f AVFifoBuffer to write into
+ * @return size
+ */
+int av_fifo_space(const AVFifoBuffer *f);
+
+/**
+ * Feed data at specific position from an AVFifoBuffer to a user-supplied callback.
+ * Similar to av_fifo_generic_read() but without discarding data.
+ * @param f AVFifoBuffer to read from
+ * @param offset offset from current read position
+ * @param buf_size number of bytes to read
+ * @param func generic read function
+ * @param dest data destination
+ */
+int av_fifo_generic_peek_at(AVFifoBuffer *f, void *dest, int offset, int buf_size, void (*func)(void*, void*, int));
+
+/**
+ * Feed data from an AVFifoBuffer to a user-supplied callback.
+ * Similar to av_fifo_generic_read() but without discarding data.
+ * @param f AVFifoBuffer to read from
+ * @param buf_size number of bytes to read
+ * @param func generic read function
+ * @param dest data destination
+ */
+int av_fifo_generic_peek(AVFifoBuffer *f, void *dest, int buf_size, void (*func)(void*, void*, int));
+
+/**
+ * Feed data from an AVFifoBuffer to a user-supplied callback.
+ * @param f AVFifoBuffer to read from
+ * @param buf_size number of bytes to read
+ * @param func generic read function
+ * @param dest data destination
+ */
+int av_fifo_generic_read(AVFifoBuffer *f, void *dest, int buf_size, void (*func)(void*, void*, int));
+
+/**
+ * Feed data from a user-supplied callback to an AVFifoBuffer.
+ * @param f AVFifoBuffer to write to
+ * @param src data source; non-const since it may be used as a
+ * modifiable context by the function defined in func
+ * @param size number of bytes to write
+ * @param func generic write function; the first parameter is src,
+ * the second is dest_buf, the third is dest_buf_size.
+ * func must return the number of bytes written to dest_buf, or <= 0 to
+ * indicate no more data available to write.
+ * If func is NULL, src is interpreted as a simple byte array for source data.
+ * @return the number of bytes written to the FIFO
+ */
+int av_fifo_generic_write(AVFifoBuffer *f, void *src, int size, int (*func)(void*, void*, int));
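+
+/*
+ * Write/read round-trip sketch (editor's illustration). With func == NULL,
+ * src and dest are treated as plain byte arrays:
+ *
+ * @code
+ * AVFifoBuffer *f = av_fifo_alloc(1024);
+ * uint8_t in[4] = {1, 2, 3, 4}, out[4];
+ * if (f) {
+ *     av_fifo_generic_write(f, in, sizeof(in), NULL);
+ *     av_fifo_generic_read(f, out, sizeof(out), NULL);
+ *     av_fifo_freep(&f);
+ * }
+ * @endcode
+ */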
+
+/**
+ * Resize an AVFifoBuffer.
+ * In case of reallocation failure, the old FIFO is kept unchanged.
+ *
+ * @param f AVFifoBuffer to resize
+ * @param size new AVFifoBuffer size in bytes
+ * @return <0 for failure, >=0 otherwise
+ */
+int av_fifo_realloc2(AVFifoBuffer *f, unsigned int size);
+
+/**
+ * Enlarge an AVFifoBuffer.
+ * In case of reallocation failure, the old FIFO is kept unchanged.
+ * The new fifo size may be larger than the requested size.
+ *
+ * @param f AVFifoBuffer to resize
+ * @param additional_space the amount of space in bytes to allocate in addition to av_fifo_size()
+ * @return <0 for failure, >=0 otherwise
+ */
+int av_fifo_grow(AVFifoBuffer *f, unsigned int additional_space);
+
+/**
+ * Read and discard the specified amount of data from an AVFifoBuffer.
+ * @param f AVFifoBuffer to read from
+ * @param size amount of data to read in bytes
+ */
+void av_fifo_drain(AVFifoBuffer *f, int size);
+
+/**
+ * Return a pointer to the data stored in a FIFO buffer at a certain offset.
+ * The FIFO buffer is not modified.
+ *
+ * @param f AVFifoBuffer to peek at, f must be non-NULL
+ * @param offs an offset in bytes, its absolute value must be less
+ * than the used buffer size or the returned pointer will
+ * point outside to the buffer data.
+ * The used buffer size can be checked with av_fifo_size().
+ */
+static inline uint8_t *av_fifo_peek2(const AVFifoBuffer *f, int offs)
+{
+ uint8_t *ptr = f->rptr + offs;
+ if (ptr >= f->end)
+ ptr = f->buffer + (ptr - f->end);
+ else if (ptr < f->buffer)
+ ptr = f->end - (f->buffer - ptr);
+ return ptr;
+}
+
+#endif /* AVUTIL_FIFO_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/file.h b/TMessagesProj/jni/ffmpeg/include/libavutil/file.h
new file mode 100644
index 000000000..e931be71e
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/file.h
@@ -0,0 +1,68 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_FILE_H
+#define AVUTIL_FILE_H
+
+#include <stdint.h>
+
+#include "avutil.h"
+
+/**
+ * @file
+ * Misc file utilities.
+ */
+
+/**
+ * Read the file with name filename, and put its content in a newly
+ * allocated buffer or map it with mmap() when available.
+ * In case of success set *bufptr to the read or mmapped buffer, and
+ * *size to the size in bytes of the buffer in *bufptr.
+ * The returned buffer must be released with av_file_unmap().
+ *
+ * @param log_offset loglevel offset used for logging
+ * @param log_ctx context used for logging
+ * @return a non negative number in case of success, a negative value
+ * corresponding to an AVERROR error code in case of failure
+ */
+av_warn_unused_result
+int av_file_map(const char *filename, uint8_t **bufptr, size_t *size,
+ int log_offset, void *log_ctx);
+
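+/*
+ * Minimal usage sketch (editorial illustration; "input.bin" is a placeholder
+ * file name):
+ * @code
+ * uint8_t *buf;
+ * size_t size;
+ * if (av_file_map("input.bin", &buf, &size, 0, NULL) >= 0) {
+ *     // ... process size bytes starting at buf ...
+ *     av_file_unmap(buf, size);
+ * }
+ * @endcode
+ */
+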
+/**
+ * Unmap or free the buffer bufptr created by av_file_map().
+ *
+ * @param size size in bytes of bufptr, must be the same as returned
+ * by av_file_map()
+ */
+void av_file_unmap(uint8_t *bufptr, size_t size);
+
+/**
+ * Wrapper to work around the lack of mkstemp() on mingw.
+ * Also, tries to create the file in /tmp first, if possible.
+ * *prefix can be a character constant; *filename will be allocated internally.
+ * @return file descriptor of opened file (or negative value corresponding to an
+ * AVERROR code on error)
+ * and opened file name in **filename.
+ * @note On very old libcs it is necessary to set a secure umask before
+ * calling this, av_tempfile() can't call umask itself as it is used in
+ * libraries and could interfere with the calling application.
+ */
+int av_tempfile(const char *prefix, char **filename, int log_offset, void *log_ctx);
+
+#endif /* AVUTIL_FILE_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/frame.h b/TMessagesProj/jni/ffmpeg/include/libavutil/frame.h
new file mode 100644
index 000000000..9c6061a11
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/frame.h
@@ -0,0 +1,713 @@
+/*
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * @ingroup lavu_frame
+ * reference-counted frame API
+ */
+
+#ifndef AVUTIL_FRAME_H
+#define AVUTIL_FRAME_H
+
+#include <stdint.h>
+
+#include "avutil.h"
+#include "buffer.h"
+#include "dict.h"
+#include "rational.h"
+#include "samplefmt.h"
+#include "pixfmt.h"
+#include "version.h"
+
+
+/**
+ * @defgroup lavu_frame AVFrame
+ * @ingroup lavu_data
+ *
+ * @{
+ * AVFrame is an abstraction for reference-counted raw multimedia data.
+ */
+
+enum AVFrameSideDataType {
+ /**
+ * The data is the AVPanScan struct defined in libavcodec.
+ */
+ AV_FRAME_DATA_PANSCAN,
+ /**
+ * ATSC A53 Part 4 Closed Captions.
+ * A53 CC bitstream is stored as uint8_t in AVFrameSideData.data.
+ * The number of bytes of CC data is AVFrameSideData.size.
+ */
+ AV_FRAME_DATA_A53_CC,
+ /**
+ * Stereoscopic 3d metadata.
+ * The data is the AVStereo3D struct defined in libavutil/stereo3d.h.
+ */
+ AV_FRAME_DATA_STEREO3D,
+ /**
+ * The data is the AVMatrixEncoding enum defined in libavutil/channel_layout.h.
+ */
+ AV_FRAME_DATA_MATRIXENCODING,
+ /**
+ * Metadata relevant to a downmix procedure.
+ * The data is the AVDownmixInfo struct defined in libavutil/downmix_info.h.
+ */
+ AV_FRAME_DATA_DOWNMIX_INFO,
+ /**
+ * ReplayGain information in the form of the AVReplayGain struct.
+ */
+ AV_FRAME_DATA_REPLAYGAIN,
+ /**
+ * This side data contains a 3x3 transformation matrix describing an affine
+ * transformation that needs to be applied to the frame for correct
+ * presentation.
+ *
+ * See libavutil/display.h for a detailed description of the data.
+ */
+ AV_FRAME_DATA_DISPLAYMATRIX,
+ /**
+ * Active Format Description data consisting of a single byte as specified
+ * in ETSI TS 101 154 using AVActiveFormatDescription enum.
+ */
+ AV_FRAME_DATA_AFD,
+ /**
+ * Motion vectors exported by some codecs (on demand through the export_mvs
+ * flag set in the libavcodec AVCodecContext flags2 option).
+ * The data is the AVMotionVector struct defined in
+ * libavutil/motion_vector.h.
+ */
+ AV_FRAME_DATA_MOTION_VECTORS,
+ /**
+ * Recommends skipping the specified number of samples. This is exported
+ * only if the "skip_manual" AVOption is set in libavcodec.
+ * This has the same format as AV_PKT_DATA_SKIP_SAMPLES.
+ * @code
+ * u32le number of samples to skip from start of this packet
+ * u32le number of samples to skip from end of this packet
+ * u8 reason for start skip
+ * u8 reason for end skip (0=padding silence, 1=convergence)
+ * @endcode
+ */
+ AV_FRAME_DATA_SKIP_SAMPLES,
+
+ /**
+ * This side data must be associated with an audio frame and corresponds to
+ * enum AVAudioServiceType defined in avcodec.h.
+ */
+ AV_FRAME_DATA_AUDIO_SERVICE_TYPE,
+};
+
+enum AVActiveFormatDescription {
+ AV_AFD_SAME = 8,
+ AV_AFD_4_3 = 9,
+ AV_AFD_16_9 = 10,
+ AV_AFD_14_9 = 11,
+ AV_AFD_4_3_SP_14_9 = 13,
+ AV_AFD_16_9_SP_14_9 = 14,
+ AV_AFD_SP_4_3 = 15,
+};
+
+
+/**
+ * Structure to hold side data for an AVFrame.
+ *
+ * sizeof(AVFrameSideData) is not a part of the public ABI, so new fields may be added
+ * to the end with a minor bump.
+ */
+typedef struct AVFrameSideData {
+ enum AVFrameSideDataType type;
+ uint8_t *data;
+ int size;
+ AVDictionary *metadata;
+ AVBufferRef *buf;
+} AVFrameSideData;
+
+/**
+ * This structure describes decoded (raw) audio or video data.
+ *
+ * AVFrame must be allocated using av_frame_alloc(). Note that this only
+ * allocates the AVFrame itself, the buffers for the data must be managed
+ * through other means (see below).
+ * AVFrame must be freed with av_frame_free().
+ *
+ * AVFrame is typically allocated once and then reused multiple times to hold
+ * different data (e.g. a single AVFrame to hold frames received from a
+ * decoder). In such a case, av_frame_unref() will free any references held by
+ * the frame and reset it to its original clean state before it
+ * is reused again.
+ *
+ * The data described by an AVFrame is usually reference counted through the
+ * AVBuffer API. The underlying buffer references are stored in AVFrame.buf /
+ * AVFrame.extended_buf. An AVFrame is considered to be reference counted if at
+ * least one reference is set, i.e. if AVFrame.buf[0] != NULL. In such a case,
+ * every single data plane must be contained in one of the buffers in
+ * AVFrame.buf or AVFrame.extended_buf.
+ * There may be a single buffer for all the data, or one separate buffer for
+ * each plane, or anything in between.
+ *
+ * sizeof(AVFrame) is not a part of the public ABI, so new fields may be added
+ * to the end with a minor bump.
+ * Similarly, fields that are marked as to be only accessed by
+ * av_opt_ptr() can be reordered. This allows two forks to add fields
+ * without breaking compatibility with each other.
+ */
+typedef struct AVFrame {
+#define AV_NUM_DATA_POINTERS 8
+ /**
+ * pointer to the picture/channel planes.
+ * This might be different from the first allocated byte
+ *
+ * Some decoders access areas outside 0,0 - width,height, please
+ * see avcodec_align_dimensions2(). Some filters and swscale can read
+ * up to 16 bytes beyond the planes, if these filters are to be used,
+ * then 16 extra bytes must be allocated.
+ */
+ uint8_t *data[AV_NUM_DATA_POINTERS];
+
+ /**
+ * For video, size in bytes of each picture line.
+ * For audio, size in bytes of each plane.
+ *
+ * For audio, only linesize[0] may be set. For planar audio, each channel
+ * plane must be the same size.
+ *
+ * For video the linesizes should be multiples of the CPU's alignment
+ * preference, this is 16 or 32 for modern desktop CPUs.
+ * Some code requires such alignment, other code can be slower without
+ * correct alignment, and for yet other code it makes no difference.
+ *
+ * @note The linesize may be larger than the size of usable data -- there
+ * may be extra padding present for performance reasons.
+ */
+ int linesize[AV_NUM_DATA_POINTERS];
+
+ /**
+ * pointers to the data planes/channels.
+ *
+ * For video, this should simply point to data[].
+ *
+ * For planar audio, each channel has a separate data pointer, and
+ * linesize[0] contains the size of each channel buffer.
+ * For packed audio, there is just one data pointer, and linesize[0]
+ * contains the total size of the buffer for all channels.
+ *
+ * Note: Both data and extended_data should always be set in a valid frame,
+ * but for planar audio with more channels than can fit in data,
+ * extended_data must be used in order to access all channels.
+ */
+ uint8_t **extended_data;
+
+ /**
+ * width and height of the video frame
+ */
+ int width, height;
+
+ /**
+ * number of audio samples (per channel) described by this frame
+ */
+ int nb_samples;
+
+ /**
+ * format of the frame, -1 if unknown or unset.
+ * Values correspond to enum AVPixelFormat for video frames,
+ * enum AVSampleFormat for audio.
+ */
+ int format;
+
+ /**
+ * 1 -> keyframe, 0 -> not
+ */
+ int key_frame;
+
+ /**
+ * Picture type of the frame.
+ */
+ enum AVPictureType pict_type;
+
+ /**
+ * Sample aspect ratio for the video frame, 0/1 if unknown/unspecified.
+ */
+ AVRational sample_aspect_ratio;
+
+ /**
+ * Presentation timestamp in time_base units (time when frame should be shown to user).
+ */
+ int64_t pts;
+
+ /**
+ * PTS copied from the AVPacket that was decoded to produce this frame.
+ */
+ int64_t pkt_pts;
+
+ /**
+ * DTS copied from the AVPacket that triggered returning this frame
+ * (if frame threading isn't used).
+ * This is also the presentation time of this AVFrame calculated from
+ */
+ int64_t pkt_dts;
+
+ /**
+ * picture number in bitstream order
+ */
+ int coded_picture_number;
+ /**
+ * picture number in display order
+ */
+ int display_picture_number;
+
+ /**
+ * quality (between 1 (good) and FF_LAMBDA_MAX (bad))
+ */
+ int quality;
+
+ /**
+ * for some private data of the user
+ */
+ void *opaque;
+
+#if FF_API_ERROR_FRAME
+ /**
+ * @deprecated unused
+ */
+ attribute_deprecated
+ uint64_t error[AV_NUM_DATA_POINTERS];
+#endif
+
+ /**
+ * When decoding, this signals how much the picture must be delayed.
+ * extra_delay = repeat_pict / (2*fps)
+ */
+ int repeat_pict;
+
+ /**
+ * The content of the picture is interlaced.
+ */
+ int interlaced_frame;
+
+ /**
+ * If the content is interlaced, is top field displayed first.
+ */
+ int top_field_first;
+
+ /**
+ * Tell user application that palette has changed from previous frame.
+ */
+ int palette_has_changed;
+
+ /**
+ * reordered opaque 64bit (generally an integer or a double precision float
+ * PTS but can be anything).
+ * The user sets AVCodecContext.reordered_opaque to represent the input at
+ * that time,
+ * the decoder reorders values as needed and sets AVFrame.reordered_opaque
+ * to exactly one of the values provided by the user through AVCodecContext.reordered_opaque
+ * @deprecated in favor of pkt_pts
+ */
+ int64_t reordered_opaque;
+
+ /**
+ * Sample rate of the audio data.
+ */
+ int sample_rate;
+
+ /**
+ * Channel layout of the audio data.
+ */
+ uint64_t channel_layout;
+
+ /**
+ * AVBuffer references backing the data for this frame. If all elements of
+ * this array are NULL, then this frame is not reference counted. This array
+ * must be filled contiguously -- if buf[i] is non-NULL then buf[j] must
+ * also be non-NULL for all j < i.
+ *
+ * There may be at most one AVBuffer per data plane, so for video this array
+ * always contains all the references. For planar audio with more than
+ * AV_NUM_DATA_POINTERS channels, there may be more buffers than can fit in
+ * this array. Then the extra AVBufferRef pointers are stored in the
+ * extended_buf array.
+ */
+ AVBufferRef *buf[AV_NUM_DATA_POINTERS];
+
+ /**
+ * For planar audio which requires more than AV_NUM_DATA_POINTERS
+ * AVBufferRef pointers, this array will hold all the references which
+ * cannot fit into AVFrame.buf.
+ *
+ * Note that this is different from AVFrame.extended_data, which always
+ * contains all the pointers. This array only contains the extra pointers,
+ * which cannot fit into AVFrame.buf.
+ *
+ * This array is always allocated using av_malloc() by whoever constructs
+ * the frame. It is freed in av_frame_unref().
+ */
+ AVBufferRef **extended_buf;
+ /**
+ * Number of elements in extended_buf.
+ */
+ int nb_extended_buf;
+
+ AVFrameSideData **side_data;
+ int nb_side_data;
+
+/**
+ * @defgroup lavu_frame_flags AV_FRAME_FLAGS
+ * Flags describing additional frame properties.
+ *
+ * @{
+ */
+
+/**
+ * The frame data may be corrupted, e.g. due to decoding errors.
+ */
+#define AV_FRAME_FLAG_CORRUPT (1 << 0)
+/**
+ * @}
+ */
+
+ /**
+ * Frame flags, a combination of @ref lavu_frame_flags
+ */
+ int flags;
+
+ /**
+ * MPEG vs JPEG YUV range.
+ * It must be accessed using av_frame_get_color_range() and
+ * av_frame_set_color_range().
+ * - encoding: Set by user
+ * - decoding: Set by libavcodec
+ */
+ enum AVColorRange color_range;
+
+ enum AVColorPrimaries color_primaries;
+
+ enum AVColorTransferCharacteristic color_trc;
+
+ /**
+ * YUV colorspace type.
+ * It must be accessed using av_frame_get_colorspace() and
+ * av_frame_set_colorspace().
+ * - encoding: Set by user
+ * - decoding: Set by libavcodec
+ */
+ enum AVColorSpace colorspace;
+
+ enum AVChromaLocation chroma_location;
+
+ /**
+ * frame timestamp estimated using various heuristics, in stream time base
+ * Code outside libavutil should access this field using:
+ * av_frame_get_best_effort_timestamp(frame)
+ * - encoding: unused
+ * - decoding: set by libavcodec, read by user.
+ */
+ int64_t best_effort_timestamp;
+
+ /**
+ * reordered pos from the last AVPacket that has been input into the decoder
+ * Code outside libavutil should access this field using:
+ * av_frame_get_pkt_pos(frame)
+ * - encoding: unused
+ * - decoding: Read by user.
+ */
+ int64_t pkt_pos;
+
+ /**
+ * duration of the corresponding packet, expressed in
+ * AVStream->time_base units, 0 if unknown.
+ * Code outside libavutil should access this field using:
+ * av_frame_get_pkt_duration(frame)
+ * - encoding: unused
+ * - decoding: Read by user.
+ */
+ int64_t pkt_duration;
+
+ /**
+ * metadata.
+ * Code outside libavutil should access this field using:
+ * av_frame_get_metadata(frame)
+ * - encoding: Set by user.
+ * - decoding: Set by libavcodec.
+ */
+ AVDictionary *metadata;
+
+ /**
+ * decode error flags of the frame, set to a combination of
+ * FF_DECODE_ERROR_xxx flags if the decoder produced a frame, but there
+ * were errors during the decoding.
+ * Code outside libavutil should access this field using:
+ * av_frame_get_decode_error_flags(frame)
+ * - encoding: unused
+ * - decoding: set by libavcodec, read by user.
+ */
+ int decode_error_flags;
+#define FF_DECODE_ERROR_INVALID_BITSTREAM 1
+#define FF_DECODE_ERROR_MISSING_REFERENCE 2
+
+ /**
+ * number of audio channels, only used for audio.
+ * Code outside libavutil should access this field using:
+ * av_frame_get_channels(frame)
+ * - encoding: unused
+ * - decoding: Read by user.
+ */
+ int channels;
+
+ /**
+ * size of the corresponding packet containing the compressed
+ * frame. It must be accessed using av_frame_get_pkt_size() and
+ * av_frame_set_pkt_size().
+ * It is set to a negative value if unknown.
+ * - encoding: unused
+ * - decoding: set by libavcodec, read by user.
+ */
+ int pkt_size;
+
+#if FF_API_FRAME_QP
+ /**
+ * QP table
+ * Not to be accessed directly from outside libavutil
+ */
+ attribute_deprecated
+ int8_t *qscale_table;
+ /**
+ * QP store stride
+ * Not to be accessed directly from outside libavutil
+ */
+ attribute_deprecated
+ int qstride;
+
+ attribute_deprecated
+ int qscale_type;
+
+ /**
+ * Not to be accessed directly from outside libavutil
+ */
+ AVBufferRef *qp_table_buf;
+#endif
+} AVFrame;
+
+/**
+ * Accessors for some AVFrame fields.
+ * The position of these fields in the structure is not part of the ABI,
+ * they should not be accessed directly outside libavutil.
+ */
+int64_t av_frame_get_best_effort_timestamp(const AVFrame *frame);
+void av_frame_set_best_effort_timestamp(AVFrame *frame, int64_t val);
+int64_t av_frame_get_pkt_duration (const AVFrame *frame);
+void av_frame_set_pkt_duration (AVFrame *frame, int64_t val);
+int64_t av_frame_get_pkt_pos (const AVFrame *frame);
+void av_frame_set_pkt_pos (AVFrame *frame, int64_t val);
+int64_t av_frame_get_channel_layout (const AVFrame *frame);
+void av_frame_set_channel_layout (AVFrame *frame, int64_t val);
+int av_frame_get_channels (const AVFrame *frame);
+void av_frame_set_channels (AVFrame *frame, int val);
+int av_frame_get_sample_rate (const AVFrame *frame);
+void av_frame_set_sample_rate (AVFrame *frame, int val);
+AVDictionary *av_frame_get_metadata (const AVFrame *frame);
+void av_frame_set_metadata (AVFrame *frame, AVDictionary *val);
+int av_frame_get_decode_error_flags (const AVFrame *frame);
+void av_frame_set_decode_error_flags (AVFrame *frame, int val);
+int av_frame_get_pkt_size(const AVFrame *frame);
+void av_frame_set_pkt_size(AVFrame *frame, int val);
+AVDictionary **avpriv_frame_get_metadatap(AVFrame *frame);
+#if FF_API_FRAME_QP
+int8_t *av_frame_get_qp_table(AVFrame *f, int *stride, int *type);
+int av_frame_set_qp_table(AVFrame *f, AVBufferRef *buf, int stride, int type);
+#endif
+enum AVColorSpace av_frame_get_colorspace(const AVFrame *frame);
+void av_frame_set_colorspace(AVFrame *frame, enum AVColorSpace val);
+enum AVColorRange av_frame_get_color_range(const AVFrame *frame);
+void av_frame_set_color_range(AVFrame *frame, enum AVColorRange val);
+
+/**
+ * Get the name of a colorspace.
+ * @return a static string identifying the colorspace; can be NULL.
+ */
+const char *av_get_colorspace_name(enum AVColorSpace val);
+
+/**
+ * Allocate an AVFrame and set its fields to default values. The resulting
+ * struct must be freed using av_frame_free().
+ *
+ * @return An AVFrame filled with default values or NULL on failure.
+ *
+ * @note this only allocates the AVFrame itself, not the data buffers. Those
+ * must be allocated through other means, e.g. with av_frame_get_buffer() or
+ * manually.
+ */
+AVFrame *av_frame_alloc(void);
+
+/**
+ * Free the frame and any dynamically allocated objects in it,
+ * e.g. extended_data. If the frame is reference counted, it will be
+ * unreferenced first.
+ *
+ * @param frame frame to be freed. The pointer will be set to NULL.
+ */
+void av_frame_free(AVFrame **frame);
+
+/**
+ * Set up a new reference to the data described by the source frame.
+ *
+ * Copy frame properties from src to dst and create a new reference for each
+ * AVBufferRef from src.
+ *
+ * If src is not reference counted, new buffers are allocated and the data is
+ * copied.
+ *
+ * @return 0 on success, a negative AVERROR on error
+ */
+int av_frame_ref(AVFrame *dst, const AVFrame *src);
+
+/**
+ * Create a new frame that references the same data as src.
+ *
+ * This is a shortcut for av_frame_alloc()+av_frame_ref().
+ *
+ * @return newly created AVFrame on success, NULL on error.
+ */
+AVFrame *av_frame_clone(const AVFrame *src);
+
+/**
+ * Unreference all the buffers referenced by frame and reset the frame fields.
+ */
+void av_frame_unref(AVFrame *frame);
+
+/**
+ * Move everything contained in src to dst and reset src.
+ */
+void av_frame_move_ref(AVFrame *dst, AVFrame *src);
+
+/**
+ * Allocate new buffer(s) for audio or video data.
+ *
+ * The following fields must be set on frame before calling this function:
+ * - format (pixel format for video, sample format for audio)
+ * - width and height for video
+ * - nb_samples and channel_layout for audio
+ *
+ * This function will fill AVFrame.data and AVFrame.buf arrays and, if
+ * necessary, allocate and fill AVFrame.extended_data and AVFrame.extended_buf.
+ * For planar formats, one buffer will be allocated for each plane.
+ *
+ * @param frame frame in which to store the new buffers.
+ * @param align required buffer size alignment
+ *
+ * @return 0 on success, a negative AVERROR on error.
+ */
+int av_frame_get_buffer(AVFrame *frame, int align);
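+
+/*
+ * Typical allocation sketch (editorial illustration; dimensions and pixel
+ * format below are arbitrary example values):
+ * @code
+ * AVFrame *frame = av_frame_alloc();
+ * if (frame) {
+ *     frame->format = AV_PIX_FMT_YUV420P;
+ *     frame->width  = 640;
+ *     frame->height = 480;
+ *     if (av_frame_get_buffer(frame, 32) >= 0) {  // 32-byte alignment
+ *         // frame->data[] and frame->linesize[] are now valid
+ *     }
+ *     av_frame_free(&frame);
+ * }
+ * @endcode
+ */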
+
+/**
+ * Check if the frame data is writable.
+ *
+ * @return A positive value if the frame data is writable (which is true if and
+ * only if each of the underlying buffers has only one reference, namely the one
+ * stored in this frame). Return 0 otherwise.
+ *
+ * If 1 is returned the answer is valid until av_buffer_ref() is called on any
+ * of the underlying AVBufferRefs (e.g. through av_frame_ref() or directly).
+ *
+ * @see av_frame_make_writable(), av_buffer_is_writable()
+ */
+int av_frame_is_writable(AVFrame *frame);
+
+/**
+ * Ensure that the frame data is writable, avoiding data copy if possible.
+ *
+ * Do nothing if the frame is writable, allocate new buffers and copy the data
+ * if it is not.
+ *
+ * @return 0 on success, a negative AVERROR on error.
+ *
+ * @see av_frame_is_writable(), av_buffer_is_writable(),
+ * av_buffer_make_writable()
+ */
+int av_frame_make_writable(AVFrame *frame);
+
+/**
+ * Copy the frame data from src to dst.
+ *
+ * This function does not allocate anything, dst must be already initialized and
+ * allocated with the same parameters as src.
+ *
+ * This function only copies the frame data (i.e. the contents of the data /
+ * extended data arrays), not any other properties.
+ *
+ * @return >= 0 on success, a negative AVERROR on error.
+ */
+int av_frame_copy(AVFrame *dst, const AVFrame *src);
+
+/**
+ * Copy only "metadata" fields from src to dst.
+ *
+ * Metadata for the purpose of this function are those fields that do not affect
+ * the data layout in the buffers. E.g. pts, sample rate (for audio) or sample
+ * aspect ratio (for video), but not width/height or channel layout.
+ * Side data is also copied.
+ */
+int av_frame_copy_props(AVFrame *dst, const AVFrame *src);
+
+/**
+ * Get the buffer reference a given data plane is stored in.
+ *
+ * @param plane index of the data plane of interest in frame->extended_data.
+ *
+ * @return the buffer reference that contains the plane or NULL if the input
+ * frame is not valid.
+ */
+AVBufferRef *av_frame_get_plane_buffer(AVFrame *frame, int plane);
+
+/**
+ * Add a new side data to a frame.
+ *
+ * @param frame a frame to which the side data should be added
+ * @param type type of the added side data
+ * @param size size of the side data
+ *
+ * @return newly added side data on success, NULL on error
+ */
+AVFrameSideData *av_frame_new_side_data(AVFrame *frame,
+ enum AVFrameSideDataType type,
+ int size);
+
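+/*
+ * Illustrative sketch (editorial addition): attach A53 closed-caption bytes
+ * to a frame; cc_bytes and cc_size are hypothetical caller-provided data:
+ * @code
+ * AVFrameSideData *sd = av_frame_new_side_data(frame, AV_FRAME_DATA_A53_CC, cc_size);
+ * if (sd)
+ *     memcpy(sd->data, cc_bytes, cc_size);  // sd->data has room for cc_size bytes
+ * @endcode
+ */
+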
+/**
+ * @return a pointer to the side data of a given type on success, NULL if there
+ * is no side data with such type in this frame.
+ */
+AVFrameSideData *av_frame_get_side_data(const AVFrame *frame,
+ enum AVFrameSideDataType type);
+
+/**
+ * If side data of the supplied type exists in the frame, free it and remove it
+ * from the frame.
+ */
+void av_frame_remove_side_data(AVFrame *frame, enum AVFrameSideDataType type);
+
+/**
+ * @return a string identifying the side data type
+ */
+const char *av_frame_side_data_name(enum AVFrameSideDataType type);
+
+/**
+ * @}
+ */
+
+#endif /* AVUTIL_FRAME_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/hash.h b/TMessagesProj/jni/ffmpeg/include/libavutil/hash.h
new file mode 100644
index 000000000..d4bcbf8cc
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/hash.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2013 Reimar Döffinger
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_HASH_H
+#define AVUTIL_HASH_H
+
+#include <stdint.h>
+
+struct AVHashContext;
+
+/**
+ * Allocate a hash context for the algorithm specified by name.
+ *
+ * @return >= 0 for success, a negative error code for failure
+ * @note The context is not initialized; you must call av_hash_init().
+ */
+int av_hash_alloc(struct AVHashContext **ctx, const char *name);
+
+/**
+ * Get the names of available hash algorithms.
+ *
+ * This function can be used to enumerate the algorithms.
+ *
+ * @param i index of the hash algorithm, starting from 0
+ * @return a pointer to a static string or NULL if i is out of range
+ */
+const char *av_hash_names(int i);
+
+/**
+ * Get the name of the algorithm corresponding to the given hash context.
+ */
+const char *av_hash_get_name(const struct AVHashContext *ctx);
+
+/**
+ * Maximum value that av_hash_get_size will currently return.
+ *
+ * You can use this if you absolutely want or need to use static allocation
+ * and are fine with not supporting hashes newly added to libavutil without
+ * recompilation.
+ * Note that you still need to check against av_hash_get_size(): adding new
+ * hashes with larger sizes is not considered an ABI change and should not
+ * cause your code to overflow a buffer.
+ */
+#define AV_HASH_MAX_SIZE 64
+
+/**
+ * Get the size of the resulting hash value in bytes.
+ *
+ * The pointer passed to av_hash_final() must have space for at least
+ * this many bytes.
+ */
+int av_hash_get_size(const struct AVHashContext *ctx);
+
+/**
+ * Initialize or reset a hash context.
+ */
+void av_hash_init(struct AVHashContext *ctx);
+
+/**
+ * Update a hash context with additional data.
+ */
+void av_hash_update(struct AVHashContext *ctx, const uint8_t *src, int len);
+
+/**
+ * Finalize a hash context and compute the actual hash value.
+ */
+void av_hash_final(struct AVHashContext *ctx, uint8_t *dst);
+
+/**
+ * Finalize a hash context and compute the actual hash value.
+ * If size is smaller than the hash size, the hash is truncated;
+ * if size is larger, the buffer is padded with 0.
+ */
+void av_hash_final_bin(struct AVHashContext *ctx, uint8_t *dst, int size);
+
+/**
+ * Finalize a hash context and compute the actual hash value as a hex string.
+ * The string is always 0-terminated.
+ * If size is smaller than 2 * hash_size + 1, the hex string is truncated.
+ */
+void av_hash_final_hex(struct AVHashContext *ctx, uint8_t *dst, int size);
+
+/**
+ * Finalize a hash context and compute the actual hash value as a base64 string.
+ * The string is always 0-terminated.
+ * If size is smaller than AV_BASE64_SIZE(hash_size), the base64 string is
+ * truncated.
+ */
+void av_hash_final_b64(struct AVHashContext *ctx, uint8_t *dst, int size);
+
+/**
+ * Free hash context.
+ */
+void av_hash_freep(struct AVHashContext **ctx);
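+
+/*
+ * End-to-end sketch (editorial illustration; "MD5" is one of the names
+ * reported by av_hash_names(), and data/len are caller-supplied):
+ * @code
+ * struct AVHashContext *ctx = NULL;
+ * uint8_t hex[2 * AV_HASH_MAX_SIZE + 1];
+ * if (av_hash_alloc(&ctx, "MD5") >= 0) {
+ *     av_hash_init(ctx);
+ *     av_hash_update(ctx, data, len);
+ *     av_hash_final_hex(ctx, hex, sizeof(hex));  // 0-terminated hex string
+ *     av_hash_freep(&ctx);
+ * }
+ * @endcode
+ */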
+
+#endif /* AVUTIL_HASH_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/hmac.h b/TMessagesProj/jni/ffmpeg/include/libavutil/hmac.h
new file mode 100644
index 000000000..576a0a4fb
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/hmac.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2012 Martin Storsjo
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_HMAC_H
+#define AVUTIL_HMAC_H
+
+#include <stdint.h>
+
+#include "version.h"
+/**
+ * @defgroup lavu_hmac HMAC
+ * @ingroup lavu_crypto
+ * @{
+ */
+
+enum AVHMACType {
+ AV_HMAC_MD5,
+ AV_HMAC_SHA1,
+ AV_HMAC_SHA224,
+ AV_HMAC_SHA256,
+ AV_HMAC_SHA384 = 12,
+ AV_HMAC_SHA512,
+};
+
+typedef struct AVHMAC AVHMAC;
+
+/**
+ * Allocate an AVHMAC context.
+ * @param type The hash function used for the HMAC.
+ */
+AVHMAC *av_hmac_alloc(enum AVHMACType type);
+
+/**
+ * Free an AVHMAC context.
+ * @param ctx The context to free, may be NULL
+ */
+void av_hmac_free(AVHMAC *ctx);
+
+/**
+ * Initialize an AVHMAC context with an authentication key.
+ * @param ctx The HMAC context
+ * @param key The authentication key
+ * @param keylen The length of the key, in bytes
+ */
+void av_hmac_init(AVHMAC *ctx, const uint8_t *key, unsigned int keylen);
+
+/**
+ * Hash data with the HMAC.
+ * @param ctx The HMAC context
+ * @param data The data to hash
+ * @param len The length of the data, in bytes
+ */
+void av_hmac_update(AVHMAC *ctx, const uint8_t *data, unsigned int len);
+
+/**
+ * Finish hashing and output the HMAC digest.
+ * @param ctx The HMAC context
+ * @param out The output buffer to write the digest into
+ * @param outlen The length of the out buffer, in bytes
+ * @return The number of bytes written to out, or a negative error code.
+ */
+int av_hmac_final(AVHMAC *ctx, uint8_t *out, unsigned int outlen);
+
+/**
+ * Hash an array of data with a key.
+ * @param ctx The HMAC context
+ * @param data The data to hash
+ * @param len The length of the data, in bytes
+ * @param key The authentication key
+ * @param keylen The length of the key, in bytes
+ * @param out The output buffer to write the digest into
+ * @param outlen The length of the out buffer, in bytes
+ * @return The number of bytes written to out, or a negative error code.
+ */
+int av_hmac_calc(AVHMAC *ctx, const uint8_t *data, unsigned int len,
+ const uint8_t *key, unsigned int keylen,
+ uint8_t *out, unsigned int outlen);
+
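+/*
+ * One-shot sketch (editorial illustration; msg/msg_len and key/key_len are
+ * hypothetical inputs, and 32 bytes is the SHA-256 digest size):
+ * @code
+ * AVHMAC *hmac = av_hmac_alloc(AV_HMAC_SHA256);
+ * uint8_t digest[32];
+ * if (hmac) {
+ *     int n = av_hmac_calc(hmac, msg, msg_len, key, key_len,
+ *                          digest, sizeof(digest));
+ *     // n is the digest length on success, a negative AVERROR on failure
+ *     av_hmac_free(hmac);
+ * }
+ * @endcode
+ */
+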
+/**
+ * @}
+ */
+
+#endif /* AVUTIL_HMAC_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/imgutils.h b/TMessagesProj/jni/ffmpeg/include/libavutil/imgutils.h
new file mode 100644
index 000000000..23282a38f
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/imgutils.h
@@ -0,0 +1,213 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_IMGUTILS_H
+#define AVUTIL_IMGUTILS_H
+
+/**
+ * @file
+ * misc image utilities
+ *
+ * @addtogroup lavu_picture
+ * @{
+ */
+
+#include "avutil.h"
+#include "pixdesc.h"
+#include "rational.h"
+
+/**
+ * Compute the max pixel step for each plane of an image with a
+ * format described by pixdesc.
+ *
+ * The pixel step is the distance in bytes between the first byte of
+ * the group of bytes which describe a pixel component and the first
+ * byte of the successive group in the same plane for the same
+ * component.
+ *
+ * @param max_pixsteps an array which is filled with the max pixel step
+ * for each plane. Since a plane may contain different pixel
+ * components, the computed max_pixsteps[plane] is relative to the
+ * component in the plane with the max pixel step.
+ * @param max_pixstep_comps an array which is filled with the component
+ * for each plane which has the max pixel step. May be NULL.
+ */
+void av_image_fill_max_pixsteps(int max_pixsteps[4], int max_pixstep_comps[4],
+ const AVPixFmtDescriptor *pixdesc);
+
+/**
+ * Compute the size of an image line with format pix_fmt and width
+ * width for the plane plane.
+ *
+ * @return the computed size in bytes
+ */
+int av_image_get_linesize(enum AVPixelFormat pix_fmt, int width, int plane);
+
+/**
+ * Fill plane linesizes for an image with pixel format pix_fmt and
+ * width width.
+ *
+ * @param linesizes array to be filled with the linesize for each plane
+ * @return >= 0 in case of success, a negative error code otherwise
+ */
+int av_image_fill_linesizes(int linesizes[4], enum AVPixelFormat pix_fmt, int width);
+
+/**
+ * Fill plane data pointers for an image with pixel format pix_fmt and
+ * height height.
+ *
+ * @param data pointers array to be filled with the pointer for each image plane
+ * @param ptr the pointer to a buffer which will contain the image
+ * @param linesizes the array containing the linesize for each
+ * plane, should be filled by av_image_fill_linesizes()
+ * @return the size in bytes required for the image buffer, a negative
+ * error code in case of failure
+ */
+int av_image_fill_pointers(uint8_t *data[4], enum AVPixelFormat pix_fmt, int height,
+ uint8_t *ptr, const int linesizes[4]);
+
+/**
+ * Allocate an image with size w and h and pixel format pix_fmt, and
+ * fill pointers and linesizes accordingly.
+ * The allocated image buffer has to be freed by using
+ * av_freep(&pointers[0]).
+ *
+ * @param align the value to use for buffer size alignment
+ * @return the size in bytes required for the image buffer, a negative
+ * error code in case of failure
+ */
+int av_image_alloc(uint8_t *pointers[4], int linesizes[4],
+ int w, int h, enum AVPixelFormat pix_fmt, int align);
+
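+/*
+ * Minimal allocation sketch (editorial illustration; dimensions, pixel
+ * format and alignment are arbitrary example values):
+ * @code
+ * uint8_t *data[4];
+ * int linesize[4];
+ * int ret = av_image_alloc(data, linesize, 640, 480, AV_PIX_FMT_YUV420P, 32);
+ * if (ret >= 0) {
+ *     // ret is the total buffer size in bytes; all planes live in one
+ *     // allocation, so a single av_freep() releases them
+ *     av_freep(&data[0]);
+ * }
+ * @endcode
+ */
+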
+/**
+ * Copy image plane from src to dst.
+ * That is, copy "height" number of lines of "bytewidth" bytes each.
+ * The first byte of each successive line is separated by *_linesize
+ * bytes.
+ *
+ * bytewidth must be contained by both absolute values of dst_linesize
+ * and src_linesize, otherwise the function behavior is undefined.
+ *
+ * @param dst_linesize linesize for the image plane in dst
+ * @param src_linesize linesize for the image plane in src
+ */
+void av_image_copy_plane(uint8_t *dst, int dst_linesize,
+ const uint8_t *src, int src_linesize,
+ int bytewidth, int height);
+
+/**
+ * Copy image in src_data to dst_data.
+ *
+ * @param dst_linesizes linesizes for the image in dst_data
+ * @param src_linesizes linesizes for the image in src_data
+ */
+void av_image_copy(uint8_t *dst_data[4], int dst_linesizes[4],
+ const uint8_t *src_data[4], const int src_linesizes[4],
+ enum AVPixelFormat pix_fmt, int width, int height);
+
+/**
+ * Set up the data pointers and linesizes based on the specified image
+ * parameters and the provided array.
+ *
+ * The fields of the given image are filled in by using the src
+ * address which points to the image data buffer. Depending on the
+ * specified pixel format, one or multiple image data pointers and
+ * line sizes will be set. If a planar format is specified, several
+ * pointers will be set pointing to the different picture planes and
+ * the line sizes of the different planes will be stored in the
+ * dst_linesize array. Call with src == NULL to get the required
+ * size for the src buffer.
+ *
+ * To allocate the buffer and fill in the dst_data and dst_linesize in
+ * one call, use av_image_alloc().
+ *
+ * @param dst_data data pointers to be filled in
+ * @param dst_linesizes linesizes for the image in dst_data to be filled in
+ * @param src buffer which will contain or contains the actual image data, can be NULL
+ * @param pix_fmt the pixel format of the image
+ * @param width the width of the image in pixels
+ * @param height the height of the image in pixels
+ * @param align the value used in src for linesize alignment
+ * @return the size in bytes required for src, a negative error code
+ * in case of failure
+ */
+int av_image_fill_arrays(uint8_t *dst_data[4], int dst_linesize[4],
+ const uint8_t *src,
+ enum AVPixelFormat pix_fmt, int width, int height, int align);
+
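+/*
+ * Illustrative sketch (editorial addition) of the query-then-fill pattern,
+ * using av_image_get_buffer_size() declared below in this header; w and h
+ * are hypothetical dimensions, and align of 1 means tightly packed:
+ * @code
+ * int size = av_image_get_buffer_size(AV_PIX_FMT_RGB24, w, h, 1);
+ * uint8_t *buf = size >= 0 ? av_malloc(size) : NULL;
+ * uint8_t *data[4];
+ * int linesize[4];
+ * if (buf)
+ *     av_image_fill_arrays(data, linesize, buf, AV_PIX_FMT_RGB24, w, h, 1);
+ * @endcode
+ */
+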
+/**
+ * Return the size in bytes of the amount of data required to store an
+ * image with the given parameters.
+ *
+ * @param[in] align the assumed linesize alignment
+ */
+int av_image_get_buffer_size(enum AVPixelFormat pix_fmt, int width, int height, int align);
+
+/**
+ * Copy image data from an image into a buffer.
+ *
+ * av_image_get_buffer_size() can be used to compute the required size
+ * for the buffer to fill.
+ *
+ * @param dst a buffer into which picture data will be copied
+ * @param dst_size the size in bytes of dst
+ * @param src_data pointers containing the source image data
+ * @param src_linesizes linesizes for the image in src_data
+ * @param pix_fmt the pixel format of the source image
+ * @param width the width of the source image in pixels
+ * @param height the height of the source image in pixels
+ * @param align the assumed linesize alignment for dst
+ * @return the number of bytes written to dst, or a negative value
+ * (error code) on error
+ */
+int av_image_copy_to_buffer(uint8_t *dst, int dst_size,
+ const uint8_t * const src_data[4], const int src_linesize[4],
+ enum AVPixelFormat pix_fmt, int width, int height, int align);
+
+/**
+ * Check if the given dimension of an image is valid, meaning that all
+ * bytes of the image can be addressed with a signed int.
+ *
+ * @param w the width of the picture
+ * @param h the height of the picture
+ * @param log_offset the offset to sum to the log level for logging with log_ctx
+ * @param log_ctx the parent logging context, it may be NULL
+ * @return >= 0 if valid, a negative error code otherwise
+ */
+int av_image_check_size(unsigned int w, unsigned int h, int log_offset, void *log_ctx);
+
+/**
+ * Check if the given sample aspect ratio of an image is valid.
+ *
+ * It is considered invalid if the denominator is 0 or if applying the ratio
+ * to the image size would make the smaller dimension less than 1. If the
+ * sar numerator is 0, it is considered unknown and treated as valid.
+ *
+ * @param w width of the image
+ * @param h height of the image
+ * @param sar sample aspect ratio of the image
+ * @return 0 if valid, a negative AVERROR code otherwise
+ */
+int av_image_check_sar(unsigned int w, unsigned int h, AVRational sar);
+
+/**
+ * @}
+ */
+
+
+#endif /* AVUTIL_IMGUTILS_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/intfloat.h b/TMessagesProj/jni/ffmpeg/include/libavutil/intfloat.h
new file mode 100644
index 000000000..fe3d7ec4a
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/intfloat.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2011 Mans Rullgard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_INTFLOAT_H
+#define AVUTIL_INTFLOAT_H
+
+#include <stdint.h>
+#include "attributes.h"
+
+union av_intfloat32 {
+ uint32_t i;
+ float f;
+};
+
+union av_intfloat64 {
+ uint64_t i;
+ double f;
+};
+
+/**
+ * Reinterpret a 32-bit integer as a float.
+ */
+static av_always_inline float av_int2float(uint32_t i)
+{
+ union av_intfloat32 v;
+ v.i = i;
+ return v.f;
+}
+
+/**
+ * Reinterpret a float as a 32-bit integer.
+ */
+static av_always_inline uint32_t av_float2int(float f)
+{
+ union av_intfloat32 v;
+ v.f = f;
+ return v.i;
+}
+
+/**
+ * Reinterpret a 64-bit integer as a double.
+ */
+static av_always_inline double av_int2double(uint64_t i)
+{
+ union av_intfloat64 v;
+ v.i = i;
+ return v.f;
+}
+
+/**
+ * Reinterpret a double as a 64-bit integer.
+ */
+static av_always_inline uint64_t av_double2int(double f)
+{
+ union av_intfloat64 v;
+ v.f = f;
+ return v.i;
+}
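+
+/*
+ * Round-trip sketch (editorial illustration; the bit pattern shown assumes
+ * IEEE-754 doubles):
+ * @code
+ * uint64_t bits = av_double2int(1.0);   // 0x3FF0000000000000
+ * double   back = av_int2double(bits);  // back == 1.0 exactly
+ * @endcode
+ */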
+
+#endif /* AVUTIL_INTFLOAT_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/intreadwrite.h b/TMessagesProj/jni/ffmpeg/include/libavutil/intreadwrite.h
new file mode 100644
index 000000000..51fbe30a2
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/intreadwrite.h
@@ -0,0 +1,629 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_INTREADWRITE_H
+#define AVUTIL_INTREADWRITE_H
+
+#include <stdint.h>
+#include "libavutil/avconfig.h"
+#include "attributes.h"
+#include "bswap.h"
+
+typedef union {
+ uint64_t u64;
+ uint32_t u32[2];
+ uint16_t u16[4];
+ uint8_t u8 [8];
+ double f64;
+ float f32[2];
+} av_alias av_alias64;
+
+typedef union {
+ uint32_t u32;
+ uint16_t u16[2];
+ uint8_t u8 [4];
+ float f32;
+} av_alias av_alias32;
+
+typedef union {
+ uint16_t u16;
+ uint8_t u8 [2];
+} av_alias av_alias16;
+
+/*
+ * Arch-specific headers can provide any combination of
+ * AV_[RW][BLN](16|24|32|48|64) and AV_(COPY|SWAP|ZERO)(64|128) macros.
+ * Preprocessor symbols must be defined, even if these are implemented
+ * as inline functions.
+ *
+ * R/W means read/write, B/L/N means big/little/native endianness.
+ * The following macros require aligned access, compared to their
+ * unaligned variants: AV_(COPY|SWAP|ZERO)(64|128), AV_[RW]N[8-64]A.
+ * Incorrect usage may range from abysmal performance to crash
+ * depending on the platform.
+ *
+ * The unaligned variants are AV_[RW][BLN][8-64] and AV_COPY*U.
+ */
+
+#ifdef HAVE_AV_CONFIG_H
+
+#include "config.h"
+
+#if ARCH_ARM
+# include "arm/intreadwrite.h"
+#elif ARCH_AVR32
+# include "avr32/intreadwrite.h"
+#elif ARCH_MIPS
+# include "mips/intreadwrite.h"
+#elif ARCH_PPC
+# include "ppc/intreadwrite.h"
+#elif ARCH_TOMI
+# include "tomi/intreadwrite.h"
+#elif ARCH_X86
+# include "x86/intreadwrite.h"
+#endif
+
+#endif /* HAVE_AV_CONFIG_H */
+
+/*
+ * Map AV_RNXX <-> AV_R[BL]XX for all variants provided by per-arch headers.
+ */
+
+#if AV_HAVE_BIGENDIAN
+
+# if defined(AV_RN16) && !defined(AV_RB16)
+# define AV_RB16(p) AV_RN16(p)
+# elif !defined(AV_RN16) && defined(AV_RB16)
+# define AV_RN16(p) AV_RB16(p)
+# endif
+
+# if defined(AV_WN16) && !defined(AV_WB16)
+# define AV_WB16(p, v) AV_WN16(p, v)
+# elif !defined(AV_WN16) && defined(AV_WB16)
+# define AV_WN16(p, v) AV_WB16(p, v)
+# endif
+
+# if defined(AV_RN24) && !defined(AV_RB24)
+# define AV_RB24(p) AV_RN24(p)
+# elif !defined(AV_RN24) && defined(AV_RB24)
+# define AV_RN24(p) AV_RB24(p)
+# endif
+
+# if defined(AV_WN24) && !defined(AV_WB24)
+# define AV_WB24(p, v) AV_WN24(p, v)
+# elif !defined(AV_WN24) && defined(AV_WB24)
+# define AV_WN24(p, v) AV_WB24(p, v)
+# endif
+
+# if defined(AV_RN32) && !defined(AV_RB32)
+# define AV_RB32(p) AV_RN32(p)
+# elif !defined(AV_RN32) && defined(AV_RB32)
+# define AV_RN32(p) AV_RB32(p)
+# endif
+
+# if defined(AV_WN32) && !defined(AV_WB32)
+# define AV_WB32(p, v) AV_WN32(p, v)
+# elif !defined(AV_WN32) && defined(AV_WB32)
+# define AV_WN32(p, v) AV_WB32(p, v)
+# endif
+
+# if defined(AV_RN48) && !defined(AV_RB48)
+# define AV_RB48(p) AV_RN48(p)
+# elif !defined(AV_RN48) && defined(AV_RB48)
+# define AV_RN48(p) AV_RB48(p)
+# endif
+
+# if defined(AV_WN48) && !defined(AV_WB48)
+# define AV_WB48(p, v) AV_WN48(p, v)
+# elif !defined(AV_WN48) && defined(AV_WB48)
+# define AV_WN48(p, v) AV_WB48(p, v)
+# endif
+
+# if defined(AV_RN64) && !defined(AV_RB64)
+# define AV_RB64(p) AV_RN64(p)
+# elif !defined(AV_RN64) && defined(AV_RB64)
+# define AV_RN64(p) AV_RB64(p)
+# endif
+
+# if defined(AV_WN64) && !defined(AV_WB64)
+# define AV_WB64(p, v) AV_WN64(p, v)
+# elif !defined(AV_WN64) && defined(AV_WB64)
+# define AV_WN64(p, v) AV_WB64(p, v)
+# endif
+
+#else /* AV_HAVE_BIGENDIAN */
+
+# if defined(AV_RN16) && !defined(AV_RL16)
+# define AV_RL16(p) AV_RN16(p)
+# elif !defined(AV_RN16) && defined(AV_RL16)
+# define AV_RN16(p) AV_RL16(p)
+# endif
+
+# if defined(AV_WN16) && !defined(AV_WL16)
+# define AV_WL16(p, v) AV_WN16(p, v)
+# elif !defined(AV_WN16) && defined(AV_WL16)
+# define AV_WN16(p, v) AV_WL16(p, v)
+# endif
+
+# if defined(AV_RN24) && !defined(AV_RL24)
+# define AV_RL24(p) AV_RN24(p)
+# elif !defined(AV_RN24) && defined(AV_RL24)
+# define AV_RN24(p) AV_RL24(p)
+# endif
+
+# if defined(AV_WN24) && !defined(AV_WL24)
+# define AV_WL24(p, v) AV_WN24(p, v)
+# elif !defined(AV_WN24) && defined(AV_WL24)
+# define AV_WN24(p, v) AV_WL24(p, v)
+# endif
+
+# if defined(AV_RN32) && !defined(AV_RL32)
+# define AV_RL32(p) AV_RN32(p)
+# elif !defined(AV_RN32) && defined(AV_RL32)
+# define AV_RN32(p) AV_RL32(p)
+# endif
+
+# if defined(AV_WN32) && !defined(AV_WL32)
+# define AV_WL32(p, v) AV_WN32(p, v)
+# elif !defined(AV_WN32) && defined(AV_WL32)
+# define AV_WN32(p, v) AV_WL32(p, v)
+# endif
+
+# if defined(AV_RN48) && !defined(AV_RL48)
+# define AV_RL48(p) AV_RN48(p)
+# elif !defined(AV_RN48) && defined(AV_RL48)
+# define AV_RN48(p) AV_RL48(p)
+# endif
+
+# if defined(AV_WN48) && !defined(AV_WL48)
+# define AV_WL48(p, v) AV_WN48(p, v)
+# elif !defined(AV_WN48) && defined(AV_WL48)
+# define AV_WN48(p, v) AV_WL48(p, v)
+# endif
+
+# if defined(AV_RN64) && !defined(AV_RL64)
+# define AV_RL64(p) AV_RN64(p)
+# elif !defined(AV_RN64) && defined(AV_RL64)
+# define AV_RN64(p) AV_RL64(p)
+# endif
+
+# if defined(AV_WN64) && !defined(AV_WL64)
+# define AV_WL64(p, v) AV_WN64(p, v)
+# elif !defined(AV_WN64) && defined(AV_WL64)
+# define AV_WN64(p, v) AV_WL64(p, v)
+# endif
+
+#endif /* !AV_HAVE_BIGENDIAN */
+
+/*
+ * Define AV_[RW]N helper macros to simplify definitions not provided
+ * by per-arch headers.
+ */
+
+#if defined(__GNUC__) && !defined(__TI_COMPILER_VERSION__)
+
+union unaligned_64 { uint64_t l; } __attribute__((packed)) av_alias;
+union unaligned_32 { uint32_t l; } __attribute__((packed)) av_alias;
+union unaligned_16 { uint16_t l; } __attribute__((packed)) av_alias;
+
+# define AV_RN(s, p) (((const union unaligned_##s *) (p))->l)
+# define AV_WN(s, p, v) ((((union unaligned_##s *) (p))->l) = (v))
+
+#elif defined(__DECC)
+
+# define AV_RN(s, p) (*((const __unaligned uint##s##_t*)(p)))
+# define AV_WN(s, p, v) (*((__unaligned uint##s##_t*)(p)) = (v))
+
+#elif AV_HAVE_FAST_UNALIGNED
+
+# define AV_RN(s, p) (((const av_alias##s*)(p))->u##s)
+# define AV_WN(s, p, v) (((av_alias##s*)(p))->u##s = (v))
+
+#else
+
+#ifndef AV_RB16
+# define AV_RB16(x) \
+ ((((const uint8_t*)(x))[0] << 8) | \
+ ((const uint8_t*)(x))[1])
+#endif
+#ifndef AV_WB16
+# define AV_WB16(p, darg) do { \
+ unsigned d = (darg); \
+ ((uint8_t*)(p))[1] = (d); \
+ ((uint8_t*)(p))[0] = (d)>>8; \
+ } while(0)
+#endif
+
+#ifndef AV_RL16
+# define AV_RL16(x) \
+ ((((const uint8_t*)(x))[1] << 8) | \
+ ((const uint8_t*)(x))[0])
+#endif
+#ifndef AV_WL16
+# define AV_WL16(p, darg) do { \
+ unsigned d = (darg); \
+ ((uint8_t*)(p))[0] = (d); \
+ ((uint8_t*)(p))[1] = (d)>>8; \
+ } while(0)
+#endif
+
+#ifndef AV_RB32
+# define AV_RB32(x) \
+ (((uint32_t)((const uint8_t*)(x))[0] << 24) | \
+ (((const uint8_t*)(x))[1] << 16) | \
+ (((const uint8_t*)(x))[2] << 8) | \
+ ((const uint8_t*)(x))[3])
+#endif
+#ifndef AV_WB32
+# define AV_WB32(p, darg) do { \
+ unsigned d = (darg); \
+ ((uint8_t*)(p))[3] = (d); \
+ ((uint8_t*)(p))[2] = (d)>>8; \
+ ((uint8_t*)(p))[1] = (d)>>16; \
+ ((uint8_t*)(p))[0] = (d)>>24; \
+ } while(0)
+#endif
+
+#ifndef AV_RL32
+# define AV_RL32(x) \
+ (((uint32_t)((const uint8_t*)(x))[3] << 24) | \
+ (((const uint8_t*)(x))[2] << 16) | \
+ (((const uint8_t*)(x))[1] << 8) | \
+ ((const uint8_t*)(x))[0])
+#endif
+#ifndef AV_WL32
+# define AV_WL32(p, darg) do { \
+ unsigned d = (darg); \
+ ((uint8_t*)(p))[0] = (d); \
+ ((uint8_t*)(p))[1] = (d)>>8; \
+ ((uint8_t*)(p))[2] = (d)>>16; \
+ ((uint8_t*)(p))[3] = (d)>>24; \
+ } while(0)
+#endif
+
+#ifndef AV_RB64
+# define AV_RB64(x) \
+ (((uint64_t)((const uint8_t*)(x))[0] << 56) | \
+ ((uint64_t)((const uint8_t*)(x))[1] << 48) | \
+ ((uint64_t)((const uint8_t*)(x))[2] << 40) | \
+ ((uint64_t)((const uint8_t*)(x))[3] << 32) | \
+ ((uint64_t)((const uint8_t*)(x))[4] << 24) | \
+ ((uint64_t)((const uint8_t*)(x))[5] << 16) | \
+ ((uint64_t)((const uint8_t*)(x))[6] << 8) | \
+ (uint64_t)((const uint8_t*)(x))[7])
+#endif
+#ifndef AV_WB64
+# define AV_WB64(p, darg) do { \
+ uint64_t d = (darg); \
+ ((uint8_t*)(p))[7] = (d); \
+ ((uint8_t*)(p))[6] = (d)>>8; \
+ ((uint8_t*)(p))[5] = (d)>>16; \
+ ((uint8_t*)(p))[4] = (d)>>24; \
+ ((uint8_t*)(p))[3] = (d)>>32; \
+ ((uint8_t*)(p))[2] = (d)>>40; \
+ ((uint8_t*)(p))[1] = (d)>>48; \
+ ((uint8_t*)(p))[0] = (d)>>56; \
+ } while(0)
+#endif
+
+#ifndef AV_RL64
+# define AV_RL64(x) \
+ (((uint64_t)((const uint8_t*)(x))[7] << 56) | \
+ ((uint64_t)((const uint8_t*)(x))[6] << 48) | \
+ ((uint64_t)((const uint8_t*)(x))[5] << 40) | \
+ ((uint64_t)((const uint8_t*)(x))[4] << 32) | \
+ ((uint64_t)((const uint8_t*)(x))[3] << 24) | \
+ ((uint64_t)((const uint8_t*)(x))[2] << 16) | \
+ ((uint64_t)((const uint8_t*)(x))[1] << 8) | \
+ (uint64_t)((const uint8_t*)(x))[0])
+#endif
+#ifndef AV_WL64
+# define AV_WL64(p, darg) do { \
+ uint64_t d = (darg); \
+ ((uint8_t*)(p))[0] = (d); \
+ ((uint8_t*)(p))[1] = (d)>>8; \
+ ((uint8_t*)(p))[2] = (d)>>16; \
+ ((uint8_t*)(p))[3] = (d)>>24; \
+ ((uint8_t*)(p))[4] = (d)>>32; \
+ ((uint8_t*)(p))[5] = (d)>>40; \
+ ((uint8_t*)(p))[6] = (d)>>48; \
+ ((uint8_t*)(p))[7] = (d)>>56; \
+ } while(0)
+#endif
+
+#if AV_HAVE_BIGENDIAN
+# define AV_RN(s, p) AV_RB##s(p)
+# define AV_WN(s, p, v) AV_WB##s(p, v)
+#else
+# define AV_RN(s, p) AV_RL##s(p)
+# define AV_WN(s, p, v) AV_WL##s(p, v)
+#endif
+
+#endif /* HAVE_FAST_UNALIGNED */
+
+#ifndef AV_RN16
+# define AV_RN16(p) AV_RN(16, p)
+#endif
+
+#ifndef AV_RN32
+# define AV_RN32(p) AV_RN(32, p)
+#endif
+
+#ifndef AV_RN64
+# define AV_RN64(p) AV_RN(64, p)
+#endif
+
+#ifndef AV_WN16
+# define AV_WN16(p, v) AV_WN(16, p, v)
+#endif
+
+#ifndef AV_WN32
+# define AV_WN32(p, v) AV_WN(32, p, v)
+#endif
+
+#ifndef AV_WN64
+# define AV_WN64(p, v) AV_WN(64, p, v)
+#endif
+
+#if AV_HAVE_BIGENDIAN
+# define AV_RB(s, p) AV_RN##s(p)
+# define AV_WB(s, p, v) AV_WN##s(p, v)
+# define AV_RL(s, p) av_bswap##s(AV_RN##s(p))
+# define AV_WL(s, p, v) AV_WN##s(p, av_bswap##s(v))
+#else
+# define AV_RB(s, p) av_bswap##s(AV_RN##s(p))
+# define AV_WB(s, p, v) AV_WN##s(p, av_bswap##s(v))
+# define AV_RL(s, p) AV_RN##s(p)
+# define AV_WL(s, p, v) AV_WN##s(p, v)
+#endif
+
+#define AV_RB8(x) (((const uint8_t*)(x))[0])
+#define AV_WB8(p, d) do { ((uint8_t*)(p))[0] = (d); } while(0)
+
+#define AV_RL8(x) AV_RB8(x)
+#define AV_WL8(p, d) AV_WB8(p, d)
+
+#ifndef AV_RB16
+# define AV_RB16(p) AV_RB(16, p)
+#endif
+#ifndef AV_WB16
+# define AV_WB16(p, v) AV_WB(16, p, v)
+#endif
+
+#ifndef AV_RL16
+# define AV_RL16(p) AV_RL(16, p)
+#endif
+#ifndef AV_WL16
+# define AV_WL16(p, v) AV_WL(16, p, v)
+#endif
+
+#ifndef AV_RB32
+# define AV_RB32(p) AV_RB(32, p)
+#endif
+#ifndef AV_WB32
+# define AV_WB32(p, v) AV_WB(32, p, v)
+#endif
+
+#ifndef AV_RL32
+# define AV_RL32(p) AV_RL(32, p)
+#endif
+#ifndef AV_WL32
+# define AV_WL32(p, v) AV_WL(32, p, v)
+#endif
+
+#ifndef AV_RB64
+# define AV_RB64(p) AV_RB(64, p)
+#endif
+#ifndef AV_WB64
+# define AV_WB64(p, v) AV_WB(64, p, v)
+#endif
+
+#ifndef AV_RL64
+# define AV_RL64(p) AV_RL(64, p)
+#endif
+#ifndef AV_WL64
+# define AV_WL64(p, v) AV_WL(64, p, v)
+#endif
+
+#ifndef AV_RB24
+# define AV_RB24(x) \
+ ((((const uint8_t*)(x))[0] << 16) | \
+ (((const uint8_t*)(x))[1] << 8) | \
+ ((const uint8_t*)(x))[2])
+#endif
+#ifndef AV_WB24
+# define AV_WB24(p, d) do { \
+ ((uint8_t*)(p))[2] = (d); \
+ ((uint8_t*)(p))[1] = (d)>>8; \
+ ((uint8_t*)(p))[0] = (d)>>16; \
+ } while(0)
+#endif
+
+#ifndef AV_RL24
+# define AV_RL24(x) \
+ ((((const uint8_t*)(x))[2] << 16) | \
+ (((const uint8_t*)(x))[1] << 8) | \
+ ((const uint8_t*)(x))[0])
+#endif
+#ifndef AV_WL24
+# define AV_WL24(p, d) do { \
+ ((uint8_t*)(p))[0] = (d); \
+ ((uint8_t*)(p))[1] = (d)>>8; \
+ ((uint8_t*)(p))[2] = (d)>>16; \
+ } while(0)
+#endif
+
+#ifndef AV_RB48
+# define AV_RB48(x) \
+ (((uint64_t)((const uint8_t*)(x))[0] << 40) | \
+ ((uint64_t)((const uint8_t*)(x))[1] << 32) | \
+ ((uint64_t)((const uint8_t*)(x))[2] << 24) | \
+ ((uint64_t)((const uint8_t*)(x))[3] << 16) | \
+ ((uint64_t)((const uint8_t*)(x))[4] << 8) | \
+ (uint64_t)((const uint8_t*)(x))[5])
+#endif
+#ifndef AV_WB48
+# define AV_WB48(p, darg) do { \
+ uint64_t d = (darg); \
+ ((uint8_t*)(p))[5] = (d); \
+ ((uint8_t*)(p))[4] = (d)>>8; \
+ ((uint8_t*)(p))[3] = (d)>>16; \
+ ((uint8_t*)(p))[2] = (d)>>24; \
+ ((uint8_t*)(p))[1] = (d)>>32; \
+ ((uint8_t*)(p))[0] = (d)>>40; \
+ } while(0)
+#endif
+
+#ifndef AV_RL48
+# define AV_RL48(x) \
+ (((uint64_t)((const uint8_t*)(x))[5] << 40) | \
+ ((uint64_t)((const uint8_t*)(x))[4] << 32) | \
+ ((uint64_t)((const uint8_t*)(x))[3] << 24) | \
+ ((uint64_t)((const uint8_t*)(x))[2] << 16) | \
+ ((uint64_t)((const uint8_t*)(x))[1] << 8) | \
+ (uint64_t)((const uint8_t*)(x))[0])
+#endif
+#ifndef AV_WL48
+# define AV_WL48(p, darg) do { \
+ uint64_t d = (darg); \
+ ((uint8_t*)(p))[0] = (d); \
+ ((uint8_t*)(p))[1] = (d)>>8; \
+ ((uint8_t*)(p))[2] = (d)>>16; \
+ ((uint8_t*)(p))[3] = (d)>>24; \
+ ((uint8_t*)(p))[4] = (d)>>32; \
+ ((uint8_t*)(p))[5] = (d)>>40; \
+ } while(0)
+#endif
+
+/*
+ * The AV_[RW]NA macros access naturally aligned data
+ * in a type-safe way.
+ */
+
+#define AV_RNA(s, p) (((const av_alias##s*)(p))->u##s)
+#define AV_WNA(s, p, v) (((av_alias##s*)(p))->u##s = (v))
+
+#ifndef AV_RN16A
+# define AV_RN16A(p) AV_RNA(16, p)
+#endif
+
+#ifndef AV_RN32A
+# define AV_RN32A(p) AV_RNA(32, p)
+#endif
+
+#ifndef AV_RN64A
+# define AV_RN64A(p) AV_RNA(64, p)
+#endif
+
+#ifndef AV_WN16A
+# define AV_WN16A(p, v) AV_WNA(16, p, v)
+#endif
+
+#ifndef AV_WN32A
+# define AV_WN32A(p, v) AV_WNA(32, p, v)
+#endif
+
+#ifndef AV_WN64A
+# define AV_WN64A(p, v) AV_WNA(64, p, v)
+#endif
+
+/*
+ * The AV_COPYxxU macros are suitable for copying data to/from unaligned
+ * memory locations.
+ */
+
+#define AV_COPYU(n, d, s) AV_WN##n(d, AV_RN##n(s));
+
+#ifndef AV_COPY16U
+# define AV_COPY16U(d, s) AV_COPYU(16, d, s)
+#endif
+
+#ifndef AV_COPY32U
+# define AV_COPY32U(d, s) AV_COPYU(32, d, s)
+#endif
+
+#ifndef AV_COPY64U
+# define AV_COPY64U(d, s) AV_COPYU(64, d, s)
+#endif
+
+#ifndef AV_COPY128U
+# define AV_COPY128U(d, s) \
+ do { \
+ AV_COPY64U(d, s); \
+ AV_COPY64U((char *)(d) + 8, (const char *)(s) + 8); \
+ } while(0)
+#endif
+
+/* Parameters for AV_COPY*, AV_SWAP*, AV_ZERO* must be
+ * naturally aligned. They may be implemented using MMX,
+ * so emms_c() must be called before using any float code
+ * afterwards.
+ */
+
+#define AV_COPY(n, d, s) \
+ (((av_alias##n*)(d))->u##n = ((const av_alias##n*)(s))->u##n)
+
+#ifndef AV_COPY16
+# define AV_COPY16(d, s) AV_COPY(16, d, s)
+#endif
+
+#ifndef AV_COPY32
+# define AV_COPY32(d, s) AV_COPY(32, d, s)
+#endif
+
+#ifndef AV_COPY64
+# define AV_COPY64(d, s) AV_COPY(64, d, s)
+#endif
+
+#ifndef AV_COPY128
+# define AV_COPY128(d, s) \
+ do { \
+ AV_COPY64(d, s); \
+ AV_COPY64((char*)(d)+8, (char*)(s)+8); \
+ } while(0)
+#endif
+
+#define AV_SWAP(n, a, b) FFSWAP(av_alias##n, *(av_alias##n*)(a), *(av_alias##n*)(b))
+
+#ifndef AV_SWAP64
+# define AV_SWAP64(a, b) AV_SWAP(64, a, b)
+#endif
+
+#define AV_ZERO(n, d) (((av_alias##n*)(d))->u##n = 0)
+
+#ifndef AV_ZERO16
+# define AV_ZERO16(d) AV_ZERO(16, d)
+#endif
+
+#ifndef AV_ZERO32
+# define AV_ZERO32(d) AV_ZERO(32, d)
+#endif
+
+#ifndef AV_ZERO64
+# define AV_ZERO64(d) AV_ZERO(64, d)
+#endif
+
+#ifndef AV_ZERO128
+# define AV_ZERO128(d) \
+ do { \
+ AV_ZERO64(d); \
+ AV_ZERO64((char*)(d)+8); \
+ } while(0)
+#endif
+
+#endif /* AVUTIL_INTREADWRITE_H */
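
A minimal usage sketch for the 48-bit read/write macros above (not part of the
patch; it assumes the bundled libavutil headers are on the include path):

    #include <stdio.h>
    #include <stdint.h>
    #include <libavutil/intreadwrite.h>

    int main(void)
    {
        uint8_t be[6] = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06 };
        uint8_t le[6];

        uint64_t v = AV_RB48(be);   /* big-endian read: 0x010203040506 */
        AV_WL48(le, v);             /* little-endian write: 06 05 04 03 02 01 */

        printf("%012llx %02x..%02x\n", (unsigned long long)v, le[0], le[5]);
        return 0;
    }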
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/lfg.h b/TMessagesProj/jni/ffmpeg/include/libavutil/lfg.h
new file mode 100644
index 000000000..ec90562cf
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/lfg.h
@@ -0,0 +1,62 @@
+/*
+ * Lagged Fibonacci PRNG
+ * Copyright (c) 2008 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_LFG_H
+#define AVUTIL_LFG_H
+
+typedef struct AVLFG {
+ unsigned int state[64];
+ int index;
+} AVLFG;
+
+void av_lfg_init(AVLFG *c, unsigned int seed);
+
+/**
+ * Get the next random unsigned 32-bit number using an ALFG.
+ *
+ * Please also consider a simple LCG like state = state*1664525 + 1013904223;
+ * it may be good enough and faster for your specific use case.
+ */
+static inline unsigned int av_lfg_get(AVLFG *c){
+ c->state[c->index & 63] = c->state[(c->index-24) & 63] + c->state[(c->index-55) & 63];
+ return c->state[c->index++ & 63];
+}
+
+/**
+ * Get the next random unsigned 32-bit number using a MLFG.
+ *
+ * Please also consider av_lfg_get() above; it is faster.
+ */
+static inline unsigned int av_mlfg_get(AVLFG *c){
+ unsigned int a= c->state[(c->index-55) & 63];
+ unsigned int b= c->state[(c->index-24) & 63];
+ return c->state[c->index++ & 63] = 2*a*b+a+b;
+}
+
+/**
+ * Get the next two numbers generated by a Box-Muller Gaussian
+ * generator using the random numbers issued by lfg.
+ *
+ * @param out array where the two generated numbers are placed
+ */
+void av_bmg_get(AVLFG *lfg, double out[2]);
+
+#endif /* AVUTIL_LFG_H */
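
A minimal sketch of driving the lagged Fibonacci generator declared above (not
part of the patch; the seed value is arbitrary):

    #include <stdio.h>
    #include <libavutil/lfg.h>

    int main(void)
    {
        AVLFG lfg;
        av_lfg_init(&lfg, 0xDEADBEEF);          /* any 32-bit seed */
        for (int i = 0; i < 4; i++)
            printf("%u\n", av_lfg_get(&lfg));   /* next pseudo-random 32-bit value */
        return 0;
    }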
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/log.h b/TMessagesProj/jni/ffmpeg/include/libavutil/log.h
new file mode 100644
index 000000000..321748cd8
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/log.h
@@ -0,0 +1,359 @@
+/*
+ * copyright (c) 2006 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_LOG_H
+#define AVUTIL_LOG_H
+
+#include <stdarg.h>
+#include "avutil.h"
+#include "attributes.h"
+#include "version.h"
+
+typedef enum {
+ AV_CLASS_CATEGORY_NA = 0,
+ AV_CLASS_CATEGORY_INPUT,
+ AV_CLASS_CATEGORY_OUTPUT,
+ AV_CLASS_CATEGORY_MUXER,
+ AV_CLASS_CATEGORY_DEMUXER,
+ AV_CLASS_CATEGORY_ENCODER,
+ AV_CLASS_CATEGORY_DECODER,
+ AV_CLASS_CATEGORY_FILTER,
+ AV_CLASS_CATEGORY_BITSTREAM_FILTER,
+ AV_CLASS_CATEGORY_SWSCALER,
+ AV_CLASS_CATEGORY_SWRESAMPLER,
+ AV_CLASS_CATEGORY_DEVICE_VIDEO_OUTPUT = 40,
+ AV_CLASS_CATEGORY_DEVICE_VIDEO_INPUT,
+ AV_CLASS_CATEGORY_DEVICE_AUDIO_OUTPUT,
+ AV_CLASS_CATEGORY_DEVICE_AUDIO_INPUT,
+ AV_CLASS_CATEGORY_DEVICE_OUTPUT,
+ AV_CLASS_CATEGORY_DEVICE_INPUT,
+ AV_CLASS_CATEGORY_NB, ///< not part of ABI/API
+}AVClassCategory;
+
+#define AV_IS_INPUT_DEVICE(category) \
+ (((category) == AV_CLASS_CATEGORY_DEVICE_VIDEO_INPUT) || \
+ ((category) == AV_CLASS_CATEGORY_DEVICE_AUDIO_INPUT) || \
+ ((category) == AV_CLASS_CATEGORY_DEVICE_INPUT))
+
+#define AV_IS_OUTPUT_DEVICE(category) \
+ (((category) == AV_CLASS_CATEGORY_DEVICE_VIDEO_OUTPUT) || \
+ ((category) == AV_CLASS_CATEGORY_DEVICE_AUDIO_OUTPUT) || \
+ ((category) == AV_CLASS_CATEGORY_DEVICE_OUTPUT))
+
+struct AVOptionRanges;
+
+/**
+ * Describe the class of an AVClass context structure. That is an
+ * arbitrary struct of which the first field is a pointer to an
+ * AVClass struct (e.g. AVCodecContext, AVFormatContext etc.).
+ */
+typedef struct AVClass {
+ /**
+ * The name of the class; usually it is the same name as the
+ * context structure type to which the AVClass is associated.
+ */
+ const char* class_name;
+
+ /**
+ * A pointer to a function which returns the name of a context
+ * instance ctx associated with the class.
+ */
+ const char* (*item_name)(void* ctx);
+
+ /**
+ * a pointer to the first option specified in the class, if any, or NULL
+ *
+ * @see av_set_default_options()
+ */
+ const struct AVOption *option;
+
+ /**
+ * LIBAVUTIL_VERSION with which this structure was created.
+ * This is used to allow fields to be added without requiring major
+ * version bumps everywhere.
+ */
+
+ int version;
+
+ /**
+ * Offset in the structure where log_level_offset is stored.
+ * 0 means there is no such variable
+ */
+ int log_level_offset_offset;
+
+ /**
+ * Offset in the structure where a pointer to the parent context for
+ * logging is stored. For example a decoder could pass its AVCodecContext
+ * to eval as such a parent context, which an av_log() implementation
+ * could then leverage to display the parent context.
+ * An offset of 0 means there is no such field.
+ */
+ int parent_log_context_offset;
+
+ /**
+ * Return next AVOptions-enabled child or NULL
+ */
+ void* (*child_next)(void *obj, void *prev);
+
+ /**
+ * Return an AVClass corresponding to the next potential
+ * AVOptions-enabled child.
+ *
+ * The difference between child_next and this is that
+ * child_next iterates over _already existing_ objects, while
+ * child_class_next iterates over _all possible_ children.
+ */
+ const struct AVClass* (*child_class_next)(const struct AVClass *prev);
+
+ /**
+ * Category used for visualization (like color)
+ * This is only set if the category is equal for all objects using this class.
+ * available since version (51 << 16 | 56 << 8 | 100)
+ */
+ AVClassCategory category;
+
+ /**
+ * Callback to return the category.
+ * available since version (51 << 16 | 59 << 8 | 100)
+ */
+ AVClassCategory (*get_category)(void* ctx);
+
+ /**
+ * Callback to return the supported/allowed ranges.
+ * available since version (52.12)
+ */
+ int (*query_ranges)(struct AVOptionRanges **, void *obj, const char *key, int flags);
+} AVClass;
+
+/**
+ * @addtogroup lavu_log
+ *
+ * @{
+ *
+ * @defgroup lavu_log_constants Logging Constants
+ *
+ * @{
+ */
+
+/**
+ * Print no output.
+ */
+#define AV_LOG_QUIET -8
+
+/**
+ * Something went really wrong and we will crash now.
+ */
+#define AV_LOG_PANIC 0
+
+/**
+ * Something went wrong and recovery is not possible.
+ * For example, no header was found for a format which depends
+ * on headers or an illegal combination of parameters is used.
+ */
+#define AV_LOG_FATAL 8
+
+/**
+ * Something went wrong and cannot losslessly be recovered.
+ * However, not all future data is affected.
+ */
+#define AV_LOG_ERROR 16
+
+/**
+ * Something somehow does not look correct. This may or may not
+ * lead to problems. An example would be the use of '-vstrict -2'.
+ */
+#define AV_LOG_WARNING 24
+
+/**
+ * Standard information.
+ */
+#define AV_LOG_INFO 32
+
+/**
+ * Detailed information.
+ */
+#define AV_LOG_VERBOSE 40
+
+/**
+ * Stuff which is only useful for libav* developers.
+ */
+#define AV_LOG_DEBUG 48
+
+/**
+ * Extremely verbose debugging, useful for libav* development.
+ */
+#define AV_LOG_TRACE 56
+
+#define AV_LOG_MAX_OFFSET (AV_LOG_TRACE - AV_LOG_QUIET)
+
+/**
+ * @}
+ */
+
+/**
+ * Sets additional colors for extended debugging sessions.
+ * @code
+ av_log(ctx, AV_LOG_DEBUG|AV_LOG_C(134), "Message in purple\n");
+ @endcode
+ * Requires a 256-color terminal. Use outside of debugging is not
+ * recommended.
+ */
+#define AV_LOG_C(x) ((x) << 8)
+
+/**
+ * Send the specified message to the log if the level is less than or equal
+ * to the current av_log_level. By default, all logging messages are sent to
+ * stderr. This behavior can be altered by setting a different logging callback
+ * function.
+ * @see av_log_set_callback
+ *
+ * @param avcl A pointer to an arbitrary struct of which the first field is a
+ * pointer to an AVClass struct or NULL if general log.
+ * @param level The importance level of the message expressed using a @ref
+ * lavu_log_constants "Logging Constant".
+ * @param fmt The format string (printf-compatible) that specifies how
+ * subsequent arguments are converted to output.
+ */
+void av_log(void *avcl, int level, const char *fmt, ...) av_printf_format(3, 4);
+
+
+/**
+ * Send the specified message to the log if the level is less than or equal
+ * to the current av_log_level. By default, all logging messages are sent to
+ * stderr. This behavior can be altered by setting a different logging callback
+ * function.
+ * @see av_log_set_callback
+ *
+ * @param avcl A pointer to an arbitrary struct of which the first field is a
+ * pointer to an AVClass struct.
+ * @param level The importance level of the message expressed using a @ref
+ * lavu_log_constants "Logging Constant".
+ * @param fmt The format string (printf-compatible) that specifies how
+ * subsequent arguments are converted to output.
+ * @param vl The arguments referenced by the format string.
+ */
+void av_vlog(void *avcl, int level, const char *fmt, va_list vl);
+
+/**
+ * Get the current log level
+ *
+ * @see lavu_log_constants
+ *
+ * @return Current log level
+ */
+int av_log_get_level(void);
+
+/**
+ * Set the log level
+ *
+ * @see lavu_log_constants
+ *
+ * @param level Logging level
+ */
+void av_log_set_level(int level);
+
+/**
+ * Set the logging callback
+ *
+ * @note The callback must be thread safe, even if the application does not use
+ * threads itself as some codecs are multithreaded.
+ *
+ * @see av_log_default_callback
+ *
+ * @param callback A logging function with a compatible signature.
+ */
+void av_log_set_callback(void (*callback)(void*, int, const char*, va_list));
+
+/**
+ * Default logging callback
+ *
+ * It prints the message to stderr, optionally colorizing it.
+ *
+ * @param avcl A pointer to an arbitrary struct of which the first field is a
+ * pointer to an AVClass struct.
+ * @param level The importance level of the message expressed using a @ref
+ * lavu_log_constants "Logging Constant".
+ * @param fmt The format string (printf-compatible) that specifies how
+ * subsequent arguments are converted to output.
+ * @param vl The arguments referenced by the format string.
+ */
+void av_log_default_callback(void *avcl, int level, const char *fmt,
+ va_list vl);
+
+/**
+ * Return the context name
+ *
+ * @param ctx The AVClass context
+ *
+ * @return The AVClass class_name
+ */
+const char* av_default_item_name(void* ctx);
+AVClassCategory av_default_get_category(void *ptr);
+
+/**
+ * Format a line of log the same way as the default callback.
+ * @param line buffer to receive the formatted line
+ * @param line_size size of the buffer
+ * @param print_prefix used to store whether the prefix must be printed;
+ * must point to a persistent integer initially set to 1
+ */
+void av_log_format_line(void *ptr, int level, const char *fmt, va_list vl,
+ char *line, int line_size, int *print_prefix);
+
+#if FF_API_DLOG
+/**
+ * av_dlog macros
+ * Useful to print debug messages that should not be compiled in normally.
+ * @deprecated unused
+ */
+
+#ifdef DEBUG
+# define av_dlog(pctx, ...) av_log(pctx, AV_LOG_DEBUG, __VA_ARGS__)
+#else
+# define av_dlog(pctx, ...) do { if (0) av_log(pctx, AV_LOG_DEBUG, __VA_ARGS__); } while (0)
+#endif
+#endif /* FF_API_DLOG */
+
+/**
+ * Skip repeated messages. This requires the user app to use av_log() instead of
+ * (f)printf, as the two would otherwise interfere and, with some bad luck, lead
+ * to "Last message repeated x times" messages below (f)printf messages.
+ * Also, to receive the last "last repeated" line (if any), the user app must
+ * call av_log(NULL, AV_LOG_QUIET, "%s", ""); at the end.
+ */
+#define AV_LOG_SKIP_REPEATED 1
+
+/**
+ * Include the log severity in messages originating from codecs.
+ *
+ * Results in messages such as:
+ * [rawvideo @ 0xDEADBEEF] [error] encode did not produce valid pts
+ */
+#define AV_LOG_PRINT_LEVEL 2
+
+void av_log_set_flags(int arg);
+int av_log_get_flags(void);
+
+/**
+ * @}
+ */
+
+#endif /* AVUTIL_LOG_H */
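
A minimal sketch of installing a custom logging callback with the API above
(not part of the patch; as the header notes, the callback must be thread-safe):

    #include <stdio.h>
    #include <libavutil/log.h>

    static void my_log_cb(void *avcl, int level, const char *fmt, va_list vl)
    {
        /* mask out AV_LOG_C() color bits before comparing severities */
        if ((level & 0xff) > av_log_get_level())
            return;
        vfprintf(stderr, fmt, vl);   /* real code would also print the class prefix */
    }

    int main(void)
    {
        av_log_set_level(AV_LOG_VERBOSE);
        av_log_set_callback(my_log_cb);
        av_log(NULL, AV_LOG_INFO, "hello from %s\n", "av_log");
        return 0;
    }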
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/macros.h b/TMessagesProj/jni/ffmpeg/include/libavutil/macros.h
new file mode 100644
index 000000000..446532377
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/macros.h
@@ -0,0 +1,48 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * @ingroup lavu
+ * Utility Preprocessor macros
+ */
+
+#ifndef AVUTIL_MACROS_H
+#define AVUTIL_MACROS_H
+
+/**
+ * @addtogroup preproc_misc Preprocessor String Macros
+ *
+ * String manipulation macros
+ *
+ * @{
+ */
+
+#define AV_STRINGIFY(s) AV_TOSTRING(s)
+#define AV_TOSTRING(s) #s
+
+#define AV_GLUE(a, b) a ## b
+#define AV_JOIN(a, b) AV_GLUE(a, b)
+
+/**
+ * @}
+ */
+
+#define AV_PRAGMA(s) _Pragma(#s)
+
+#endif /* AVUTIL_MACROS_H */
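
A small sketch of what the preprocessor helpers above expand to (not part of
the patch):

    #include <stdio.h>
    #include <libavutil/macros.h>

    #define WIDTH 1920

    int main(void)
    {
        puts(AV_STRINGIFY(WIDTH));     /* expands WIDTH first, prints "1920" */

        int AV_JOIN(line_, 4) = 42;    /* token-pastes to the identifier line_4 */
        printf("%d\n", line_4);
        return 0;
    }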
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/mathematics.h b/TMessagesProj/jni/ffmpeg/include/libavutil/mathematics.h
new file mode 100644
index 000000000..57c44f845
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/mathematics.h
@@ -0,0 +1,165 @@
+/*
+ * copyright (c) 2005-2012 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_MATHEMATICS_H
+#define AVUTIL_MATHEMATICS_H
+
+#include <stdint.h>
+#include <math.h>
+#include "attributes.h"
+#include "rational.h"
+#include "intfloat.h"
+
+#ifndef M_E
+#define M_E 2.7182818284590452354 /* e */
+#endif
+#ifndef M_LN2
+#define M_LN2 0.69314718055994530942 /* log_e 2 */
+#endif
+#ifndef M_LN10
+#define M_LN10 2.30258509299404568402 /* log_e 10 */
+#endif
+#ifndef M_LOG2_10
+#define M_LOG2_10 3.32192809488736234787 /* log_2 10 */
+#endif
+#ifndef M_PHI
+#define M_PHI 1.61803398874989484820 /* phi / golden ratio */
+#endif
+#ifndef M_PI
+#define M_PI 3.14159265358979323846 /* pi */
+#endif
+#ifndef M_PI_2
+#define M_PI_2 1.57079632679489661923 /* pi/2 */
+#endif
+#ifndef M_SQRT1_2
+#define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */
+#endif
+#ifndef M_SQRT2
+#define M_SQRT2 1.41421356237309504880 /* sqrt(2) */
+#endif
+#ifndef NAN
+#define NAN av_int2float(0x7fc00000)
+#endif
+#ifndef INFINITY
+#define INFINITY av_int2float(0x7f800000)
+#endif
+
+/**
+ * @addtogroup lavu_math
+ * @{
+ */
+
+
+enum AVRounding {
+ AV_ROUND_ZERO = 0, ///< Round toward zero.
+ AV_ROUND_INF = 1, ///< Round away from zero.
+ AV_ROUND_DOWN = 2, ///< Round toward -infinity.
+ AV_ROUND_UP = 3, ///< Round toward +infinity.
+ AV_ROUND_NEAR_INF = 5, ///< Round to nearest and halfway cases away from zero.
+ AV_ROUND_PASS_MINMAX = 8192, ///< Flag to pass INT64_MIN/MAX through instead of rescaling, this avoids special cases for AV_NOPTS_VALUE
+};
+
+/**
+ * Compute the greatest common divisor of a and b.
+ *
+ * @return gcd of a and b up to sign; if a >= 0 and b >= 0, return value is >= 0;
+ * if a == 0 and b == 0, returns 0.
+ */
+int64_t av_const av_gcd(int64_t a, int64_t b);
+
+/**
+ * Rescale a 64-bit integer with rounding to nearest.
+ * A simple a*b/c isn't possible as it can overflow.
+ */
+int64_t av_rescale(int64_t a, int64_t b, int64_t c) av_const;
+
+/**
+ * Rescale a 64-bit integer with specified rounding.
+ * A simple a*b/c isn't possible as it can overflow.
+ *
+ * @return rescaled value a, or if AV_ROUND_PASS_MINMAX is set and a is
+ * INT64_MIN or INT64_MAX then a is passed through unchanged.
+ */
+int64_t av_rescale_rnd(int64_t a, int64_t b, int64_t c, enum AVRounding) av_const;
+
+/**
+ * Rescale a 64-bit integer by 2 rational numbers.
+ */
+int64_t av_rescale_q(int64_t a, AVRational bq, AVRational cq) av_const;
+
+/**
+ * Rescale a 64-bit integer by 2 rational numbers with specified rounding.
+ *
+ * @return rescaled value a, or if AV_ROUND_PASS_MINMAX is set and a is
+ * INT64_MIN or INT64_MAX then a is passed through unchanged.
+ */
+int64_t av_rescale_q_rnd(int64_t a, AVRational bq, AVRational cq,
+ enum AVRounding) av_const;
+
+/**
+ * Compare two timestamps, each in its own time base.
+ * The result of the function is undefined if one of the timestamps
+ * is outside the int64_t range when represented in the other's time base.
+ * @return -1 if ts_a is before ts_b, 1 if ts_a is after ts_b or 0 if they represent the same position
+ */
+int av_compare_ts(int64_t ts_a, AVRational tb_a, int64_t ts_b, AVRational tb_b);
+
+/**
+ * Compare 2 integers modulo mod.
+ * That is we compare integers a and b for which only the least
+ * significant log2(mod) bits are known.
+ *
+ * @param mod must be a power of 2
+ * @return a negative value if a is smaller than b
+ * a positive value if a is greater than b
+ * 0 if a equals b
+ */
+int64_t av_compare_mod(uint64_t a, uint64_t b, uint64_t mod);
+
+/**
+ * Rescale a timestamp while preserving known durations.
+ *
+ * @param in_ts Input timestamp
+ * @param in_tb Input timebase
+ * @param fs_tb Duration and *last timebase
+ * @param duration duration till the next call
+ * @param out_tb Output timebase
+ */
+int64_t av_rescale_delta(AVRational in_tb, int64_t in_ts, AVRational fs_tb, int duration, int64_t *last, AVRational out_tb);
+
+/**
+ * Add a value to a timestamp.
+ *
+ * This function guarantees that when the same value is repeatedly added,
+ * no accumulation of rounding errors occurs.
+ *
+ * @param ts Input timestamp
+ * @param ts_tb Input timestamp timebase
+ * @param inc value to add to ts
+ * @param inc_tb inc timebase
+ */
+int64_t av_add_stable(AVRational ts_tb, int64_t ts, AVRational inc_tb, int64_t inc);
+
+
+ /**
+ * @}
+ */
+
+#endif /* AVUTIL_MATHEMATICS_H */
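
A minimal sketch of overflow-safe time-base conversion with av_rescale_q()
(not part of the patch; the time bases are illustrative):

    #include <stdio.h>
    #include <libavutil/mathematics.h>

    int main(void)
    {
        /* One second of 90 kHz MPEG-TS time, converted to milliseconds.
         * A plain a*b/c could overflow int64_t; av_rescale_q() cannot. */
        AVRational tb_in  = { 1, 90000 };
        AVRational tb_out = { 1, 1000 };
        int64_t ms = av_rescale_q(90000, tb_in, tb_out);
        printf("%lld ms\n", (long long)ms);   /* prints 1000 */
        return 0;
    }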
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/md5.h b/TMessagesProj/jni/ffmpeg/include/libavutil/md5.h
new file mode 100644
index 000000000..79702c88c
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/md5.h
@@ -0,0 +1,81 @@
+/*
+ * copyright (c) 2006 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_MD5_H
+#define AVUTIL_MD5_H
+
+#include <stdint.h>
+
+#include "attributes.h"
+#include "version.h"
+
+/**
+ * @defgroup lavu_md5 MD5
+ * @ingroup lavu_crypto
+ * @{
+ */
+
+extern const int av_md5_size;
+
+struct AVMD5;
+
+/**
+ * Allocate an AVMD5 context.
+ */
+struct AVMD5 *av_md5_alloc(void);
+
+/**
+ * Initialize MD5 hashing.
+ *
+ * @param ctx pointer to the function context (of size av_md5_size)
+ */
+void av_md5_init(struct AVMD5 *ctx);
+
+/**
+ * Update hash value.
+ *
+ * @param ctx hash function context
+ * @param src input data to update hash with
+ * @param len input data length
+ */
+void av_md5_update(struct AVMD5 *ctx, const uint8_t *src, int len);
+
+/**
+ * Finish hashing and output digest value.
+ *
+ * @param ctx hash function context
+ * @param dst buffer where output digest value is stored
+ */
+void av_md5_final(struct AVMD5 *ctx, uint8_t *dst);
+
+/**
+ * Hash an array of data.
+ *
+ * @param dst The output buffer to write the digest into
+ * @param src The data to hash
+ * @param len The length of the data, in bytes
+ */
+void av_md5_sum(uint8_t *dst, const uint8_t *src, const int len);
+
+/**
+ * @}
+ */
+
+#endif /* AVUTIL_MD5_H */
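
A minimal sketch of the MD5 API above (not part of the patch; av_free() comes
from mem.h):

    #include <stdio.h>
    #include <libavutil/md5.h>
    #include <libavutil/mem.h>

    int main(void)
    {
        const uint8_t data[] = "abc";
        uint8_t digest[16];

        struct AVMD5 *ctx = av_md5_alloc();   /* opaque context, release with av_free() */
        if (!ctx)
            return 1;
        av_md5_init(ctx);
        av_md5_update(ctx, data, 3);
        av_md5_final(ctx, digest);
        av_free(ctx);

        /* equivalently, one-shot: av_md5_sum(digest, data, 3); */
        for (int i = 0; i < 16; i++)
            printf("%02x", digest[i]);
        putchar('\n');
        return 0;
    }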
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/mem.h b/TMessagesProj/jni/ffmpeg/include/libavutil/mem.h
new file mode 100644
index 000000000..d25b3229b
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/mem.h
@@ -0,0 +1,406 @@
+/*
+ * copyright (c) 2006 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * memory handling functions
+ */
+
+#ifndef AVUTIL_MEM_H
+#define AVUTIL_MEM_H
+
+#include <limits.h>
+#include <stdint.h>
+
+#include "attributes.h"
+#include "error.h"
+#include "avutil.h"
+
+/**
+ * @addtogroup lavu_mem
+ * @{
+ */
+
+
+#if defined(__INTEL_COMPILER) && __INTEL_COMPILER < 1110 || defined(__SUNPRO_C)
+ #define DECLARE_ALIGNED(n,t,v) t __attribute__ ((aligned (n))) v
+ #define DECLARE_ASM_CONST(n,t,v) const t __attribute__ ((aligned (n))) v
+#elif defined(__TI_COMPILER_VERSION__)
+ #define DECLARE_ALIGNED(n,t,v) \
+ AV_PRAGMA(DATA_ALIGN(v,n)) \
+ t __attribute__((aligned(n))) v
+ #define DECLARE_ASM_CONST(n,t,v) \
+ AV_PRAGMA(DATA_ALIGN(v,n)) \
+ static const t __attribute__((aligned(n))) v
+#elif defined(__GNUC__)
+ #define DECLARE_ALIGNED(n,t,v) t __attribute__ ((aligned (n))) v
+ #define DECLARE_ASM_CONST(n,t,v) static const t av_used __attribute__ ((aligned (n))) v
+#elif defined(_MSC_VER)
+ #define DECLARE_ALIGNED(n,t,v) __declspec(align(n)) t v
+ #define DECLARE_ASM_CONST(n,t,v) __declspec(align(n)) static const t v
+#else
+ #define DECLARE_ALIGNED(n,t,v) t v
+ #define DECLARE_ASM_CONST(n,t,v) static const t v
+#endif
+
+#if AV_GCC_VERSION_AT_LEAST(3,1)
+ #define av_malloc_attrib __attribute__((__malloc__))
+#else
+ #define av_malloc_attrib
+#endif
+
+#if AV_GCC_VERSION_AT_LEAST(4,3)
+ #define av_alloc_size(...) __attribute__((alloc_size(__VA_ARGS__)))
+#else
+ #define av_alloc_size(...)
+#endif
+
+/**
+ * Allocate a block of size bytes with alignment suitable for all
+ * memory accesses (including vectors if available on the CPU).
+ * @param size Size in bytes for the memory block to be allocated.
+ * @return Pointer to the allocated block, NULL if the block cannot
+ * be allocated.
+ * @see av_mallocz()
+ */
+void *av_malloc(size_t size) av_malloc_attrib av_alloc_size(1);
+
+/**
+ * Allocate a block of size * nmemb bytes with av_malloc().
+ * @param nmemb Number of elements
+ * @param size Size of the single element
+ * @return Pointer to the allocated block, NULL if the block cannot
+ * be allocated.
+ * @see av_malloc()
+ */
+av_alloc_size(1, 2) static inline void *av_malloc_array(size_t nmemb, size_t size)
+{
+ if (!size || nmemb >= INT_MAX / size)
+ return NULL;
+ return av_malloc(nmemb * size);
+}
+
+/**
+ * Allocate or reallocate a block of memory.
+ * If ptr is NULL and size > 0, allocate a new block. If
+ * size is zero, free the memory block pointed to by ptr.
+ * @param ptr Pointer to a memory block already allocated with
+ * av_realloc() or NULL.
+ * @param size Size in bytes of the memory block to be allocated or
+ * reallocated.
+ * @return Pointer to a newly-reallocated block or NULL if the block
+ * cannot be reallocated or the function is used to free the memory block.
+ * @warning Pointers originating from the av_malloc() family of functions must
+ * not be passed to av_realloc(). The former can be implemented using
+ * memalign() (or other functions), and there is no guarantee that
+ * pointers from such functions can be passed to realloc() at all.
+ * The situation is undefined according to POSIX and may crash with
+ * some libc implementations.
+ * @see av_fast_realloc()
+ */
+void *av_realloc(void *ptr, size_t size) av_alloc_size(2);
+
+/**
+ * Allocate or reallocate a block of memory.
+ * This function does the same thing as av_realloc, except:
+ * - It takes two arguments and checks the result of the multiplication for
+ * integer overflow.
+ * - It frees the input block in case of failure, thus avoiding the memory
+ * leak with the classic "buf = realloc(buf); if (!buf) return -1;".
+ */
+void *av_realloc_f(void *ptr, size_t nelem, size_t elsize);
+
+/**
+ * Allocate or reallocate a block of memory.
+ * If *ptr is NULL and size > 0, allocate a new block. If
+ * size is zero, free the memory block pointed to by ptr.
+ * @param ptr Pointer to a pointer to a memory block already allocated
+ * with av_realloc(), or pointer to a pointer to NULL.
+ * The pointer is updated on success, or freed on failure.
+ * @param size Size in bytes for the memory block to be allocated or
+ * reallocated
+ * @return Zero on success, an AVERROR error code on failure.
+ * @warning Pointers originating from the av_malloc() family of functions must
+ * not be passed to av_reallocp(). The former can be implemented using
+ * memalign() (or other functions), and there is no guarantee that
+ * pointers from such functions can be passed to realloc() at all.
+ * The situation is undefined according to POSIX and may crash with
+ * some libc implementations.
+ */
+av_warn_unused_result
+int av_reallocp(void *ptr, size_t size);
+
+/**
+ * Allocate or reallocate an array.
+ * If ptr is NULL and nmemb > 0, allocate a new block. If
+ * nmemb is zero, free the memory block pointed to by ptr.
+ * @param ptr Pointer to a memory block already allocated with
+ * av_realloc() or NULL.
+ * @param nmemb Number of elements
+ * @param size Size of the single element
+ * @return Pointer to a newly-reallocated block or NULL if the block
+ * cannot be reallocated or the function is used to free the memory block.
+ * @warning Pointers originating from the av_malloc() family of functions must
+ * not be passed to av_realloc(). The former can be implemented using
+ * memalign() (or other functions), and there is no guarantee that
+ * pointers from such functions can be passed to realloc() at all.
+ * The situation is undefined according to POSIX and may crash with
+ * some libc implementations.
+ */
+av_alloc_size(2, 3) void *av_realloc_array(void *ptr, size_t nmemb, size_t size);
+
+/**
+ * Allocate or reallocate an array through a pointer to a pointer.
+ * If *ptr is NULL and nmemb > 0, allocate a new block. If
+ * nmemb is zero, free the memory block pointed to by ptr.
+ * @param ptr Pointer to a pointer to a memory block already allocated
+ * with av_realloc(), or pointer to a pointer to NULL.
+ * The pointer is updated on success, or freed on failure.
+ * @param nmemb Number of elements
+ * @param size Size of the single element
+ * @return Zero on success, an AVERROR error code on failure.
+ * @warning Pointers originating from the av_malloc() family of functions must
+ * not be passed to av_realloc(). The former can be implemented using
+ * memalign() (or other functions), and there is no guarantee that
+ * pointers from such functions can be passed to realloc() at all.
+ * The situation is undefined according to POSIX and may crash with
+ * some libc implementations.
+ */
+av_alloc_size(2, 3) int av_reallocp_array(void *ptr, size_t nmemb, size_t size);
+
+/**
+ * Free a memory block which has been allocated with av_malloc(z)() or
+ * av_realloc().
+ * @param ptr Pointer to the memory block which should be freed.
+ * @note ptr = NULL is explicitly allowed.
+ * @note It is recommended that you use av_freep() instead.
+ * @see av_freep()
+ */
+void av_free(void *ptr);
+
+/**
+ * Allocate a block of size bytes with alignment suitable for all
+ * memory accesses (including vectors if available on the CPU) and
+ * zero all the bytes of the block.
+ * @param size Size in bytes for the memory block to be allocated.
+ * @return Pointer to the allocated block, NULL if it cannot be allocated.
+ * @see av_malloc()
+ */
+void *av_mallocz(size_t size) av_malloc_attrib av_alloc_size(1);
+
+/**
+ * Allocate a block of nmemb * size bytes with alignment suitable for all
+ * memory accesses (including vectors if available on the CPU) and
+ * zero all the bytes of the block.
+ * The allocation will fail if nmemb * size is greater than or equal
+ * to INT_MAX.
+ * @param nmemb
+ * @param size
+ * @return Pointer to the allocated block, NULL if it cannot be allocated.
+ */
+void *av_calloc(size_t nmemb, size_t size) av_malloc_attrib;
+
+/**
+ * Allocate a block of size * nmemb bytes with av_mallocz().
+ * @param nmemb Number of elements
+ * @param size Size of the single element
+ * @return Pointer to the allocated block, NULL if the block cannot
+ * be allocated.
+ * @see av_mallocz()
+ * @see av_malloc_array()
+ */
+av_alloc_size(1, 2) static inline void *av_mallocz_array(size_t nmemb, size_t size)
+{
+ if (!size || nmemb >= INT_MAX / size)
+ return NULL;
+ return av_mallocz(nmemb * size);
+}
+
+/**
+ * Duplicate the string s.
+ * @param s string to be duplicated
+ * @return Pointer to a newly-allocated string containing a
+ * copy of s or NULL if the string cannot be allocated.
+ */
+char *av_strdup(const char *s) av_malloc_attrib;
+
+/**
+ * Duplicate a substring of the string s.
+ * @param s string to be duplicated
+ * @param len the maximum length of the resulting string (not counting the
+ * terminating byte).
+ * @return Pointer to a newly-allocated string containing a
+ * copy of s or NULL if the string cannot be allocated.
+ */
+char *av_strndup(const char *s, size_t len) av_malloc_attrib;
+
+/**
+ * Duplicate the buffer p.
+ * @param p buffer to be duplicated
+ * @return Pointer to a newly allocated buffer containing a
+ * copy of p or NULL if the buffer cannot be allocated.
+ */
+void *av_memdup(const void *p, size_t size);
+
+/**
+ * Free a memory block which has been allocated with av_malloc(z)() or
+ * av_realloc() and set the pointer pointing to it to NULL.
+ * @param ptr Pointer to the pointer to the memory block which should
+ * be freed.
+ * @note passing a pointer to a NULL pointer is safe and leads to no action.
+ * @see av_free()
+ */
+void av_freep(void *ptr);
+
+/**
+ * Add an element to a dynamic array.
+ *
+ * The array to grow is supposed to be an array of pointers to
+ * structures, and the element to add must be a pointer to an already
+ * allocated structure.
+ *
+ * The array is reallocated when its size reaches powers of 2.
+ * Therefore, the amortized cost of adding an element is constant.
+ *
+ * In case of success, the pointer to the array is updated in order to
+ * point to the new grown array, and the number pointed to by nb_ptr
+ * is incremented.
+ * In case of failure, the array is freed, *tab_ptr is set to NULL and
+ * *nb_ptr is set to 0.
+ *
+ * @param tab_ptr pointer to the array to grow
+ * @param nb_ptr pointer to the number of elements in the array
+ * @param elem element to add
+ * @see av_dynarray_add_nofree(), av_dynarray2_add()
+ */
+void av_dynarray_add(void *tab_ptr, int *nb_ptr, void *elem);
+
+/**
+ * Add an element to a dynamic array.
+ *
+ * This function has the same functionality as av_dynarray_add(),
+ * but it does not free the memory on failure. It returns an error code
+ * instead and leaves the current buffer untouched.
+ *
+ * @param tab_ptr pointer to the array to grow
+ * @param nb_ptr pointer to the number of elements in the array
+ * @param elem element to add
+ * @return >=0 on success, negative otherwise.
+ * @see av_dynarray_add(), av_dynarray2_add()
+ */
+av_warn_unused_result
+int av_dynarray_add_nofree(void *tab_ptr, int *nb_ptr, void *elem);
+
+/**
+ * Add an element of size elem_size to a dynamic array.
+ *
+ * The array is reallocated when its number of elements reaches powers of 2.
+ * Therefore, the amortized cost of adding an element is constant.
+ *
+ * In case of success, the pointer to the array is updated in order to
+ * point to the new grown array, and the number pointed to by nb_ptr
+ * is incremented.
+ * In case of failure, the array is freed, *tab_ptr is set to NULL and
+ * *nb_ptr is set to 0.
+ *
+ * @param tab_ptr pointer to the array to grow
+ * @param nb_ptr pointer to the number of elements in the array
+ * @param elem_size size in bytes of the elements in the array
+ * @param elem_data pointer to the data of the element to add. If NULL, the space of
+ * the new added element is not filled.
+ * @return pointer to the data of the element to copy into the newly allocated space.
+ * If NULL, the newly allocated space is left uninitialized.
+ * @see av_dynarray_add(), av_dynarray_add_nofree()
+ */
+void *av_dynarray2_add(void **tab_ptr, int *nb_ptr, size_t elem_size,
+ const uint8_t *elem_data);
+
+/**
+ * Multiply two size_t values checking for overflow.
+ * @return 0 if success, AVERROR(EINVAL) if overflow.
+ */
+static inline int av_size_mult(size_t a, size_t b, size_t *r)
+{
+ size_t t = a * b;
+ /* Hack inspired from glibc: only try the division if nelem and elsize
+ * are both greater than sqrt(SIZE_MAX). */
+ if ((a | b) >= ((size_t)1 << (sizeof(size_t) * 4)) && a && t / a != b)
+ return AVERROR(EINVAL);
+ *r = t;
+ return 0;
+}
+
+/**
+ * Set the maximum size that may be allocated in one block.
+ */
+void av_max_alloc(size_t max);
+
+/**
+ * deliberately overlapping memcpy implementation
+ * @param dst destination buffer
+ * @param back how many bytes back we start (the initial size of the overlapping window), must be > 0
+ * @param cnt number of bytes to copy, must be >= 0
+ *
+ * cnt > back is valid, this will copy the bytes we just copied,
+ * thus creating a repeating pattern with a period length of back.
+ */
+void av_memcpy_backptr(uint8_t *dst, int back, int cnt);
+
+/**
+ * Reallocate the given block if it is not large enough, otherwise do nothing.
+ *
+ * @see av_realloc
+ */
+void *av_fast_realloc(void *ptr, unsigned int *size, size_t min_size);
+
+/**
+ * Allocate a buffer, reusing the given one if large enough.
+ *
+ * Contrary to av_fast_realloc the current buffer contents might not be
+ * preserved and on error the old buffer is freed, thus no special
+ * handling to avoid memleaks is necessary.
+ *
+ * @param ptr pointer to pointer to already allocated buffer, overwritten with pointer to new buffer
+ * @param size size of the buffer *ptr points to
+ * @param min_size minimum size of *ptr buffer after returning, *ptr will be NULL and
+ * *size 0 if an error occurred.
+ */
+void av_fast_malloc(void *ptr, unsigned int *size, size_t min_size);
+
+/**
+ * Allocate a buffer, reusing the given one if large enough.
+ *
+ * All newly allocated space is initially cleared
+ * Contrary to av_fast_realloc the current buffer contents might not be
+ * preserved and on error the old buffer is freed, thus no special
+ * handling to avoid memleaks is necessary.
+ *
+ * @param ptr pointer to pointer to already allocated buffer, overwritten with pointer to new buffer
+ * @param size size of the buffer *ptr points to
+ * @param min_size minimum size of *ptr buffer after returning, *ptr will be NULL and
+ * *size 0 if an error occurred.
+ */
+void av_fast_mallocz(void *ptr, unsigned int *size, size_t min_size);
+
+/**
+ * @}
+ */
+
+#endif /* AVUTIL_MEM_H */
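
A minimal sketch of the allocation helpers above (not part of the patch),
showing why av_freep() is preferred over av_free():

    #include <stdio.h>
    #include <libavutil/mem.h>

    int main(void)
    {
        /* overflow-checked, zero-initialized array allocation */
        int *table = av_mallocz_array(16, sizeof(*table));
        if (!table)
            return 1;
        table[0] = 7;

        /* av_freep() frees the block *and* NULLs the pointer, preventing
         * accidental double frees and use-after-free */
        av_freep(&table);
        printf("table is now %p\n", (void *)table);   /* (nil) */
        return 0;
    }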
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/motion_vector.h b/TMessagesProj/jni/ffmpeg/include/libavutil/motion_vector.h
new file mode 100644
index 000000000..ec2955638
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/motion_vector.h
@@ -0,0 +1,57 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_MOTION_VECTOR_H
+#define AVUTIL_MOTION_VECTOR_H
+
+#include <stdint.h>
+
+typedef struct AVMotionVector {
+ /**
+ * Where the current macroblock comes from; negative value when it comes
+ * from the past, positive value when it comes from the future.
+ * XXX: set exact relative ref frame reference instead of a +/- 1 "direction".
+ */
+ int32_t source;
+ /**
+ * Width and height of the block.
+ */
+ uint8_t w, h;
+ /**
+ * Absolute source position. Can be outside the frame area.
+ */
+ int16_t src_x, src_y;
+ /**
+ * Absolute destination position. Can be outside the frame area.
+ */
+ int16_t dst_x, dst_y;
+ /**
+ * Extra flag information.
+ * Currently unused.
+ */
+ uint64_t flags;
+ /**
+ * Motion vector
+ * src_x = dst_x + motion_x / motion_scale
+ * src_y = dst_y + motion_y / motion_scale
+ */
+ int32_t motion_x, motion_y;
+ uint16_t motion_scale;
+} AVMotionVector;
+
+#endif /* AVUTIL_MOTION_VECTOR_H */
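
A small sketch of interpreting an exported AVMotionVector (not part of the
patch; it assumes motion_scale is non-zero, as it is for real codec exports):

    #include <stdio.h>
    #include <libavutil/motion_vector.h>

    static void print_mv(const AVMotionVector *mv)
    {
        /* recover the (sub-pel) source position per the struct docs:
         * src = dst + motion / motion_scale */
        double sx = mv->dst_x + (double)mv->motion_x / mv->motion_scale;
        double sy = mv->dst_y + (double)mv->motion_y / mv->motion_scale;

        printf("%dx%d block at (%d,%d) predicted from (%.2f,%.2f) in the %s\n",
               mv->w, mv->h, mv->dst_x, mv->dst_y, sx, sy,
               mv->source < 0 ? "past" : "future");
    }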
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/murmur3.h b/TMessagesProj/jni/ffmpeg/include/libavutil/murmur3.h
new file mode 100644
index 000000000..f29ed973e
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/murmur3.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2013 Reimar Döffinger
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_MURMUR3_H
+#define AVUTIL_MURMUR3_H
+
+#include <stdint.h>
+
+struct AVMurMur3 *av_murmur3_alloc(void);
+void av_murmur3_init_seeded(struct AVMurMur3 *c, uint64_t seed);
+void av_murmur3_init(struct AVMurMur3 *c);
+void av_murmur3_update(struct AVMurMur3 *c, const uint8_t *src, int len);
+void av_murmur3_final(struct AVMurMur3 *c, uint8_t dst[16]);
+
+#endif /* AVUTIL_MURMUR3_H */
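
A minimal sketch of the MurmurHash3 API above (not part of the patch; av_free()
comes from mem.h):

    #include <stdio.h>
    #include <libavutil/murmur3.h>
    #include <libavutil/mem.h>

    int main(void)
    {
        const uint8_t msg[] = "The quick brown fox";
        uint8_t h[16];

        struct AVMurMur3 *ctx = av_murmur3_alloc();
        if (!ctx)
            return 1;
        av_murmur3_init(ctx);             /* fixed default seed; otherwise use
                                             av_murmur3_init_seeded() */
        av_murmur3_update(ctx, msg, sizeof(msg) - 1);
        av_murmur3_final(ctx, h);         /* 128-bit digest */
        av_free(ctx);

        for (int i = 0; i < 16; i++)
            printf("%02x", h[i]);
        putchar('\n');
        return 0;
    }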
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/opt.h b/TMessagesProj/jni/ffmpeg/include/libavutil/opt.h
new file mode 100644
index 000000000..753434d62
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/opt.h
@@ -0,0 +1,865 @@
+/*
+ * AVOptions
+ * copyright (c) 2005 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_OPT_H
+#define AVUTIL_OPT_H
+
+/**
+ * @file
+ * AVOptions
+ */
+
+#include "rational.h"
+#include "avutil.h"
+#include "dict.h"
+#include "log.h"
+#include "pixfmt.h"
+#include "samplefmt.h"
+#include "version.h"
+
+/**
+ * @defgroup avoptions AVOptions
+ * @ingroup lavu_data
+ * @{
+ * AVOptions provide a generic system to declare options on arbitrary structs
+ * ("objects"). An option can have a help text, a type and a range of possible
+ * values. Options may then be enumerated, read and written to.
+ *
+ * @section avoptions_implement Implementing AVOptions
+ * This section describes how to add AVOptions capabilities to a struct.
+ *
+ * All AVOptions-related information is stored in an AVClass. Therefore
+ * the first member of the struct should be a pointer to an AVClass describing it.
+ * The option field of the AVClass must be set to a NULL-terminated static array
+ * of AVOptions. Each AVOption must have a non-empty name, a type, a default
+ * value and for number-type AVOptions also a range of allowed values. It must
+ * also declare an offset in bytes from the start of the struct, where the field
+ * associated with this AVOption is located. Other fields in the AVOption struct
+ * should also be set when applicable, but are not required.
+ *
+ * The following example illustrates an AVOptions-enabled struct:
+ * @code
+ * typedef struct test_struct {
+ * AVClass *class;
+ * int int_opt;
+ * char *str_opt;
+ * uint8_t *bin_opt;
+ * int bin_len;
+ * } test_struct;
+ *
+ * static const AVOption test_options[] = {
+ * { "test_int", "This is a test option of int type.", offsetof(test_struct, int_opt),
+ * AV_OPT_TYPE_INT, { .i64 = -1 }, INT_MIN, INT_MAX },
+ * { "test_str", "This is a test option of string type.", offsetof(test_struct, str_opt),
+ * AV_OPT_TYPE_STRING },
+ * { "test_bin", "This is a test option of binary type.", offsetof(test_struct, bin_opt),
+ * AV_OPT_TYPE_BINARY },
+ * { NULL },
+ * };
+ *
+ * static const AVClass test_class = {
+ * .class_name = "test class",
+ * .item_name = av_default_item_name,
+ * .option = test_options,
+ * .version = LIBAVUTIL_VERSION_INT,
+ * };
+ * @endcode
+ *
+ * Next, when allocating your struct, you must ensure that the AVClass pointer
+ * is set to the correct value. Then, av_opt_set_defaults() can be called to
+ * initialize defaults. After that the struct is ready to be used with the
+ * AVOptions API.
+ *
+ * When cleaning up, you may use the av_opt_free() function to automatically
+ * free all the allocated string and binary options.
+ *
+ * Continuing with the above example:
+ *
+ * @code
+ * test_struct *alloc_test_struct(void)
+ * {
+ * test_struct *ret = av_malloc(sizeof(*ret));
+ * ret->class = &test_class;
+ * av_opt_set_defaults(ret);
+ * return ret;
+ * }
+ * void free_test_struct(test_struct **foo)
+ * {
+ * av_opt_free(*foo);
+ * av_freep(foo);
+ * }
+ * @endcode
+ *
+ * @subsection avoptions_implement_nesting Nesting
+ * It may happen that an AVOptions-enabled struct contains another
+ * AVOptions-enabled struct as a member (e.g. AVCodecContext in
+ * libavcodec exports generic options, while its priv_data field exports
+ * codec-specific options). In such a case, it is possible to set up the
+ * parent struct to export a child's options. To do that, simply
+ * implement AVClass.child_next() and AVClass.child_class_next() in the
+ * parent struct's AVClass.
+ * Assuming that the test_struct from above now also contains a
+ * child_struct field:
+ *
+ * @code
+ * typedef struct child_struct {
+ * AVClass *class;
+ * int flags_opt;
+ * } child_struct;
+ * static const AVOption child_opts[] = {
+ * { "test_flags", "This is a test option of flags type.",
+ * offsetof(child_struct, flags_opt), AV_OPT_TYPE_FLAGS, { .i64 = 0 }, INT_MIN, INT_MAX },
+ * { NULL },
+ * };
+ * static const AVClass child_class = {
+ * .class_name = "child class",
+ * .item_name = av_default_item_name,
+ * .option = child_opts,
+ * .version = LIBAVUTIL_VERSION_INT,
+ * };
+ *
+ * void *child_next(void *obj, void *prev)
+ * {
+ * test_struct *t = obj;
+ * if (!prev && t->child_struct)
+ * return t->child_struct;
+ * return NULL;
+ * }
+ * const AVClass *child_class_next(const AVClass *prev)
+ * {
+ * return prev ? NULL : &child_class;
+ * }
+ * @endcode
+ * Putting child_next() and child_class_next() as defined above into
+ * test_class will now make child_struct's options accessible through
+ * test_struct (again, proper setup as described above needs to be done on
+ * child_struct right after it is created).
+ *
+ * From the above example it might not be clear why both child_next()
+ * and child_class_next() are needed. The distinction is that child_next()
+ * iterates over actually existing objects, while child_class_next()
+ * iterates over all possible child classes. E.g. if an AVCodecContext
+ * was initialized to use a codec which has private options, then its
+ * child_next() will return AVCodecContext.priv_data and finish
+ * iterating. OTOH child_class_next() on AVCodecContext.av_class will
+ * iterate over all available codecs with private options.
+ *
+ * @subsection avoptions_implement_named_constants Named constants
+ * It is possible to create named constants for options. Simply set the unit
+ * field of the option the constants should apply to a string and
+ * create the constants themselves as options of type AV_OPT_TYPE_CONST
+ * with their unit field set to the same string.
+ * Their default_val field should contain the value of the named
+ * constant.
+ * For example, to add some named constants for the test_flags option
+ * above, put the following into the child_opts array:
+ * @code
+ * { "test_flags", "This is a test option of flags type.",
+ * offsetof(child_struct, flags_opt), AV_OPT_TYPE_FLAGS, { .i64 = 0 }, INT_MIN, INT_MAX, "test_unit" },
+ * { "flag1", "This is a flag with value 16", 0, AV_OPT_TYPE_CONST, { .i64 = 16 }, 0, 0, "test_unit" },
+ * @endcode
+ *
+ * @section avoptions_use Using AVOptions
+ * This section deals with accessing options in an AVOptions-enabled struct.
+ * Such structs in FFmpeg are e.g. AVCodecContext in libavcodec or
+ * AVFormatContext in libavformat.
+ *
+ * @subsection avoptions_use_examine Examining AVOptions
+ * The basic functions for examining options are av_opt_next(), which iterates
+ * over all options defined for one object, and av_opt_find(), which searches
+ * for an option with the given name.
+ *
+ * The situation is more complicated with nesting. An AVOptions-enabled struct
+ * may have AVOptions-enabled children. Passing the AV_OPT_SEARCH_CHILDREN flag
+ * to av_opt_find() will make the function search children recursively.
+ *
+ * For enumerating there are basically two cases. The first is when you want to
+ * get all options that may potentially exist on the struct and its children
+ * (e.g. when constructing documentation). In that case you should call
+ * av_opt_child_class_next() recursively on the parent struct's AVClass. The
+ * second case is when you have an already initialized struct with all its
+ * children and you want to get all options that can be actually written or read
+ * from it. In that case you should call av_opt_child_next() recursively (and
+ * av_opt_next() on each result).
+ *
+ * @subsection avoptions_use_get_set Reading and writing AVOptions
+ * When setting options, you often have a string read directly from the
+ * user. In such a case, simply passing it to av_opt_set() is enough. For
+ * non-string type options, av_opt_set() will parse the string according to the
+ * option type.
+ *
+ * Similarly av_opt_get() will read any option type and convert it to a string
+ * which will be returned. Do not forget that the string is allocated, so you
+ * have to free it with av_free().
+ *
+ * In some cases it may be more convenient to put all options into an
+ * AVDictionary and call av_opt_set_dict() on it. A specific case of this
+ * are the format/codec open functions in lavf/lavc which take a dictionary
+ * filled with option as a parameter. This makes it possible to set some options
+ * that cannot be set otherwise, since e.g. the input file format is not known
+ * before the file is actually opened.
+ */
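
A short sketch of reading and writing an option at runtime (not part of the
patch; it reuses the hypothetical test_struct helpers from the example above,
and av_opt_set_int()/av_free() from elsewhere in libavutil):

    #include <stdio.h>
    #include <libavutil/opt.h>
    #include <libavutil/mem.h>

    /* assumes the test_struct / alloc_test_struct() / free_test_struct()
     * helpers sketched in the comment above */
    void demo(void)
    {
        test_struct *t = alloc_test_struct();

        av_opt_set(t, "test_int", "42", 0);    /* string parsed per the option type */
        av_opt_set_int(t, "test_int", 42, 0);  /* or set the native type directly */

        uint8_t *val = NULL;
        if (av_opt_get(t, "test_int", 0, &val) >= 0) {
            printf("test_int = %s\n", val);    /* returned string is allocated */
            av_free(val);
        }
        free_test_struct(&t);
    }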
+
+enum AVOptionType{
+ AV_OPT_TYPE_FLAGS,
+ AV_OPT_TYPE_INT,
+ AV_OPT_TYPE_INT64,
+ AV_OPT_TYPE_DOUBLE,
+ AV_OPT_TYPE_FLOAT,
+ AV_OPT_TYPE_STRING,
+ AV_OPT_TYPE_RATIONAL,
+ AV_OPT_TYPE_BINARY, ///< offset must point to a pointer immediately followed by an int for the length
+ AV_OPT_TYPE_DICT,
+ AV_OPT_TYPE_CONST = 128,
+ AV_OPT_TYPE_IMAGE_SIZE = MKBETAG('S','I','Z','E'), ///< offset must point to two consecutive integers
+ AV_OPT_TYPE_PIXEL_FMT = MKBETAG('P','F','M','T'),
+ AV_OPT_TYPE_SAMPLE_FMT = MKBETAG('S','F','M','T'),
+ AV_OPT_TYPE_VIDEO_RATE = MKBETAG('V','R','A','T'), ///< offset must point to AVRational
+ AV_OPT_TYPE_DURATION = MKBETAG('D','U','R',' '),
+ AV_OPT_TYPE_COLOR = MKBETAG('C','O','L','R'),
+ AV_OPT_TYPE_CHANNEL_LAYOUT = MKBETAG('C','H','L','A'),
+ AV_OPT_TYPE_BOOL = MKBETAG('B','O','O','L'),
+};
+
+/**
+ * AVOption
+ */
+typedef struct AVOption {
+ const char *name;
+
+ /**
+ * short English help text
+ * @todo What about other languages?
+ */
+ const char *help;
+
+ /**
+ * The offset relative to the context structure where the option
+ * value is stored. It should be 0 for named constants.
+ */
+ int offset;
+ enum AVOptionType type;
+
+ /**
+ * the default value for scalar options
+ */
+ union {
+ int64_t i64;
+ double dbl;
+ const char *str;
+ /* TODO those are unused now */
+ AVRational q;
+ } default_val;
+ double min; ///< minimum valid value for the option
+ double max; ///< maximum valid value for the option
+
+ int flags;
+#define AV_OPT_FLAG_ENCODING_PARAM 1 ///< a generic parameter which can be set by the user for muxing or encoding
+#define AV_OPT_FLAG_DECODING_PARAM 2 ///< a generic parameter which can be set by the user for demuxing or decoding
+#if FF_API_OPT_TYPE_METADATA
+#define AV_OPT_FLAG_METADATA 4 ///< some data extracted or inserted into the file like title, comment, ...
+#endif
+#define AV_OPT_FLAG_AUDIO_PARAM 8
+#define AV_OPT_FLAG_VIDEO_PARAM 16
+#define AV_OPT_FLAG_SUBTITLE_PARAM 32
+/**
+ * The option is intended for exporting values to the caller.
+ */
+#define AV_OPT_FLAG_EXPORT 64
+/**
+ * The option may not be set through the AVOptions API, only read.
+ * This flag only makes sense when AV_OPT_FLAG_EXPORT is also set.
+ */
+#define AV_OPT_FLAG_READONLY 128
+#define AV_OPT_FLAG_FILTERING_PARAM (1<<16) ///< a generic parameter which can be set by the user for filtering
+//FIXME think about enc-audio, ... style flags
+
+ /**
+ * The logical unit to which the option belongs. Non-constant
+ * options and corresponding named constants share the same
+ * unit. May be NULL.
+ */
+ const char *unit;
+} AVOption;
+
+/**
+ * A single allowed range of values, or a single allowed value.
+ */
+typedef struct AVOptionRange {
+ const char *str;
+ /**
+ * Value range.
+ * For string ranges this represents the min/max length.
+ * For dimensions this represents the min/max pixel count or width/height in the multi-component case.
+ */
+ double value_min, value_max;
+ /**
+ * Value's component range.
+ * For strings this represents the Unicode range for chars; 0-127 limits to ASCII.
+ */
+ double component_min, component_max;
+ /**
+ * Range flag.
+ * If set to 1 the struct encodes a range, if set to 0 a single value.
+ */
+ int is_range;
+} AVOptionRange;
+
+/**
+ * List of AVOptionRange structs.
+ */
+typedef struct AVOptionRanges {
+ /**
+ * Array of option ranges.
+ *
+ * Most option types use just one component.
+ * The following describes multi-component option types:
+ *
+ * AV_OPT_TYPE_IMAGE_SIZE:
+ * component index 0: range of pixel count (width * height).
+ * component index 1: range of width.
+ * component index 2: range of height.
+ *
+ * @note To obtain the multi-component version of this structure, the user must
+ * pass AV_OPT_MULTI_COMPONENT_RANGE to the av_opt_query_ranges() or
+ * av_opt_query_ranges_default() function.
+ *
+ * A multi-component range can be read as in the following example:
+ *
+ * @code
+ * int range_index, component_index;
+ * AVOptionRanges *ranges;
+ * AVOptionRange *range[3]; //may require more than 3 in the future.
+ * av_opt_query_ranges(&ranges, obj, key, AV_OPT_MULTI_COMPONENT_RANGE);
+ * for (range_index = 0; range_index < ranges->nb_ranges; range_index++) {
+ * for (component_index = 0; component_index < ranges->nb_components; component_index++)
+ * range[component_index] = ranges->range[ranges->nb_ranges * component_index + range_index];
+ * //do something with range here.
+ * }
+ * av_opt_freep_ranges(&ranges);
+ * @endcode
+ */
+ AVOptionRange **range;
+ /**
+ * Number of ranges per component.
+ */
+ int nb_ranges;
+ /**
+ * Number of components.
+ */
+ int nb_components;
+} AVOptionRanges;
+
+/**
+ * Show the obj options.
+ *
+ * @param req_flags requested flags for the options to show. Show only the
+ * options for which it is opt->flags & req_flags.
+ * @param rej_flags rejected flags for the options to show. Show only the
+ * options for which it is !(opt->flags & rej_flags).
+ * @param av_log_obj log context to use for showing the options
+ */
+int av_opt_show2(void *obj, void *av_log_obj, int req_flags, int rej_flags);
+
+/**
+ * Set the values of all AVOption fields to their default values.
+ *
+ * @param s an AVOption-enabled struct (its first member must be a pointer to AVClass)
+ */
+void av_opt_set_defaults(void *s);
+
+/**
+ * Set the values of all AVOption fields to their default values. Only these
+ * AVOption fields for which (opt->flags & mask) == flags will have their
+ * default applied to s.
+ *
+ * @param s an AVOption-enabled struct (its first member must be a pointer to AVClass)
+ * @param mask combination of AV_OPT_FLAG_*
+ * @param flags combination of AV_OPT_FLAG_*
+ */
+void av_opt_set_defaults2(void *s, int mask, int flags);
+
+/**
+ * Parse the key/value pairs list in opts. For each key/value pair
+ * found, stores the value in the field in ctx that is named like the
+ * key. ctx must be an AVClass context, storing is done using
+ * AVOptions.
+ *
+ * @param opts options string to parse, may be NULL
+ * @param key_val_sep a 0-terminated list of characters used to
+ * separate key from value
+ * @param pairs_sep a 0-terminated list of characters used to separate
+ * two pairs from each other
+ * @return the number of successfully set key/value pairs, or a negative
+ * value corresponding to an AVERROR code in case of error:
+ * AVERROR(EINVAL) if opts cannot be parsed,
+ * the error code issued by av_opt_set() if a key/value pair
+ * cannot be set
+ */
+int av_set_options_string(void *ctx, const char *opts,
+ const char *key_val_sep, const char *pairs_sep);
+
+/**
+ * Parse the key-value pairs list in opts. For each key=value pair found,
+ * set the value of the corresponding option in ctx.
+ *
+ * @param ctx the AVClass object to set options on
+ * @param opts the options string, key-value pairs separated by a
+ * delimiter
+ * @param shorthand a NULL-terminated array of options names for shorthand
+ * notation: if the first field in opts has no key part,
+ * the key is taken from the first element of shorthand;
+ * then again for the second, etc., until either opts is
+ * finished, shorthand is finished or a named option is
+ * found; after that, all options must be named
+ * @param key_val_sep a 0-terminated list of characters used to separate
+ * key from value, for example '='
+ * @param pairs_sep a 0-terminated list of characters used to separate
+ * two pairs from each other, for example ':' or ','
+ * @return the number of successfully set key=value pairs, or a negative
+ * value corresponding to an AVERROR code in case of error:
+ * AVERROR(EINVAL) if opts cannot be parsed,
+ * the error code issued by av_set_string3() if a key/value pair
+ * cannot be set
+ *
+ * Option names must use only the following characters: a-z A-Z 0-9 - . / _
+ * Separators must use characters distinct from option names and from each
+ * other.
+ */
+int av_opt_set_from_string(void *ctx, const char *opts,
+ const char *const *shorthand,
+ const char *key_val_sep, const char *pairs_sep);
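+/*
+ * A minimal usage sketch (illustrative, not part of this header). "ctx" is
+ * any AVOptions-enabled struct; the option names "w", "h" and "name" are
+ * assumptions for the example:
+ *
+ *   static const char *const shorthand[] = { "w", "h", NULL };
+ *   // Equivalent to "w=640:h=480:name=test"; the first two fields take
+ *   // their keys from the shorthand array, after which options are named.
+ *   int ret = av_opt_set_from_string(ctx, "640:480:name=test",
+ *                                    shorthand, "=", ":");
+ *   if (ret < 0)
+ *       return ret; // parsing failed or a pair could not be set
+ */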
+/**
+ * Free all allocated objects in obj.
+ */
+void av_opt_free(void *obj);
+
+/**
+ * Check whether a particular flag is set in a flags field.
+ *
+ * @param field_name the name of the flag field option
+ * @param flag_name the name of the flag to check
+ * @return non-zero if the flag is set, zero if the flag isn't set,
+ * isn't of the right type, or the flags field doesn't exist.
+ */
+int av_opt_flag_is_set(void *obj, const char *field_name, const char *flag_name);
+
+/**
+ * Set all the options from a given dictionary on an object.
+ *
+ * @param obj a struct whose first element is a pointer to AVClass
+ * @param options options to process. This dictionary will be freed and replaced
+ * by a new one containing all options not found in obj.
+ * This new dictionary must be freed by the caller
+ * with av_dict_free().
+ *
+ * @return 0 on success, a negative AVERROR if some option was found in obj,
+ * but could not be set.
+ *
+ * @see av_dict_copy()
+ */
+int av_opt_set_dict(void *obj, struct AVDictionary **options);
+
+
+/**
+ * Set all the options from a given dictionary on an object.
+ *
+ * @param obj a struct whose first element is a pointer to AVClass
+ * @param options options to process. This dictionary will be freed and replaced
+ * by a new one containing all options not found in obj.
+ * This new dictionary must be freed by the caller
+ * with av_dict_free().
+ * @param search_flags A combination of AV_OPT_SEARCH_*.
+ *
+ * @return 0 on success, a negative AVERROR if some option was found in obj,
+ * but could not be set.
+ *
+ * @see av_dict_copy()
+ */
+int av_opt_set_dict2(void *obj, struct AVDictionary **options, int search_flags);
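+/*
+ * Illustrative sketch: applying a dictionary and inspecting the leftovers.
+ * "obj" is any AVOptions-enabled struct; the option name "threads" is an
+ * assumption for the example:
+ *
+ *   AVDictionary *opts = NULL;
+ *   av_dict_set(&opts, "threads", "4", 0);
+ *   if (av_opt_set_dict(obj, &opts) < 0) {
+ *       // a matching option was found but could not be set
+ *   }
+ *   // opts now holds only the entries that matched no option in obj
+ *   av_dict_free(&opts);
+ */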
+
+/**
+ * Extract a key-value pair from the beginning of a string.
+ *
+ * @param ropts pointer to the options string, will be updated to
+ * point to the rest of the string (one of the pairs_sep
+ * or the final NUL)
+ * @param key_val_sep a 0-terminated list of characters used to separate
+ * key from value, for example '='
+ * @param pairs_sep a 0-terminated list of characters used to separate
+ * two pairs from each other, for example ':' or ','
+ * @param flags flags; see the AV_OPT_FLAG_* values below
+ * @param rkey parsed key; must be freed using av_free()
+ * @param rval parsed value; must be freed using av_free()
+ *
+ * @return >=0 for success, or a negative value corresponding to an
+ * AVERROR code in case of error; in particular:
+ * AVERROR(EINVAL) if no key is present
+ *
+ */
+int av_opt_get_key_value(const char **ropts,
+ const char *key_val_sep, const char *pairs_sep,
+ unsigned flags,
+ char **rkey, char **rval);
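+/*
+ * Illustrative parsing loop (a sketch; the input string is an assumption):
+ *
+ *   const char *p = "bitrate=500k:preset=fast";
+ *   while (*p) {
+ *       char *key = NULL, *val = NULL;
+ *       if (av_opt_get_key_value(&p, "=", ":", 0, &key, &val) < 0)
+ *           break;
+ *       // use key/val here
+ *       av_free(key);
+ *       av_free(val);
+ *       if (*p)
+ *           p++; // skip the pairs separator
+ *   }
+ */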
+
+enum {
+
+ /**
+ * Accept to parse a value without a key; the key will then be returned
+ * as NULL.
+ */
+ AV_OPT_FLAG_IMPLICIT_KEY = 1,
+};
+
+/**
+ * @defgroup opt_eval_funcs Evaluating option strings
+ * @{
+ * This group of functions can be used to evaluate option strings
+ * and get numbers out of them. They do the same thing as av_opt_set(),
+ * except the result is written into the caller-supplied pointer.
+ *
+ * @param obj a struct whose first element is a pointer to AVClass.
+ * @param o an option for which the string is to be evaluated.
+ * @param val string to be evaluated.
+ * @param *_out value of the string will be written here.
+ *
+ * @return 0 on success, a negative number on failure.
+ */
+int av_opt_eval_flags (void *obj, const AVOption *o, const char *val, int *flags_out);
+int av_opt_eval_int (void *obj, const AVOption *o, const char *val, int *int_out);
+int av_opt_eval_int64 (void *obj, const AVOption *o, const char *val, int64_t *int64_out);
+int av_opt_eval_float (void *obj, const AVOption *o, const char *val, float *float_out);
+int av_opt_eval_double(void *obj, const AVOption *o, const char *val, double *double_out);
+int av_opt_eval_q (void *obj, const AVOption *o, const char *val, AVRational *q_out);
+/**
+ * @}
+ */
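+/*
+ * Illustrative sketch: evaluating a string against a known option without
+ * modifying obj. The option name "refs" is an assumption for the example:
+ *
+ *   const AVOption *o = av_opt_find(obj, "refs", NULL, 0, 0);
+ *   int n;
+ *   if (o && av_opt_eval_int(obj, o, "3", &n) >= 0) {
+ *       // n == 3; the option stored in obj is left untouched
+ *   }
+ */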
+
+#define AV_OPT_SEARCH_CHILDREN (1 << 0) /**< Search in possible children of the
+ given object first. */
+/**
+ * The obj passed to av_opt_find() is fake -- only a double pointer to AVClass
+ * instead of a required pointer to a struct containing AVClass. This is
+ * useful for searching for options without needing to allocate the corresponding
+ * object.
+ */
+#define AV_OPT_SEARCH_FAKE_OBJ (1 << 1)
+
+/**
+ * In av_opt_get, return NULL if the option has a pointer type and is set to NULL,
+ * rather than returning an empty string.
+ */
+#define AV_OPT_ALLOW_NULL (1 << 2)
+
+/**
+ * Allows av_opt_query_ranges and av_opt_query_ranges_default to return more than
+ * one component for certain option types.
+ * @see AVOptionRanges for details.
+ */
+#define AV_OPT_MULTI_COMPONENT_RANGE (1 << 12)
+
+/**
+ * Look for an option in an object. Consider only options which
+ * have all the specified flags set.
+ *
+ * @param[in] obj A pointer to a struct whose first element is a
+ * pointer to an AVClass.
+ * Alternatively a double pointer to an AVClass, if
+ * AV_OPT_SEARCH_FAKE_OBJ search flag is set.
+ * @param[in] name The name of the option to look for.
+ * @param[in] unit When searching for named constants, name of the unit
+ * it belongs to.
+ * @param opt_flags Find only options with all the specified flags set (AV_OPT_FLAG).
+ * @param search_flags A combination of AV_OPT_SEARCH_*.
+ *
+ * @return A pointer to the option found, or NULL if no option
+ * was found.
+ *
+ * @note Options found with AV_OPT_SEARCH_CHILDREN flag may not be settable
+ * directly with av_opt_set(). Use special calls which take an options
+ * AVDictionary (e.g. avformat_open_input()) to set options found with this
+ * flag.
+ */
+const AVOption *av_opt_find(void *obj, const char *name, const char *unit,
+ int opt_flags, int search_flags);
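+/*
+ * Illustrative sketch (the option name "b" is an assumption):
+ *
+ *   const AVOption *opt = av_opt_find(obj, "b", NULL, 0,
+ *                                     AV_OPT_SEARCH_CHILDREN);
+ *   if (opt) {
+ *       // opt->help, opt->default_val etc. describe the option
+ *   }
+ */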
+
+/**
+ * Look for an option in an object. Consider only options which
+ * have all the specified flags set.
+ *
+ * @param[in] obj A pointer to a struct whose first element is a
+ * pointer to an AVClass.
+ * Alternatively a double pointer to an AVClass, if
+ * AV_OPT_SEARCH_FAKE_OBJ search flag is set.
+ * @param[in] name The name of the option to look for.
+ * @param[in] unit When searching for named constants, name of the unit
+ * it belongs to.
+ * @param opt_flags Find only options with all the specified flags set (AV_OPT_FLAG).
+ * @param search_flags A combination of AV_OPT_SEARCH_*.
+ * @param[out] target_obj if non-NULL, an object to which the option belongs will be
+ * written here. It may be different from obj if AV_OPT_SEARCH_CHILDREN is present
+ * in search_flags. This parameter is ignored if search_flags contain
+ * AV_OPT_SEARCH_FAKE_OBJ.
+ *
+ * @return A pointer to the option found, or NULL if no option
+ * was found.
+ */
+const AVOption *av_opt_find2(void *obj, const char *name, const char *unit,
+ int opt_flags, int search_flags, void **target_obj);
+
+/**
+ * Iterate over all AVOptions belonging to obj.
+ *
+ * @param obj an AVOptions-enabled struct or a double pointer to an
+ * AVClass describing it.
+ * @param prev result of the previous call to av_opt_next() on this object
+ * or NULL
+ * @return next AVOption or NULL
+ */
+const AVOption *av_opt_next(const void *obj, const AVOption *prev);
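+/*
+ * Illustrative iteration sketch:
+ *
+ *   const AVOption *opt = NULL;
+ *   while ((opt = av_opt_next(obj, opt)))
+ *       av_log(obj, AV_LOG_INFO, "option: %s\n", opt->name);
+ */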
+
+/**
+ * Iterate over AVOptions-enabled children of obj.
+ *
+ * @param prev result of a previous call to this function or NULL
+ * @return next AVOptions-enabled child or NULL
+ */
+void *av_opt_child_next(void *obj, void *prev);
+
+/**
+ * Iterate over potential AVOptions-enabled children of parent.
+ *
+ * @param prev result of a previous call to this function or NULL
+ * @return AVClass corresponding to next potential child or NULL
+ */
+const AVClass *av_opt_child_class_next(const AVClass *parent, const AVClass *prev);
+
+/**
+ * @defgroup opt_set_funcs Option setting functions
+ * @{
+ * Those functions set the field of obj with the given name to value.
+ *
+ * @param[in] obj A struct whose first element is a pointer to an AVClass.
+ * @param[in] name the name of the field to set
+ * @param[in] val The value to set. In case of av_opt_set() if the field is not
+ * of a string type, then the given string is parsed.
+ * SI postfixes and some named scalars are supported.
+ * If the field is of a numeric type, it has to be a numeric or named
+ * scalar. Behavior with more than one scalar and +- infix operators
+ * is undefined.
+ * If the field is of a flags type, it has to be a sequence of numeric
+ * scalars or named flags separated by '+' or '-'. Prefixing a flag
+ * with '+' causes it to be set without affecting the other flags;
+ * similarly, '-' unsets a flag.
+ * @param search_flags flags passed to av_opt_find2. E.g. if AV_OPT_SEARCH_CHILDREN
+ * is passed here, then the option may be set on a child of obj.
+ *
+ * @return 0 if the value has been set, or an AVERROR code in case of
+ * error:
+ * AVERROR_OPTION_NOT_FOUND if no matching option exists
+ * AVERROR(ERANGE) if the value is out of range
+ * AVERROR(EINVAL) if the value is not valid
+ */
+int av_opt_set (void *obj, const char *name, const char *val, int search_flags);
+int av_opt_set_int (void *obj, const char *name, int64_t val, int search_flags);
+int av_opt_set_double (void *obj, const char *name, double val, int search_flags);
+int av_opt_set_q (void *obj, const char *name, AVRational val, int search_flags);
+int av_opt_set_bin (void *obj, const char *name, const uint8_t *val, int size, int search_flags);
+int av_opt_set_image_size(void *obj, const char *name, int w, int h, int search_flags);
+int av_opt_set_pixel_fmt (void *obj, const char *name, enum AVPixelFormat fmt, int search_flags);
+int av_opt_set_sample_fmt(void *obj, const char *name, enum AVSampleFormat fmt, int search_flags);
+int av_opt_set_video_rate(void *obj, const char *name, AVRational val, int search_flags);
+int av_opt_set_channel_layout(void *obj, const char *name, int64_t ch_layout, int search_flags);
+/**
+ * @note Any old dictionary present is discarded and replaced with a copy of the new one. The
+ * caller still owns val and is responsible for freeing it.
+ */
+int av_opt_set_dict_val(void *obj, const char *name, const AVDictionary *val, int search_flags);
+
+/**
+ * Set a binary option to an integer list.
+ *
+ * @param obj AVClass object to set options on
+ * @param name name of the binary option
+ * @param val pointer to an integer list (must have the correct type with
+ * regard to the contents of the list)
+ * @param term list terminator (usually 0 or -1)
+ * @param flags search flags
+ */
+#define av_opt_set_int_list(obj, name, val, term, flags) \
+ (av_int_list_length(val, term) > INT_MAX / sizeof(*(val)) ? \
+ AVERROR(EINVAL) : \
+ av_opt_set_bin(obj, name, (const uint8_t *)(val), \
+ av_int_list_length(val, term) * sizeof(*(val)), flags))
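+/*
+ * Illustrative sketch: constraining a (hypothetical) sink context to a set
+ * of pixel formats, a common use of this macro:
+ *
+ *   static const enum AVPixelFormat pix_fmts[] = {
+ *       AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE
+ *   };
+ *   int ret = av_opt_set_int_list(sink_ctx, "pix_fmts", pix_fmts,
+ *                                 AV_PIX_FMT_NONE, AV_OPT_SEARCH_CHILDREN);
+ */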
+
+/**
+ * @}
+ */
+
+/**
+ * @defgroup opt_get_funcs Option getting functions
+ * @{
+ * Those functions get a value of the option with the given name from an object.
+ *
+ * @param[in] obj a struct whose first element is a pointer to an AVClass.
+ * @param[in] name name of the option to get.
+ * @param[in] search_flags flags passed to av_opt_find2. E.g. if AV_OPT_SEARCH_CHILDREN
+ * is passed here, then the option may be found in a child of obj.
+ * @param[out] out_val value of the option will be written here
+ * @return >=0 on success, a negative error code otherwise
+ */
+/**
+ * @note the returned string will be av_malloc()ed and must be av_free()ed by the caller
+ *
+ * @note if AV_OPT_ALLOW_NULL is set in search_flags in av_opt_get, and the option has
+ * AV_OPT_TYPE_STRING or AV_OPT_TYPE_BINARY and is set to NULL, *out_val will be set
+ * to NULL instead of an allocated empty string.
+ */
+int av_opt_get (void *obj, const char *name, int search_flags, uint8_t **out_val);
+int av_opt_get_int (void *obj, const char *name, int search_flags, int64_t *out_val);
+int av_opt_get_double (void *obj, const char *name, int search_flags, double *out_val);
+int av_opt_get_q (void *obj, const char *name, int search_flags, AVRational *out_val);
+int av_opt_get_image_size(void *obj, const char *name, int search_flags, int *w_out, int *h_out);
+int av_opt_get_pixel_fmt (void *obj, const char *name, int search_flags, enum AVPixelFormat *out_fmt);
+int av_opt_get_sample_fmt(void *obj, const char *name, int search_flags, enum AVSampleFormat *out_fmt);
+int av_opt_get_video_rate(void *obj, const char *name, int search_flags, AVRational *out_val);
+int av_opt_get_channel_layout(void *obj, const char *name, int search_flags, int64_t *ch_layout);
+/**
+ * @param[out] out_val The returned dictionary is a copy of the actual value and must
+ * be freed with av_dict_free() by the caller
+ */
+int av_opt_get_dict_val(void *obj, const char *name, int search_flags, AVDictionary **out_val);
+/**
+ * @}
+ */
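+/*
+ * Illustrative sketch (the option name "preset" is an assumption):
+ *
+ *   uint8_t *str = NULL;
+ *   if (av_opt_get(obj, "preset", 0, &str) >= 0) {
+ *       // use str, then release the av_malloc()ed copy
+ *       av_free(str);
+ *   }
+ */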
+/**
+ * Get a pointer to the requested field in a struct.
+ * This function allows accessing a struct even when its fields have been
+ * moved or renamed since the application making the access was compiled.
+ *
+ * @return a pointer to the field; it can be cast to the correct type and read
+ * or written to.
+ */
+void *av_opt_ptr(const AVClass *avclass, void *obj, const char *name);
+
+/**
+ * Free an AVOptionRanges struct and set it to NULL.
+ */
+void av_opt_freep_ranges(AVOptionRanges **ranges);
+
+/**
+ * Get a list of allowed ranges for the given option.
+ *
+ * The returned list may depend on other fields in obj, such as profile.
+ *
+ * @param flags is a bitmask of flags; undefined flags should not be set and should be ignored
+ * AV_OPT_SEARCH_FAKE_OBJ indicates that the obj is a double pointer to an AVClass instead of a full instance
+ * AV_OPT_MULTI_COMPONENT_RANGE indicates that the function may return more than one component, @see AVOptionRanges
+ *
+ * The result must be freed with av_opt_freep_ranges().
+ *
+ * @return number of components returned on success, a negative error code otherwise
+ */
+int av_opt_query_ranges(AVOptionRanges **, void *obj, const char *key, int flags);
+
+/**
+ * Copy options from src object into dest object.
+ *
+ * Options that require memory allocation (e.g. string or binary) are malloc'ed in the dest object.
+ * Original memory allocated for such options is freed unless both src and dest options point to the same memory.
+ *
+ * @param dest Object to copy into
+ * @param src Object to copy from
+ * @return 0 on success, negative on error
+ */
+int av_opt_copy(void *dest, const void *src);
+
+/**
+ * Get a default list of allowed ranges for the given option.
+ *
+ * This list is constructed without using the AVClass.query_ranges() callback
+ * and can be used as fallback from within the callback.
+ *
+ * @param flags is a bitmask of flags; undefined flags should not be set and should be ignored
+ * AV_OPT_SEARCH_FAKE_OBJ indicates that the obj is a double pointer to an AVClass instead of a full instance
+ * AV_OPT_MULTI_COMPONENT_RANGE indicates that the function may return more than one component, @see AVOptionRanges
+ *
+ * The result must be freed with av_opt_freep_ranges().
+ *
+ * @return number of components returned on success, a negative error code otherwise
+ */
+int av_opt_query_ranges_default(AVOptionRanges **, void *obj, const char *key, int flags);
+
+/**
+ * Check if given option is set to its default value.
+ *
+ * The option o must belong to obj. This function must not be called to check a child's option state.
+ * @see av_opt_is_set_to_default_by_name().
+ *
+ * @param obj AVClass object to check option on
+ * @param o option to be checked
+ * @return >0 when the option is set to its default,
+ * 0 when the option is not set to its default,
+ * <0 on error
+ */
+int av_opt_is_set_to_default(void *obj, const AVOption *o);
+
+/**
+ * Check if given option is set to its default value.
+ *
+ * @param obj AVClass object to check option on
+ * @param name option name
+ * @param search_flags combination of AV_OPT_SEARCH_*
+ * @return >0 when the option is set to its default,
+ * 0 when the option is not set to its default,
+ * <0 on error
+ */
+int av_opt_is_set_to_default_by_name(void *obj, const char *name, int search_flags);
+
+
+#define AV_OPT_SERIALIZE_SKIP_DEFAULTS 0x00000001 ///< Serialize only options that are not set to their default values.
+#define AV_OPT_SERIALIZE_OPT_FLAGS_EXACT 0x00000002 ///< Serialize only options whose flags exactly match opt_flags.
+
+/**
+ * Serialize object's options.
+ *
+ * Create a string containing the object's serialized options.
+ * Such a string may be passed back to av_opt_set_from_string() in order to restore option values.
+ * Key/value or pairs separators occurring in the serialized value or
+ * name string are escaped through the av_escape() function.
+ *
+ * @param[in] obj AVClass object to serialize
+ * @param[in] opt_flags serialize options with all the specified flags set (AV_OPT_FLAG)
+ * @param[in] flags combination of AV_OPT_SERIALIZE_* flags
+ * @param[out] buffer Pointer to buffer that will be allocated with a string containing the serialized options.
+ * The buffer must be freed by the caller when it is no longer needed.
+ * @param[in] key_val_sep character used to separate key from value
+ * @param[in] pairs_sep character used to separate two pairs from each other
+ * @return >= 0 on success, negative on error
+ * @warning Separators must be neither '\\' nor '\0', and they must differ from each other.
+ */
+int av_opt_serialize(void *obj, int opt_flags, int flags, char **buffer,
+ const char key_val_sep, const char pairs_sep);
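+/*
+ * Illustrative sketch: round-tripping options through a string:
+ *
+ *   char *buf = NULL;
+ *   if (av_opt_serialize(obj, 0, AV_OPT_SERIALIZE_SKIP_DEFAULTS,
+ *                        &buf, '=', ':') >= 0) {
+ *       // buf can later be fed to av_opt_set_from_string(); caller frees it
+ *       av_free(buf);
+ *   }
+ */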
+/**
+ * @}
+ */
+
+#endif /* AVUTIL_OPT_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/parseutils.h b/TMessagesProj/jni/ffmpeg/include/libavutil/parseutils.h
new file mode 100644
index 000000000..e66d24b76
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/parseutils.h
@@ -0,0 +1,193 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_PARSEUTILS_H
+#define AVUTIL_PARSEUTILS_H
+
+#include <time.h>
+
+#include "rational.h"
+
+/**
+ * @file
+ * misc parsing utilities
+ */
+
+/**
+ * Parse str and store the parsed ratio in q.
+ *
+ * Note that a ratio with infinite (1/0) or negative value is
+ * considered valid, so you should check on the returned value if you
+ * want to exclude those values.
+ *
+ * The undefined value can be expressed using the "0:0" string.
+ *
+ * @param[in,out] q pointer to the AVRational which will contain the ratio
+ * @param[in] str the string to parse: it has to be a string in the format
+ * num:den, a float number or an expression
+ * @param[in] max the maximum allowed numerator and denominator
+ * @param[in] log_offset log level offset which is applied to the log
+ * level of log_ctx
+ * @param[in] log_ctx parent logging context
+ * @return >= 0 on success, a negative error code otherwise
+ */
+int av_parse_ratio(AVRational *q, const char *str, int max,
+ int log_offset, void *log_ctx);
+
+#define av_parse_ratio_quiet(rate, str, max) \
+ av_parse_ratio(rate, str, max, AV_LOG_MAX_OFFSET, NULL)
+
+/**
+ * Parse str and put in width_ptr and height_ptr the detected values.
+ *
+ * @param[in,out] width_ptr pointer to the variable which will contain the detected
+ * width value
+ * @param[in,out] height_ptr pointer to the variable which will contain the detected
+ * height value
+ * @param[in] str the string to parse: it has to be a string in the format
+ * width x height or a valid video size abbreviation.
+ * @return >= 0 on success, a negative error code otherwise
+ */
+int av_parse_video_size(int *width_ptr, int *height_ptr, const char *str);
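+/*
+ * Illustrative sketch:
+ *
+ *   int w, h;
+ *   if (av_parse_video_size(&w, &h, "hd720") >= 0) {
+ *       // w == 1280, h == 720 ("hd720" is a video size abbreviation)
+ *   }
+ *   // an explicit "WxH" string such as "640x480" works the same way
+ */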
+
+/**
+ * Parse str and store the detected values in *rate.
+ *
+ * @param[in,out] rate pointer to the AVRational which will contain the detected
+ * frame rate
+ * @param[in] str the string to parse: it has to be a string in the format
+ * rate_num / rate_den, a float number or a valid video rate abbreviation
+ * @return >= 0 on success, a negative error code otherwise
+ */
+int av_parse_video_rate(AVRational *rate, const char *str);
+
+/**
+ * Put the RGBA values that correspond to color_string in rgba_color.
+ *
+ * @param color_string a string specifying a color. It can be the name of
+ * a color (case insensitive match) or a [0x|#]RRGGBB[AA] sequence,
+ * possibly followed by "@" and a string representing the alpha
+ * component.
+ * The alpha component may be a string composed of "0x" followed by a
+ * hexadecimal number, or a decimal number between 0.0 and 1.0, which
+ * represents the opacity value (0x00/0.0 means completely transparent,
+ * 0xff/1.0 completely opaque).
+ * If the alpha component is not specified then 0xff is assumed.
+ * The string "random" will result in a random color.
+ * @param slen length of the initial part of color_string containing the
+ * color. It can be set to -1 if color_string is a null-terminated string
+ * containing nothing other than the color.
+ * @return >= 0 in case of success, a negative value in case of
+ * failure (for example if color_string cannot be parsed).
+ */
+int av_parse_color(uint8_t *rgba_color, const char *color_string, int slen,
+ void *log_ctx);
+
+/**
+ * Get the name of a color from the internal table of hard-coded named
+ * colors.
+ *
+ * This function is meant to enumerate the color names recognized by
+ * av_parse_color().
+ *
+ * @param color_idx index of the requested color, starting from 0
+ * @param rgb if not NULL, will point to a 3-element array with the color value in RGB
+ * @return the color name string or NULL if color_idx is not in the array
+ */
+const char *av_get_known_color_name(int color_idx, const uint8_t **rgb);
+
+/**
+ * Parse timestr and return in *timeval a corresponding number of
+ * microseconds.
+ *
+ * @param timeval puts here the number of microseconds corresponding
+ * to the string in timestr. If the string represents a duration, it
+ * is the number of microseconds contained in the time interval. If
+ * the string is a date, it is the number of microseconds since the 1st of
+ * January, 1970 up to the time of the parsed date. If timestr cannot
+ * be successfully parsed, *timeval is set to INT64_MIN.
+ *
+ * @param timestr a string representing a date or a duration.
+ * - If a date the syntax is:
+ * @code
+ * [{YYYY-MM-DD|YYYYMMDD}[T|t| ]]{{HH:MM:SS[.m...]]]}|{HHMMSS[.m...]]]}}[Z]
+ * now
+ * @endcode
+ * If the value is "now" it takes the current time.
+ * Time is local time unless Z is appended, in which case it is
+ * interpreted as UTC.
+ * If the year-month-day part is not specified it takes the current
+ * year-month-day.
+ * - If a duration the syntax is:
+ * @code
+ * [-][HH:]MM:SS[.m...]
+ * [-]S+[.m...]
+ * @endcode
+ * @param duration flag which tells how to interpret timestr, if not
+ * zero timestr is interpreted as a duration, otherwise as a date
+ * @return >= 0 in case of success, a negative value corresponding to an
+ * AVERROR code otherwise
+ */
+int av_parse_time(int64_t *timeval, const char *timestr, int duration);
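+/*
+ * Illustrative sketch: parsing a duration:
+ *
+ *   int64_t us;
+ *   if (av_parse_time(&us, "01:30:05.5", 1) >= 0) {
+ *       // us == 5405500000 (1h30m5.5s expressed in microseconds)
+ *   }
+ */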
+
+/**
+ * Attempt to find a specific tag in a URL.
+ *
+ * Expected syntax: '?tag1=val1&tag2=val2...'. Only minimal URL decoding is done.
+ * Return 1 if the tag was found.
+ */
+int av_find_info_tag(char *arg, int arg_size, const char *tag1, const char *info);
+
+/**
+ * Simplified version of strptime
+ *
+ * Parse the input string p according to the format string fmt and
+ * store its results in the structure dt.
+ * This implementation supports only a subset of the formats supported
+ * by the standard strptime().
+ *
+ * The supported input field descriptors are listed below.
+ * - %H: the hour as a decimal number, using a 24-hour clock, in the
+ * range '00' through '23'
+ * - %J: hours as a decimal number, in the range '0' through INT_MAX
+ * - %M: the minute as a decimal number, in the range '00' through '59'
+ * - %S: the second as a decimal number, in the range '00' through '59'
+ * - %Y: the year as a decimal number, using the Gregorian calendar
+ * - %m: the month as a decimal number, in the range '1' through '12'
+ * - %d: the day of the month as a decimal number, in the range '1'
+ * through '31'
+ * - %T: alias for '%H:%M:%S'
+ * - %%: a literal '%'
+ *
+ * @return a pointer to the first character not processed in this function
+ * call. In case the input string contains more characters than
+ * required by the format string the return value points right after
+ * the last consumed input character. In case the whole input string
+ * is consumed the return value points to the null byte at the end of
+ * the string. On failure NULL is returned.
+ */
+char *av_small_strptime(const char *p, const char *fmt, struct tm *dt);
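+/*
+ * Illustrative sketch: parsing a UTC date and converting it with av_timegm():
+ *
+ *   struct tm dt = { 0 };
+ *   const char *rest = av_small_strptime("2015-07-09 12:30:00",
+ *                                        "%Y-%m-%d %H:%M:%S", &dt);
+ *   if (rest) {
+ *       time_t t = av_timegm(&dt); // seconds since the epoch, as UTC
+ *   }
+ */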
+
+/**
+ * Convert the decomposed UTC time in tm to a time_t value.
+ */
+time_t av_timegm(struct tm *tm);
+
+#endif /* AVUTIL_PARSEUTILS_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/pixdesc.h b/TMessagesProj/jni/ffmpeg/include/libavutil/pixdesc.h
new file mode 100644
index 000000000..a6056fe1c
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/pixdesc.h
@@ -0,0 +1,394 @@
+/*
+ * pixel format descriptor
+ * Copyright (c) 2009 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_PIXDESC_H
+#define AVUTIL_PIXDESC_H
+
+#include <inttypes.h>
+
+#include "attributes.h"
+#include "pixfmt.h"
+#include "version.h"
+
+typedef struct AVComponentDescriptor {
+ /**
+ * Which of the 4 planes contains the component.
+ */
+ int plane;
+
+ /**
+ * Number of elements between 2 horizontally consecutive pixels.
+ * Elements are bits for bitstream formats, bytes otherwise.
+ */
+ int step;
+
+ /**
+ * Number of elements before the component of the first pixel.
+ * Elements are bits for bitstream formats, bytes otherwise.
+ */
+ int offset;
+
+ /**
+ * Number of least significant bits that must be shifted away
+ * to get the value.
+ */
+ int shift;
+
+ /**
+ * Number of bits in the component.
+ */
+ int depth;
+
+#if FF_API_PLUS1_MINUS1
+ /** deprecated, use step instead */
+ attribute_deprecated int step_minus1;
+
+ /** deprecated, use depth instead */
+ attribute_deprecated int depth_minus1;
+
+ /** deprecated, use offset instead */
+ attribute_deprecated int offset_plus1;
+#endif
+} AVComponentDescriptor;
+
+/**
+ * Descriptor that unambiguously describes how the bits of a pixel are
+ * stored in the up to 4 data planes of an image. It also stores the
+ * subsampling factors and number of components.
+ *
+ * @note This is separate from the colorspace (RGB, YCbCr, YPbPr, JPEG-style YUV
+ * and all the YUV variants); AVPixFmtDescriptor just stores how values
+ * are stored, not what these values represent.
+ */
+typedef struct AVPixFmtDescriptor {
+ const char *name;
+ uint8_t nb_components; ///< The number of components each pixel has (1-4)
+
+ /**
+ * Amount to shift the luma width right to find the chroma width.
+ * For YV12 this is 1 for example.
+ * chroma_width = -((-luma_width) >> log2_chroma_w)
+ * The note above is needed to ensure rounding up.
+ * This value only refers to the chroma components.
+ */
+ uint8_t log2_chroma_w; ///< chroma_width = -((-luma_width )>>log2_chroma_w)
+
+ /**
+ * Amount to shift the luma height right to find the chroma height.
+ * For YV12 this is 1 for example.
+ * chroma_height= -((-luma_height) >> log2_chroma_h)
+ * The note above is needed to ensure rounding up.
+ * This value only refers to the chroma components.
+ */
+ uint8_t log2_chroma_h;
+
+ /**
+ * Combination of AV_PIX_FMT_FLAG_... flags.
+ */
+ uint64_t flags;
+
+ /**
+ * Parameters that describe how pixels are packed.
+ * If the format has 1 or 2 components, then luma is 0.
+ * If the format has 3 or 4 components:
+ * if the RGB flag is set then 0 is red, 1 is green and 2 is blue;
+ * otherwise 0 is luma, 1 is chroma-U and 2 is chroma-V.
+ *
+ * If present, the Alpha channel is always the last component.
+ */
+ AVComponentDescriptor comp[4];
+
+ /**
+ * Alternative comma-separated names.
+ */
+ const char *alias;
+} AVPixFmtDescriptor;
+
+/**
+ * Pixel format is big-endian.
+ */
+#define AV_PIX_FMT_FLAG_BE (1 << 0)
+/**
+ * Pixel format has a palette in data[1], values are indexes in this palette.
+ */
+#define AV_PIX_FMT_FLAG_PAL (1 << 1)
+/**
+ * All values of a component are bit-wise packed end to end.
+ */
+#define AV_PIX_FMT_FLAG_BITSTREAM (1 << 2)
+/**
+ * Pixel format is an HW accelerated format.
+ */
+#define AV_PIX_FMT_FLAG_HWACCEL (1 << 3)
+/**
+ * At least one pixel component is not in the first data plane.
+ */
+#define AV_PIX_FMT_FLAG_PLANAR (1 << 4)
+/**
+ * The pixel format contains RGB-like data (as opposed to YUV/grayscale).
+ */
+#define AV_PIX_FMT_FLAG_RGB (1 << 5)
+
+/**
+ * The pixel format is "pseudo-paletted". This means that it contains a
+ * fixed palette in the 2nd plane but the palette is fixed/constant for each
+ * PIX_FMT. This allows interpreting the data as if it was PAL8, which can
+ * in some cases be simpler. Or the data can be interpreted purely based on
+ * the pixel format without using the palette.
+ * An example of a pseudo-paletted format is AV_PIX_FMT_GRAY8.
+ */
+#define AV_PIX_FMT_FLAG_PSEUDOPAL (1 << 6)
+
+/**
+ * The pixel format has an alpha channel. This is set on all formats that
+ * support alpha in some way. The exception is AV_PIX_FMT_PAL8, which can
+ * carry alpha as part of the palette. Details are explained in the
+ * AVPixelFormat enum, and are also encoded in the corresponding
+ * AVPixFmtDescriptor.
+ *
+ * The alpha is always straight, never pre-multiplied.
+ *
+ * If a codec or a filter does not support alpha, it should set all alpha to
+ * opaque, or use the equivalent pixel formats without alpha component, e.g.
+ * AV_PIX_FMT_RGB0 (or AV_PIX_FMT_RGB24 etc.) instead of AV_PIX_FMT_RGBA.
+ */
+#define AV_PIX_FMT_FLAG_ALPHA (1 << 7)
+
+/**
+ * Read a line from an image, and write the values of the
+ * pixel format component c to dst.
+ *
+ * @param data the array containing the pointers to the planes of the image
+ * @param linesize the array containing the linesizes of the image
+ * @param desc the pixel format descriptor for the image
+ * @param x the horizontal coordinate of the first pixel to read
+ * @param y the vertical coordinate of the first pixel to read
+ * @param w the width of the line to read, that is the number of
+ * values to write to dst
+ * @param read_pal_component if not zero and the format is a paletted
+ * format writes the values corresponding to the palette
+ * component c in data[1] to dst, rather than the palette indexes in
+ * data[0]. The behavior is undefined if the format is not paletted.
+ */
+void av_read_image_line(uint16_t *dst, const uint8_t *data[4],
+ const int linesize[4], const AVPixFmtDescriptor *desc,
+ int x, int y, int c, int w, int read_pal_component);
+
+/**
+ * Write the values from src to the pixel format component c of an
+ * image line.
+ *
+ * @param src array containing the values to write
+ * @param data the array containing the pointers to the planes of the
+ * image to write into. It is supposed to be zeroed.
+ * @param linesize the array containing the linesizes of the image
+ * @param desc the pixel format descriptor for the image
+ * @param x the horizontal coordinate of the first pixel to write
+ * @param y the vertical coordinate of the first pixel to write
+ * @param w the width of the line to write, that is the number of
+ * values to write to the image line
+ */
+void av_write_image_line(const uint16_t *src, uint8_t *data[4],
+ const int linesize[4], const AVPixFmtDescriptor *desc,
+ int x, int y, int c, int w);
+
+/**
+ * Return the pixel format corresponding to name.
+ *
+ * If there is no pixel format with name name, then looks for a
+ * pixel format with the name corresponding to the native endian
+ * format of name.
+ * For example in a little-endian system, first looks for "gray16",
+ * then for "gray16le".
+ *
+ * Finally if no pixel format has been found, returns AV_PIX_FMT_NONE.
+ */
+enum AVPixelFormat av_get_pix_fmt(const char *name);
+
+/**
+ * Return the short name for a pixel format, NULL in case pix_fmt is
+ * unknown.
+ *
+ * @see av_get_pix_fmt(), av_get_pix_fmt_string()
+ */
+const char *av_get_pix_fmt_name(enum AVPixelFormat pix_fmt);
+
+/**
+ * Print in buf the string corresponding to the pixel format with
+ * number pix_fmt, or a header if pix_fmt is negative.
+ *
+ * @param buf the buffer where to write the string
+ * @param buf_size the size of buf
+ * @param pix_fmt the number of the pixel format to print the
+ * corresponding info string, or a negative value to print the
+ * corresponding header.
+ */
+char *av_get_pix_fmt_string(char *buf, int buf_size,
+ enum AVPixelFormat pix_fmt);
+
+/**
+ * Return the number of bits per pixel used by the pixel format
+ * described by pixdesc. Note that this is not the same as the number
+ * of bits per sample.
+ *
+ * The returned number of bits refers to the number of bits actually
+ * used for storing the pixel information, that is padding bits are
+ * not counted.
+ */
+int av_get_bits_per_pixel(const AVPixFmtDescriptor *pixdesc);
+
+/**
+ * Return the number of bits per pixel for the pixel format
+ * described by pixdesc, including any padding or unused bits.
+ */
+int av_get_padded_bits_per_pixel(const AVPixFmtDescriptor *pixdesc);
+
+/**
+ * @return a pixel format descriptor for provided pixel format or NULL if
+ * this pixel format is unknown.
+ */
+const AVPixFmtDescriptor *av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt);
+
+/**
+ * Iterate over all pixel format descriptors known to libavutil.
+ *
+ * @param prev previous descriptor. NULL to get the first descriptor.
+ *
+ * @return next descriptor or NULL after the last descriptor
+ */
+const AVPixFmtDescriptor *av_pix_fmt_desc_next(const AVPixFmtDescriptor *prev);
+
+/**
+ * @return an AVPixelFormat id described by desc, or AV_PIX_FMT_NONE if desc
+ * is not a valid pointer to a pixel format descriptor.
+ */
+enum AVPixelFormat av_pix_fmt_desc_get_id(const AVPixFmtDescriptor *desc);
+
+/**
+ * Utility function to access log2_chroma_w and log2_chroma_h from
+ * the pixel format AVPixFmtDescriptor.
+ *
+ * See av_get_chroma_sub_sample() for a function that asserts a
+ * valid pixel format instead of returning an error code.
+ * It is recommended that you use avcodec_get_chroma_sub_sample unless
+ * you do check the return code!
+ *
+ * @param[in] pix_fmt the pixel format
+ * @param[out] h_shift store log2_chroma_w
+ * @param[out] v_shift store log2_chroma_h
+ *
+ * @return 0 on success, AVERROR(ENOSYS) on invalid or unknown pixel format
+ */
+int av_pix_fmt_get_chroma_sub_sample(enum AVPixelFormat pix_fmt,
+ int *h_shift, int *v_shift);
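+/*
+ * Illustrative sketch: deriving chroma plane dimensions with the rounding-up
+ * shift described for log2_chroma_w/log2_chroma_h above (luma_w and luma_h
+ * are assumed to hold the luma plane size):
+ *
+ *   int h_shift, v_shift;
+ *   if (av_pix_fmt_get_chroma_sub_sample(AV_PIX_FMT_YUV420P,
+ *                                        &h_shift, &v_shift) == 0) {
+ *       int chroma_w = -((-luma_w) >> h_shift); // rounds up for odd widths
+ *       int chroma_h = -((-luma_h) >> v_shift);
+ *   }
+ */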
+
+/**
+ * @return number of planes in pix_fmt, a negative AVERROR if pix_fmt is not a
+ * valid pixel format.
+ */
+int av_pix_fmt_count_planes(enum AVPixelFormat pix_fmt);
+
+/**
+ * Utility function to swap the endianness of a pixel format.
+ *
+ * @param[in] pix_fmt the pixel format
+ *
+ * @return pixel format with swapped endianness if it exists,
+ * otherwise AV_PIX_FMT_NONE
+ */
+enum AVPixelFormat av_pix_fmt_swap_endianness(enum AVPixelFormat pix_fmt);
+
+#define FF_LOSS_RESOLUTION 0x0001 /**< loss due to resolution change */
+#define FF_LOSS_DEPTH 0x0002 /**< loss due to color depth change */
+#define FF_LOSS_COLORSPACE 0x0004 /**< loss due to color space conversion */
+#define FF_LOSS_ALPHA 0x0008 /**< loss of alpha bits */
+#define FF_LOSS_COLORQUANT 0x0010 /**< loss due to color quantization */
+#define FF_LOSS_CHROMA 0x0020 /**< loss of chroma (e.g. RGB to gray conversion) */
+
+/**
+ * Compute what kind of losses will occur when converting from one specific
+ * pixel format to another.
+ * When converting from one pixel format to another, information loss may occur.
+ * For example, when converting from RGB24 to GRAY, the color information will
+ * be lost. Similarly, other losses occur when converting from some formats to
+ * other formats. These losses can involve loss of chroma, but also loss of
+ * resolution, loss of color depth, loss due to the color space conversion, loss
+ * of the alpha bits or loss due to color quantization.
+ * av_get_pix_fmt_loss() informs you about the various types of losses
+ * which will occur when converting from one pixel format to another.
+ *
+ * @param[in] dst_pix_fmt destination pixel format
+ * @param[in] src_pix_fmt source pixel format
+ * @param[in] has_alpha Whether the source pixel format alpha channel is used.
+ * @return Combination of flags informing you what kind of losses will occur
+ * (maximum loss for an invalid dst_pix_fmt).
+ */
+int av_get_pix_fmt_loss(enum AVPixelFormat dst_pix_fmt,
+ enum AVPixelFormat src_pix_fmt,
+ int has_alpha);
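+/*
+ * Illustrative sketch: checking which losses an RGB24 -> GRAY8 conversion
+ * would incur:
+ *
+ *   int loss = av_get_pix_fmt_loss(AV_PIX_FMT_GRAY8, AV_PIX_FMT_RGB24, 0);
+ *   if (loss & FF_LOSS_CHROMA) {
+ *       // color information would be discarded
+ *   }
+ */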
+
+/**
+ * Compute the best pixel format to convert to, given a certain source pixel
+ * format and two candidate destination formats.
+ * When converting from one pixel format to another, information loss may
+ * occur. av_find_best_pix_fmt_of_2() selects which of the given pixel
+ * formats should be used to suffer the least amount of loss.
+ *
+ * @param[in] dst_pix_fmt1 one of the two destination pixel formats to choose from
+ * @param[in] dst_pix_fmt2 the other of the two destination pixel formats to choose from
+ * @param[in] src_pix_fmt source pixel format
+ * @param[in] has_alpha Whether the source pixel format alpha channel is used.
+ * @param[in,out] loss_ptr Combination of loss flags. In: selects which of the
+ * losses to ignore, i.e. NULL or a value of zero means we care about all
+ * losses. Out: the loss that occurs when converting from src to the selected
+ * dst pixel format.
+ * @return The best pixel format to convert to, or -1 if none was found.
+ */
+enum AVPixelFormat av_find_best_pix_fmt_of_2(enum AVPixelFormat dst_pix_fmt1, enum AVPixelFormat dst_pix_fmt2,
+ enum AVPixelFormat src_pix_fmt, int has_alpha, int *loss_ptr);
+
+/**
+ * @return the name for provided color range or NULL if unknown.
+ */
+const char *av_color_range_name(enum AVColorRange range);
+
+/**
+ * @return the name for provided color primaries or NULL if unknown.
+ */
+const char *av_color_primaries_name(enum AVColorPrimaries primaries);
+
+/**
+ * @return the name for provided color transfer or NULL if unknown.
+ */
+const char *av_color_transfer_name(enum AVColorTransferCharacteristic transfer);
+
+/**
+ * @return the name for provided color space or NULL if unknown.
+ */
+const char *av_color_space_name(enum AVColorSpace space);
+
+/**
+ * @return the name for provided chroma location or NULL if unknown.
+ */
+const char *av_chroma_location_name(enum AVChromaLocation location);
+
+#endif /* AVUTIL_PIXDESC_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/pixelutils.h b/TMessagesProj/jni/ffmpeg/include/libavutil/pixelutils.h
new file mode 100644
index 000000000..a8dbc157e
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/pixelutils.h
@@ -0,0 +1,52 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_PIXELUTILS_H
+#define AVUTIL_PIXELUTILS_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include "common.h"
+
+/**
+ * Sum of abs(src1[x] - src2[x])
+ */
+typedef int (*av_pixelutils_sad_fn)(const uint8_t *src1, ptrdiff_t stride1,
+ const uint8_t *src2, ptrdiff_t stride2);
+
+/**
+ * Get a potentially optimized pointer to a Sum-of-absolute-differences
+ * function (see the av_pixelutils_sad_fn prototype).
+ *
+ * @param w_bits 1<<w_bits is the requested width in pixels
+ * @param h_bits 1<<h_bits is the requested height in pixels
+ * @param aligned If set to 2, the returned sad function will assume src1 and
+ * src2 addresses are aligned on the block size.
+ * If set to 1, the returned sad function will assume src1 is
+ * aligned on the block size.
+ * If set to 0, the returned sad function assumes no particular
+ * alignment.
+ * @param log_ctx context used for logging, can be NULL
+ *
+ * @return a pointer to the SAD function or NULL in case of error (typically
+ * not enough memory for the Sum-of-absolute-differences struct).
+ */
+av_pixelutils_sad_fn av_pixelutils_get_sad_fn(int w_bits, int h_bits,
+ int aligned, void *log_ctx);
+
+#endif /* AVUTIL_PIXELUTILS_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/pixfmt.h b/TMessagesProj/jni/ffmpeg/include/libavutil/pixfmt.h
new file mode 100644
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/pixfmt.h
+/*
+ * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_PIXFMT_H
+#define AVUTIL_PIXFMT_H
+
+/**
+ * @file
+ * pixel format definitions
+ *
+ */
+
+#include "libavutil/avconfig.h"
+#include "version.h"
+
+#define AVPALETTE_SIZE 1024
+#define AVPALETTE_COUNT 256
+
+/**
+ * Pixel format.
+ *
+ * @note
+ * AV_PIX_FMT_RGB32 is handled in an endian-specific manner. An RGBA
+ * color is put together as:
+ * (A << 24) | (R << 16) | (G << 8) | B
+ * This is stored as BGRA on little-endian CPU architectures and ARGB on
+ * big-endian CPUs.
+ *
+ * @par
+ * When the pixel format is palettized RGB32 (AV_PIX_FMT_PAL8), the palettized
+ * image data is stored in AVFrame.data[0]. The palette is transported in
+ * AVFrame.data[1], is 1024 bytes long (256 4-byte entries) and is
+ * formatted the same as in AV_PIX_FMT_RGB32 described above (i.e., it is
+ * also endian-specific). Note also that the individual RGB32 palette
+ * components stored in AVFrame.data[1] should be in the range 0..255.
+ * This is important as many custom PAL8 video codecs that were designed
+ * to run on the IBM VGA graphics adapter use 6-bit palette components.
+ *
+ * @par
+ * For all the 8bit per pixel formats, an RGB32 palette is in data[1] like
+ * for pal8. This palette is filled in automatically by the function
+ * allocating the picture.
+ */
+enum AVPixelFormat {
+ AV_PIX_FMT_NONE = -1,
+ AV_PIX_FMT_YUV420P, ///< planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
+ AV_PIX_FMT_YUYV422, ///< packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr
+ AV_PIX_FMT_RGB24, ///< packed RGB 8:8:8, 24bpp, RGBRGB...
+ AV_PIX_FMT_BGR24, ///< packed RGB 8:8:8, 24bpp, BGRBGR...
+ AV_PIX_FMT_YUV422P, ///< planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
+ AV_PIX_FMT_YUV444P, ///< planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
+ AV_PIX_FMT_YUV410P, ///< planar YUV 4:1:0, 9bpp, (1 Cr & Cb sample per 4x4 Y samples)
+ AV_PIX_FMT_YUV411P, ///< planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)
+ AV_PIX_FMT_GRAY8, ///< Y , 8bpp
+ AV_PIX_FMT_MONOWHITE, ///< Y , 1bpp, 0 is white, 1 is black, in each byte pixels are ordered from the msb to the lsb
+ AV_PIX_FMT_MONOBLACK, ///< Y , 1bpp, 0 is black, 1 is white, in each byte pixels are ordered from the msb to the lsb
+ AV_PIX_FMT_PAL8, ///< 8 bit with AV_PIX_FMT_RGB32 palette
+ AV_PIX_FMT_YUVJ420P, ///< planar YUV 4:2:0, 12bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV420P and setting color_range
+ AV_PIX_FMT_YUVJ422P, ///< planar YUV 4:2:2, 16bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV422P and setting color_range
+ AV_PIX_FMT_YUVJ444P, ///< planar YUV 4:4:4, 24bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV444P and setting color_range
+#if FF_API_XVMC
+ AV_PIX_FMT_XVMC_MPEG2_MC,///< XVideo Motion Acceleration via common packet passing
+ AV_PIX_FMT_XVMC_MPEG2_IDCT,
+#define AV_PIX_FMT_XVMC AV_PIX_FMT_XVMC_MPEG2_IDCT
+#endif /* FF_API_XVMC */
+ AV_PIX_FMT_UYVY422, ///< packed YUV 4:2:2, 16bpp, Cb Y0 Cr Y1
+ AV_PIX_FMT_UYYVYY411, ///< packed YUV 4:1:1, 12bpp, Cb Y0 Y1 Cr Y2 Y3
+ AV_PIX_FMT_BGR8, ///< packed RGB 3:3:2, 8bpp, (msb)2B 3G 3R(lsb)
+ AV_PIX_FMT_BGR4, ///< packed RGB 1:2:1 bitstream, 4bpp, (msb)1B 2G 1R(lsb), a byte contains two pixels, the first pixel in the byte is the one composed by the 4 msb bits
+ AV_PIX_FMT_BGR4_BYTE, ///< packed RGB 1:2:1, 8bpp, (msb)1B 2G 1R(lsb)
+ AV_PIX_FMT_RGB8, ///< packed RGB 3:3:2, 8bpp, (msb)2R 3G 3B(lsb)
+ AV_PIX_FMT_RGB4, ///< packed RGB 1:2:1 bitstream, 4bpp, (msb)1R 2G 1B(lsb), a byte contains two pixels, the first pixel in the byte is the one composed by the 4 msb bits
+ AV_PIX_FMT_RGB4_BYTE, ///< packed RGB 1:2:1, 8bpp, (msb)1R 2G 1B(lsb)
+ AV_PIX_FMT_NV12, ///< planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (first byte U and the following byte V)
+ AV_PIX_FMT_NV21, ///< as above, but U and V bytes are swapped
+
+ AV_PIX_FMT_ARGB, ///< packed ARGB 8:8:8:8, 32bpp, ARGBARGB...
+ AV_PIX_FMT_RGBA, ///< packed RGBA 8:8:8:8, 32bpp, RGBARGBA...
+ AV_PIX_FMT_ABGR, ///< packed ABGR 8:8:8:8, 32bpp, ABGRABGR...
+ AV_PIX_FMT_BGRA, ///< packed BGRA 8:8:8:8, 32bpp, BGRABGRA...
+
+ AV_PIX_FMT_GRAY16BE, ///< Y , 16bpp, big-endian
+ AV_PIX_FMT_GRAY16LE, ///< Y , 16bpp, little-endian
+ AV_PIX_FMT_YUV440P, ///< planar YUV 4:4:0 (1 Cr & Cb sample per 1x2 Y samples)
+ AV_PIX_FMT_YUVJ440P, ///< planar YUV 4:4:0 full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV440P and setting color_range
+ AV_PIX_FMT_YUVA420P, ///< planar YUV 4:2:0, 20bpp, (1 Cr & Cb sample per 2x2 Y & A samples)
+#if FF_API_VDPAU
+ AV_PIX_FMT_VDPAU_H264,///< H.264 HW decoding with VDPAU, data[0] contains a vdpau_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
+ AV_PIX_FMT_VDPAU_MPEG1,///< MPEG-1 HW decoding with VDPAU, data[0] contains a vdpau_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
+ AV_PIX_FMT_VDPAU_MPEG2,///< MPEG-2 HW decoding with VDPAU, data[0] contains a vdpau_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
+ AV_PIX_FMT_VDPAU_WMV3,///< WMV3 HW decoding with VDPAU, data[0] contains a vdpau_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
+ AV_PIX_FMT_VDPAU_VC1, ///< VC-1 HW decoding with VDPAU, data[0] contains a vdpau_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
+#endif
+ AV_PIX_FMT_RGB48BE, ///< packed RGB 16:16:16, 48bpp, 16R, 16G, 16B, the 2-byte value for each R/G/B component is stored as big-endian
+ AV_PIX_FMT_RGB48LE, ///< packed RGB 16:16:16, 48bpp, 16R, 16G, 16B, the 2-byte value for each R/G/B component is stored as little-endian
+
+ AV_PIX_FMT_RGB565BE, ///< packed RGB 5:6:5, 16bpp, (msb) 5R 6G 5B(lsb), big-endian
+ AV_PIX_FMT_RGB565LE, ///< packed RGB 5:6:5, 16bpp, (msb) 5R 6G 5B(lsb), little-endian
+ AV_PIX_FMT_RGB555BE, ///< packed RGB 5:5:5, 16bpp, (msb)1X 5R 5G 5B(lsb), big-endian , X=unused/undefined
+ AV_PIX_FMT_RGB555LE, ///< packed RGB 5:5:5, 16bpp, (msb)1X 5R 5G 5B(lsb), little-endian, X=unused/undefined
+
+ AV_PIX_FMT_BGR565BE, ///< packed BGR 5:6:5, 16bpp, (msb) 5B 6G 5R(lsb), big-endian
+ AV_PIX_FMT_BGR565LE, ///< packed BGR 5:6:5, 16bpp, (msb) 5B 6G 5R(lsb), little-endian
+ AV_PIX_FMT_BGR555BE, ///< packed BGR 5:5:5, 16bpp, (msb)1X 5B 5G 5R(lsb), big-endian , X=unused/undefined
+ AV_PIX_FMT_BGR555LE, ///< packed BGR 5:5:5, 16bpp, (msb)1X 5B 5G 5R(lsb), little-endian, X=unused/undefined
+
+#if FF_API_VAAPI
+ /** @name Deprecated pixel formats */
+ /**@{*/
+ AV_PIX_FMT_VAAPI_MOCO, ///< HW acceleration through VA API at motion compensation entry-point, Picture.data[3] contains a vaapi_render_state struct which contains macroblocks as well as various fields extracted from headers
+ AV_PIX_FMT_VAAPI_IDCT, ///< HW acceleration through VA API at IDCT entry-point, Picture.data[3] contains a vaapi_render_state struct which contains fields extracted from headers
+ AV_PIX_FMT_VAAPI_VLD, ///< HW decoding through VA API, Picture.data[3] contains a vaapi_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
+ /**@}*/
+ AV_PIX_FMT_VAAPI = AV_PIX_FMT_VAAPI_VLD,
+#else
+ /**
+ * Hardware acceleration through VA-API, data[3] contains a
+ * VASurfaceID.
+ */
+ AV_PIX_FMT_VAAPI,
+#endif
+
+ AV_PIX_FMT_YUV420P16LE, ///< planar YUV 4:2:0, 24bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian
+ AV_PIX_FMT_YUV420P16BE, ///< planar YUV 4:2:0, 24bpp, (1 Cr & Cb sample per 2x2 Y samples), big-endian
+ AV_PIX_FMT_YUV422P16LE, ///< planar YUV 4:2:2, 32bpp, (1 Cr & Cb sample per 2x1 Y samples), little-endian
+ AV_PIX_FMT_YUV422P16BE, ///< planar YUV 4:2:2, 32bpp, (1 Cr & Cb sample per 2x1 Y samples), big-endian
+ AV_PIX_FMT_YUV444P16LE, ///< planar YUV 4:4:4, 48bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian
+ AV_PIX_FMT_YUV444P16BE, ///< planar YUV 4:4:4, 48bpp, (1 Cr & Cb sample per 1x1 Y samples), big-endian
+#if FF_API_VDPAU
+ AV_PIX_FMT_VDPAU_MPEG4, ///< MPEG4 HW decoding with VDPAU, data[0] contains a vdpau_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
+#endif
+ AV_PIX_FMT_DXVA2_VLD, ///< HW decoding through DXVA2, Picture.data[3] contains a LPDIRECT3DSURFACE9 pointer
+
+ AV_PIX_FMT_RGB444LE, ///< packed RGB 4:4:4, 16bpp, (msb)4X 4R 4G 4B(lsb), little-endian, X=unused/undefined
+ AV_PIX_FMT_RGB444BE, ///< packed RGB 4:4:4, 16bpp, (msb)4X 4R 4G 4B(lsb), big-endian, X=unused/undefined
+ AV_PIX_FMT_BGR444LE, ///< packed BGR 4:4:4, 16bpp, (msb)4X 4B 4G 4R(lsb), little-endian, X=unused/undefined
+ AV_PIX_FMT_BGR444BE, ///< packed BGR 4:4:4, 16bpp, (msb)4X 4B 4G 4R(lsb), big-endian, X=unused/undefined
+ AV_PIX_FMT_YA8, ///< 8bit gray, 8bit alpha
+
+ AV_PIX_FMT_Y400A = AV_PIX_FMT_YA8, ///< alias for AV_PIX_FMT_YA8
+ AV_PIX_FMT_GRAY8A= AV_PIX_FMT_YA8, ///< alias for AV_PIX_FMT_YA8
+
+ AV_PIX_FMT_BGR48BE, ///< packed RGB 16:16:16, 48bpp, 16B, 16G, 16R, the 2-byte value for each R/G/B component is stored as big-endian
+ AV_PIX_FMT_BGR48LE, ///< packed RGB 16:16:16, 48bpp, 16B, 16G, 16R, the 2-byte value for each R/G/B component is stored as little-endian
+
+ /**
+ * The following 12 formats have the disadvantage of needing 1 format for each bit depth.
+ * Notice that each 9/10 bits sample is stored in 16 bits with extra padding.
+ * If you want to support multiple bit depths, then using AV_PIX_FMT_YUV420P16* with the bpp stored separately is better.
+ */
+ AV_PIX_FMT_YUV420P9BE, ///< planar YUV 4:2:0, 13.5bpp, (1 Cr & Cb sample per 2x2 Y samples), big-endian
+ AV_PIX_FMT_YUV420P9LE, ///< planar YUV 4:2:0, 13.5bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian
+ AV_PIX_FMT_YUV420P10BE,///< planar YUV 4:2:0, 15bpp, (1 Cr & Cb sample per 2x2 Y samples), big-endian
+ AV_PIX_FMT_YUV420P10LE,///< planar YUV 4:2:0, 15bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian
+ AV_PIX_FMT_YUV422P10BE,///< planar YUV 4:2:2, 20bpp, (1 Cr & Cb sample per 2x1 Y samples), big-endian
+ AV_PIX_FMT_YUV422P10LE,///< planar YUV 4:2:2, 20bpp, (1 Cr & Cb sample per 2x1 Y samples), little-endian
+ AV_PIX_FMT_YUV444P9BE, ///< planar YUV 4:4:4, 27bpp, (1 Cr & Cb sample per 1x1 Y samples), big-endian
+ AV_PIX_FMT_YUV444P9LE, ///< planar YUV 4:4:4, 27bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian
+ AV_PIX_FMT_YUV444P10BE,///< planar YUV 4:4:4, 30bpp, (1 Cr & Cb sample per 1x1 Y samples), big-endian
+ AV_PIX_FMT_YUV444P10LE,///< planar YUV 4:4:4, 30bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian
+ AV_PIX_FMT_YUV422P9BE, ///< planar YUV 4:2:2, 18bpp, (1 Cr & Cb sample per 2x1 Y samples), big-endian
+ AV_PIX_FMT_YUV422P9LE, ///< planar YUV 4:2:2, 18bpp, (1 Cr & Cb sample per 2x1 Y samples), little-endian
+ AV_PIX_FMT_VDA_VLD, ///< hardware decoding through VDA
+ AV_PIX_FMT_GBRP, ///< planar GBR 4:4:4 24bpp
+ AV_PIX_FMT_GBRP9BE, ///< planar GBR 4:4:4 27bpp, big-endian
+ AV_PIX_FMT_GBRP9LE, ///< planar GBR 4:4:4 27bpp, little-endian
+ AV_PIX_FMT_GBRP10BE, ///< planar GBR 4:4:4 30bpp, big-endian
+ AV_PIX_FMT_GBRP10LE, ///< planar GBR 4:4:4 30bpp, little-endian
+ AV_PIX_FMT_GBRP16BE, ///< planar GBR 4:4:4 48bpp, big-endian
+ AV_PIX_FMT_GBRP16LE, ///< planar GBR 4:4:4 48bpp, little-endian
+ AV_PIX_FMT_YUVA422P, ///< planar YUV 4:2:2 24bpp, (1 Cr & Cb sample per 2x1 Y & A samples)
+ AV_PIX_FMT_YUVA444P, ///< planar YUV 4:4:4 32bpp, (1 Cr & Cb sample per 1x1 Y & A samples)
+ AV_PIX_FMT_YUVA420P9BE, ///< planar YUV 4:2:0 22.5bpp, (1 Cr & Cb sample per 2x2 Y & A samples), big-endian
+ AV_PIX_FMT_YUVA420P9LE, ///< planar YUV 4:2:0 22.5bpp, (1 Cr & Cb sample per 2x2 Y & A samples), little-endian
+ AV_PIX_FMT_YUVA422P9BE, ///< planar YUV 4:2:2 27bpp, (1 Cr & Cb sample per 2x1 Y & A samples), big-endian
+ AV_PIX_FMT_YUVA422P9LE, ///< planar YUV 4:2:2 27bpp, (1 Cr & Cb sample per 2x1 Y & A samples), little-endian
+ AV_PIX_FMT_YUVA444P9BE, ///< planar YUV 4:4:4 36bpp, (1 Cr & Cb sample per 1x1 Y & A samples), big-endian
+ AV_PIX_FMT_YUVA444P9LE, ///< planar YUV 4:4:4 36bpp, (1 Cr & Cb sample per 1x1 Y & A samples), little-endian
+ AV_PIX_FMT_YUVA420P10BE, ///< planar YUV 4:2:0 25bpp, (1 Cr & Cb sample per 2x2 Y & A samples, big-endian)
+ AV_PIX_FMT_YUVA420P10LE, ///< planar YUV 4:2:0 25bpp, (1 Cr & Cb sample per 2x2 Y & A samples, little-endian)
+ AV_PIX_FMT_YUVA422P10BE, ///< planar YUV 4:2:2 30bpp, (1 Cr & Cb sample per 2x1 Y & A samples, big-endian)
+ AV_PIX_FMT_YUVA422P10LE, ///< planar YUV 4:2:2 30bpp, (1 Cr & Cb sample per 2x1 Y & A samples, little-endian)
+ AV_PIX_FMT_YUVA444P10BE, ///< planar YUV 4:4:4 40bpp, (1 Cr & Cb sample per 1x1 Y & A samples, big-endian)
+ AV_PIX_FMT_YUVA444P10LE, ///< planar YUV 4:4:4 40bpp, (1 Cr & Cb sample per 1x1 Y & A samples, little-endian)
+ AV_PIX_FMT_YUVA420P16BE, ///< planar YUV 4:2:0 40bpp, (1 Cr & Cb sample per 2x2 Y & A samples, big-endian)
+ AV_PIX_FMT_YUVA420P16LE, ///< planar YUV 4:2:0 40bpp, (1 Cr & Cb sample per 2x2 Y & A samples, little-endian)
+ AV_PIX_FMT_YUVA422P16BE, ///< planar YUV 4:2:2 48bpp, (1 Cr & Cb sample per 2x1 Y & A samples, big-endian)
+ AV_PIX_FMT_YUVA422P16LE, ///< planar YUV 4:2:2 48bpp, (1 Cr & Cb sample per 2x1 Y & A samples, little-endian)
+ AV_PIX_FMT_YUVA444P16BE, ///< planar YUV 4:4:4 64bpp, (1 Cr & Cb sample per 1x1 Y & A samples, big-endian)
+ AV_PIX_FMT_YUVA444P16LE, ///< planar YUV 4:4:4 64bpp, (1 Cr & Cb sample per 1x1 Y & A samples, little-endian)
+
+ AV_PIX_FMT_VDPAU, ///< HW acceleration through VDPAU, Picture.data[3] contains a VdpVideoSurface
+
+ AV_PIX_FMT_XYZ12LE, ///< packed XYZ 4:4:4, 36 bpp, (msb) 12X, 12Y, 12Z (lsb), the 2-byte value for each X/Y/Z is stored as little-endian, the 4 lower bits are set to 0
+ AV_PIX_FMT_XYZ12BE, ///< packed XYZ 4:4:4, 36 bpp, (msb) 12X, 12Y, 12Z (lsb), the 2-byte value for each X/Y/Z is stored as big-endian, the 4 lower bits are set to 0
+ AV_PIX_FMT_NV16, ///< interleaved chroma YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
+ AV_PIX_FMT_NV20LE, ///< interleaved chroma YUV 4:2:2, 20bpp, (1 Cr & Cb sample per 2x1 Y samples), little-endian
+ AV_PIX_FMT_NV20BE, ///< interleaved chroma YUV 4:2:2, 20bpp, (1 Cr & Cb sample per 2x1 Y samples), big-endian
+
+ AV_PIX_FMT_RGBA64BE, ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian
+ AV_PIX_FMT_RGBA64LE, ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian
+ AV_PIX_FMT_BGRA64BE, ///< packed RGBA 16:16:16:16, 64bpp, 16B, 16G, 16R, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian
+ AV_PIX_FMT_BGRA64LE, ///< packed RGBA 16:16:16:16, 64bpp, 16B, 16G, 16R, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian
+
+ AV_PIX_FMT_YVYU422, ///< packed YUV 4:2:2, 16bpp, Y0 Cr Y1 Cb
+
+ AV_PIX_FMT_VDA, ///< HW acceleration through VDA, data[3] contains a CVPixelBufferRef
+
+ AV_PIX_FMT_YA16BE, ///< 16bit gray, 16bit alpha (big-endian)
+ AV_PIX_FMT_YA16LE, ///< 16bit gray, 16bit alpha (little-endian)
+
+ AV_PIX_FMT_GBRAP, ///< planar GBRA 4:4:4:4 32bpp
+ AV_PIX_FMT_GBRAP16BE, ///< planar GBRA 4:4:4:4 64bpp, big-endian
+ AV_PIX_FMT_GBRAP16LE, ///< planar GBRA 4:4:4:4 64bpp, little-endian
+ /**
+ * HW acceleration through QSV, data[3] contains a pointer to the
+ * mfxFrameSurface1 structure.
+ */
+ AV_PIX_FMT_QSV,
+ /**
+ * HW acceleration through MMAL, data[3] contains a pointer to the
+ * MMAL_BUFFER_HEADER_T structure.
+ */
+ AV_PIX_FMT_MMAL,
+
+ AV_PIX_FMT_D3D11VA_VLD, ///< HW decoding through Direct3D11, Picture.data[3] contains a ID3D11VideoDecoderOutputView pointer
+
+ AV_PIX_FMT_0RGB=0x123+4,///< packed RGB 8:8:8, 32bpp, XRGBXRGB... X=unused/undefined
+ AV_PIX_FMT_RGB0, ///< packed RGB 8:8:8, 32bpp, RGBXRGBX... X=unused/undefined
+ AV_PIX_FMT_0BGR, ///< packed BGR 8:8:8, 32bpp, XBGRXBGR... X=unused/undefined
+ AV_PIX_FMT_BGR0, ///< packed BGR 8:8:8, 32bpp, BGRXBGRX... X=unused/undefined
+
+ AV_PIX_FMT_YUV420P12BE, ///< planar YUV 4:2:0,18bpp, (1 Cr & Cb sample per 2x2 Y samples), big-endian
+ AV_PIX_FMT_YUV420P12LE, ///< planar YUV 4:2:0,18bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian
+ AV_PIX_FMT_YUV420P14BE, ///< planar YUV 4:2:0,21bpp, (1 Cr & Cb sample per 2x2 Y samples), big-endian
+ AV_PIX_FMT_YUV420P14LE, ///< planar YUV 4:2:0,21bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian
+ AV_PIX_FMT_YUV422P12BE, ///< planar YUV 4:2:2,24bpp, (1 Cr & Cb sample per 2x1 Y samples), big-endian
+ AV_PIX_FMT_YUV422P12LE, ///< planar YUV 4:2:2,24bpp, (1 Cr & Cb sample per 2x1 Y samples), little-endian
+ AV_PIX_FMT_YUV422P14BE, ///< planar YUV 4:2:2,28bpp, (1 Cr & Cb sample per 2x1 Y samples), big-endian
+ AV_PIX_FMT_YUV422P14LE, ///< planar YUV 4:2:2,28bpp, (1 Cr & Cb sample per 2x1 Y samples), little-endian
+ AV_PIX_FMT_YUV444P12BE, ///< planar YUV 4:4:4,36bpp, (1 Cr & Cb sample per 1x1 Y samples), big-endian
+ AV_PIX_FMT_YUV444P12LE, ///< planar YUV 4:4:4,36bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian
+ AV_PIX_FMT_YUV444P14BE, ///< planar YUV 4:4:4,42bpp, (1 Cr & Cb sample per 1x1 Y samples), big-endian
+ AV_PIX_FMT_YUV444P14LE, ///< planar YUV 4:4:4,42bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian
+ AV_PIX_FMT_GBRP12BE, ///< planar GBR 4:4:4 36bpp, big-endian
+ AV_PIX_FMT_GBRP12LE, ///< planar GBR 4:4:4 36bpp, little-endian
+ AV_PIX_FMT_GBRP14BE, ///< planar GBR 4:4:4 42bpp, big-endian
+ AV_PIX_FMT_GBRP14LE, ///< planar GBR 4:4:4 42bpp, little-endian
+ AV_PIX_FMT_YUVJ411P, ///< planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples) full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV411P and setting color_range
+
+ AV_PIX_FMT_BAYER_BGGR8, ///< bayer, BGBG..(odd line), GRGR..(even line), 8-bit samples
+ AV_PIX_FMT_BAYER_RGGB8, ///< bayer, RGRG..(odd line), GBGB..(even line), 8-bit samples
+ AV_PIX_FMT_BAYER_GBRG8, ///< bayer, GBGB..(odd line), RGRG..(even line), 8-bit samples
+ AV_PIX_FMT_BAYER_GRBG8, ///< bayer, GRGR..(odd line), BGBG..(even line), 8-bit samples
+ AV_PIX_FMT_BAYER_BGGR16LE, ///< bayer, BGBG..(odd line), GRGR..(even line), 16-bit samples, little-endian
+ AV_PIX_FMT_BAYER_BGGR16BE, ///< bayer, BGBG..(odd line), GRGR..(even line), 16-bit samples, big-endian
+ AV_PIX_FMT_BAYER_RGGB16LE, ///< bayer, RGRG..(odd line), GBGB..(even line), 16-bit samples, little-endian
+ AV_PIX_FMT_BAYER_RGGB16BE, ///< bayer, RGRG..(odd line), GBGB..(even line), 16-bit samples, big-endian
+ AV_PIX_FMT_BAYER_GBRG16LE, ///< bayer, GBGB..(odd line), RGRG..(even line), 16-bit samples, little-endian
+ AV_PIX_FMT_BAYER_GBRG16BE, ///< bayer, GBGB..(odd line), RGRG..(even line), 16-bit samples, big-endian
+ AV_PIX_FMT_BAYER_GRBG16LE, ///< bayer, GRGR..(odd line), BGBG..(even line), 16-bit samples, little-endian
+ AV_PIX_FMT_BAYER_GRBG16BE, ///< bayer, GRGR..(odd line), BGBG..(even line), 16-bit samples, big-endian
+#if !FF_API_XVMC
+ AV_PIX_FMT_XVMC,///< XVideo Motion Acceleration via common packet passing
+#endif /* !FF_API_XVMC */
+ AV_PIX_FMT_YUV440P10LE, ///< planar YUV 4:4:0,20bpp, (1 Cr & Cb sample per 1x2 Y samples), little-endian
+ AV_PIX_FMT_YUV440P10BE, ///< planar YUV 4:4:0,20bpp, (1 Cr & Cb sample per 1x2 Y samples), big-endian
+ AV_PIX_FMT_YUV440P12LE, ///< planar YUV 4:4:0,24bpp, (1 Cr & Cb sample per 1x2 Y samples), little-endian
+ AV_PIX_FMT_YUV440P12BE, ///< planar YUV 4:4:0,24bpp, (1 Cr & Cb sample per 1x2 Y samples), big-endian
+ AV_PIX_FMT_AYUV64LE, ///< packed AYUV 4:4:4,64bpp (1 Cr & Cb sample per 1x1 Y & A samples), little-endian
+ AV_PIX_FMT_AYUV64BE, ///< packed AYUV 4:4:4,64bpp (1 Cr & Cb sample per 1x1 Y & A samples), big-endian
+
+ AV_PIX_FMT_VIDEOTOOLBOX, ///< hardware decoding through Videotoolbox
+
+ AV_PIX_FMT_NB, ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions
+};
+
+#define AV_PIX_FMT_Y400A AV_PIX_FMT_GRAY8A
+#define AV_PIX_FMT_GBR24P AV_PIX_FMT_GBRP
+
+#if AV_HAVE_BIGENDIAN
+# define AV_PIX_FMT_NE(be, le) AV_PIX_FMT_##be
+#else
+# define AV_PIX_FMT_NE(be, le) AV_PIX_FMT_##le
+#endif
+
+#define AV_PIX_FMT_RGB32 AV_PIX_FMT_NE(ARGB, BGRA)
+#define AV_PIX_FMT_RGB32_1 AV_PIX_FMT_NE(RGBA, ABGR)
+#define AV_PIX_FMT_BGR32 AV_PIX_FMT_NE(ABGR, RGBA)
+#define AV_PIX_FMT_BGR32_1 AV_PIX_FMT_NE(BGRA, ARGB)
+#define AV_PIX_FMT_0RGB32 AV_PIX_FMT_NE(0RGB, BGR0)
+#define AV_PIX_FMT_0BGR32 AV_PIX_FMT_NE(0BGR, RGB0)
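+
+/* For example, on a big-endian host AV_PIX_FMT_RGB32 resolves to
+ * AV_PIX_FMT_ARGB and on a little-endian host to AV_PIX_FMT_BGRA; either way
+ * a pixel loaded as a native uint32_t reads as 0xAARRGGBB. */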
+
+#define AV_PIX_FMT_GRAY16 AV_PIX_FMT_NE(GRAY16BE, GRAY16LE)
+#define AV_PIX_FMT_YA16 AV_PIX_FMT_NE(YA16BE, YA16LE)
+#define AV_PIX_FMT_RGB48 AV_PIX_FMT_NE(RGB48BE, RGB48LE)
+#define AV_PIX_FMT_RGB565 AV_PIX_FMT_NE(RGB565BE, RGB565LE)
+#define AV_PIX_FMT_RGB555 AV_PIX_FMT_NE(RGB555BE, RGB555LE)
+#define AV_PIX_FMT_RGB444 AV_PIX_FMT_NE(RGB444BE, RGB444LE)
+#define AV_PIX_FMT_RGBA64 AV_PIX_FMT_NE(RGBA64BE, RGBA64LE)
+#define AV_PIX_FMT_BGR48 AV_PIX_FMT_NE(BGR48BE, BGR48LE)
+#define AV_PIX_FMT_BGR565 AV_PIX_FMT_NE(BGR565BE, BGR565LE)
+#define AV_PIX_FMT_BGR555 AV_PIX_FMT_NE(BGR555BE, BGR555LE)
+#define AV_PIX_FMT_BGR444 AV_PIX_FMT_NE(BGR444BE, BGR444LE)
+#define AV_PIX_FMT_BGRA64 AV_PIX_FMT_NE(BGRA64BE, BGRA64LE)
+
+#define AV_PIX_FMT_YUV420P9 AV_PIX_FMT_NE(YUV420P9BE , YUV420P9LE)
+#define AV_PIX_FMT_YUV422P9 AV_PIX_FMT_NE(YUV422P9BE , YUV422P9LE)
+#define AV_PIX_FMT_YUV444P9 AV_PIX_FMT_NE(YUV444P9BE , YUV444P9LE)
+#define AV_PIX_FMT_YUV420P10 AV_PIX_FMT_NE(YUV420P10BE, YUV420P10LE)
+#define AV_PIX_FMT_YUV422P10 AV_PIX_FMT_NE(YUV422P10BE, YUV422P10LE)
+#define AV_PIX_FMT_YUV440P10 AV_PIX_FMT_NE(YUV440P10BE, YUV440P10LE)
+#define AV_PIX_FMT_YUV444P10 AV_PIX_FMT_NE(YUV444P10BE, YUV444P10LE)
+#define AV_PIX_FMT_YUV420P12 AV_PIX_FMT_NE(YUV420P12BE, YUV420P12LE)
+#define AV_PIX_FMT_YUV422P12 AV_PIX_FMT_NE(YUV422P12BE, YUV422P12LE)
+#define AV_PIX_FMT_YUV440P12 AV_PIX_FMT_NE(YUV440P12BE, YUV440P12LE)
+#define AV_PIX_FMT_YUV444P12 AV_PIX_FMT_NE(YUV444P12BE, YUV444P12LE)
+#define AV_PIX_FMT_YUV420P14 AV_PIX_FMT_NE(YUV420P14BE, YUV420P14LE)
+#define AV_PIX_FMT_YUV422P14 AV_PIX_FMT_NE(YUV422P14BE, YUV422P14LE)
+#define AV_PIX_FMT_YUV444P14 AV_PIX_FMT_NE(YUV444P14BE, YUV444P14LE)
+#define AV_PIX_FMT_YUV420P16 AV_PIX_FMT_NE(YUV420P16BE, YUV420P16LE)
+#define AV_PIX_FMT_YUV422P16 AV_PIX_FMT_NE(YUV422P16BE, YUV422P16LE)
+#define AV_PIX_FMT_YUV444P16 AV_PIX_FMT_NE(YUV444P16BE, YUV444P16LE)
+
+#define AV_PIX_FMT_GBRP9 AV_PIX_FMT_NE(GBRP9BE , GBRP9LE)
+#define AV_PIX_FMT_GBRP10 AV_PIX_FMT_NE(GBRP10BE, GBRP10LE)
+#define AV_PIX_FMT_GBRP12 AV_PIX_FMT_NE(GBRP12BE, GBRP12LE)
+#define AV_PIX_FMT_GBRP14 AV_PIX_FMT_NE(GBRP14BE, GBRP14LE)
+#define AV_PIX_FMT_GBRP16 AV_PIX_FMT_NE(GBRP16BE, GBRP16LE)
+#define AV_PIX_FMT_GBRAP16 AV_PIX_FMT_NE(GBRAP16BE, GBRAP16LE)
+
+#define AV_PIX_FMT_BAYER_BGGR16 AV_PIX_FMT_NE(BAYER_BGGR16BE, BAYER_BGGR16LE)
+#define AV_PIX_FMT_BAYER_RGGB16 AV_PIX_FMT_NE(BAYER_RGGB16BE, BAYER_RGGB16LE)
+#define AV_PIX_FMT_BAYER_GBRG16 AV_PIX_FMT_NE(BAYER_GBRG16BE, BAYER_GBRG16LE)
+#define AV_PIX_FMT_BAYER_GRBG16 AV_PIX_FMT_NE(BAYER_GRBG16BE, BAYER_GRBG16LE)
+
+
+#define AV_PIX_FMT_YUVA420P9 AV_PIX_FMT_NE(YUVA420P9BE , YUVA420P9LE)
+#define AV_PIX_FMT_YUVA422P9 AV_PIX_FMT_NE(YUVA422P9BE , YUVA422P9LE)
+#define AV_PIX_FMT_YUVA444P9 AV_PIX_FMT_NE(YUVA444P9BE , YUVA444P9LE)
+#define AV_PIX_FMT_YUVA420P10 AV_PIX_FMT_NE(YUVA420P10BE, YUVA420P10LE)
+#define AV_PIX_FMT_YUVA422P10 AV_PIX_FMT_NE(YUVA422P10BE, YUVA422P10LE)
+#define AV_PIX_FMT_YUVA444P10 AV_PIX_FMT_NE(YUVA444P10BE, YUVA444P10LE)
+#define AV_PIX_FMT_YUVA420P16 AV_PIX_FMT_NE(YUVA420P16BE, YUVA420P16LE)
+#define AV_PIX_FMT_YUVA422P16 AV_PIX_FMT_NE(YUVA422P16BE, YUVA422P16LE)
+#define AV_PIX_FMT_YUVA444P16 AV_PIX_FMT_NE(YUVA444P16BE, YUVA444P16LE)
+
+#define AV_PIX_FMT_XYZ12 AV_PIX_FMT_NE(XYZ12BE, XYZ12LE)
+#define AV_PIX_FMT_NV20 AV_PIX_FMT_NE(NV20BE, NV20LE)
+#define AV_PIX_FMT_AYUV64 AV_PIX_FMT_NE(AYUV64BE, AYUV64LE)
+
+/**
+ * Chromaticity coordinates of the source primaries.
+ */
+enum AVColorPrimaries {
+ AVCOL_PRI_RESERVED0 = 0,
+ AVCOL_PRI_BT709 = 1, ///< also ITU-R BT1361 / IEC 61966-2-4 / SMPTE RP177 Annex B
+ AVCOL_PRI_UNSPECIFIED = 2,
+ AVCOL_PRI_RESERVED = 3,
+ AVCOL_PRI_BT470M = 4, ///< also FCC Title 47 Code of Federal Regulations 73.682 (a)(20)
+
+ AVCOL_PRI_BT470BG = 5, ///< also ITU-R BT601-6 625 / ITU-R BT1358 625 / ITU-R BT1700 625 PAL & SECAM
+ AVCOL_PRI_SMPTE170M = 6, ///< also ITU-R BT601-6 525 / ITU-R BT1358 525 / ITU-R BT1700 NTSC
+ AVCOL_PRI_SMPTE240M = 7, ///< functionally identical to above
+ AVCOL_PRI_FILM = 8, ///< colour filters using Illuminant C
+ AVCOL_PRI_BT2020 = 9, ///< ITU-R BT2020
+ AVCOL_PRI_SMPTEST428_1= 10, ///< SMPTE ST 428-1 (CIE 1931 XYZ)
+ AVCOL_PRI_NB, ///< Not part of ABI
+};
+
+/**
+ * Color Transfer Characteristic.
+ */
+enum AVColorTransferCharacteristic {
+ AVCOL_TRC_RESERVED0 = 0,
+ AVCOL_TRC_BT709 = 1, ///< also ITU-R BT1361
+ AVCOL_TRC_UNSPECIFIED = 2,
+ AVCOL_TRC_RESERVED = 3,
+ AVCOL_TRC_GAMMA22 = 4, ///< also ITU-R BT470M / ITU-R BT1700 625 PAL & SECAM
+ AVCOL_TRC_GAMMA28 = 5, ///< also ITU-R BT470BG
+ AVCOL_TRC_SMPTE170M = 6, ///< also ITU-R BT601-6 525 or 625 / ITU-R BT1358 525 or 625 / ITU-R BT1700 NTSC
+ AVCOL_TRC_SMPTE240M = 7,
+ AVCOL_TRC_LINEAR = 8, ///< "Linear transfer characteristics"
+ AVCOL_TRC_LOG = 9, ///< "Logarithmic transfer characteristic (100:1 range)"
+ AVCOL_TRC_LOG_SQRT = 10, ///< "Logarithmic transfer characteristic (100 * Sqrt(10) : 1 range)"
+ AVCOL_TRC_IEC61966_2_4 = 11, ///< IEC 61966-2-4
+ AVCOL_TRC_BT1361_ECG = 12, ///< ITU-R BT1361 Extended Colour Gamut
+ AVCOL_TRC_IEC61966_2_1 = 13, ///< IEC 61966-2-1 (sRGB or sYCC)
+ AVCOL_TRC_BT2020_10 = 14, ///< ITU-R BT2020 for 10 bit system
+ AVCOL_TRC_BT2020_12 = 15, ///< ITU-R BT2020 for 12 bit system
+ AVCOL_TRC_SMPTEST2084 = 16, ///< SMPTE ST 2084 for 10, 12, 14 and 16 bit systems
+ AVCOL_TRC_SMPTEST428_1 = 17, ///< SMPTE ST 428-1
+ AVCOL_TRC_NB, ///< Not part of ABI
+};
+
+/**
+ * YUV colorspace type.
+ */
+enum AVColorSpace {
+ AVCOL_SPC_RGB = 0, ///< order of coefficients is actually GBR, also IEC 61966-2-1 (sRGB)
+ AVCOL_SPC_BT709 = 1, ///< also ITU-R BT1361 / IEC 61966-2-4 xvYCC709 / SMPTE RP177 Annex B
+ AVCOL_SPC_UNSPECIFIED = 2,
+ AVCOL_SPC_RESERVED = 3,
+ AVCOL_SPC_FCC = 4, ///< FCC Title 47 Code of Federal Regulations 73.682 (a)(20)
+ AVCOL_SPC_BT470BG = 5, ///< also ITU-R BT601-6 625 / ITU-R BT1358 625 / ITU-R BT1700 625 PAL & SECAM / IEC 61966-2-4 xvYCC601
+ AVCOL_SPC_SMPTE170M = 6, ///< also ITU-R BT601-6 525 / ITU-R BT1358 525 / ITU-R BT1700 NTSC / functionally identical to above
+ AVCOL_SPC_SMPTE240M = 7,
+ AVCOL_SPC_YCOCG = 8, ///< Used by Dirac / VC-2 and H.264 FRext, see ITU-T SG16
+ AVCOL_SPC_BT2020_NCL = 9, ///< ITU-R BT2020 non-constant luminance system
+ AVCOL_SPC_BT2020_CL = 10, ///< ITU-R BT2020 constant luminance system
+ AVCOL_SPC_NB, ///< Not part of ABI
+};
+#define AVCOL_SPC_YCGCO AVCOL_SPC_YCOCG
+
+
+/**
+ * MPEG vs JPEG YUV range.
+ */
+enum AVColorRange {
+ AVCOL_RANGE_UNSPECIFIED = 0,
+ AVCOL_RANGE_MPEG = 1, ///< the normal 219*2^(n-8) "MPEG" YUV ranges
+ AVCOL_RANGE_JPEG = 2, ///< the normal 2^n-1 "JPEG" YUV ranges
+ AVCOL_RANGE_NB, ///< Not part of ABI
+};
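+
+/* A worked example of the range formulas for 8-bit YUV: the "MPEG" luma range
+ * spans 219*2^(8-8) = 219 steps, i.e. Y in [16,235] (chroma in [16,240]),
+ * while the "JPEG" range spans 2^8-1 steps and covers the full [0,255]. */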
+
+/**
+ * Location of chroma samples.
+ *
+ * Illustration showing the location of the first (top left) chroma sample of the
+ * image. The left side shows only luma; the right side shows the location of the
+ * chroma sample. The two could be imagined to overlay each other, but are drawn
+ * separately due to the limitations of ASCII art.
+ *
+ * 1st 2nd 1st 2nd horizontal luma sample positions
+ * v v v v
+ * ______ ______
+ *1st luma line > |X X ... |3 4 X ... X are luma samples,
+ * | |1 2 1-6 are possible chroma positions
+ *2nd luma line > |X X ... |5 6 X ... 0 is undefined/unknown position
+ */
+enum AVChromaLocation {
+ AVCHROMA_LOC_UNSPECIFIED = 0,
+ AVCHROMA_LOC_LEFT = 1, ///< mpeg2/4 4:2:0, h264 default for 4:2:0
+ AVCHROMA_LOC_CENTER = 2, ///< mpeg1 4:2:0, jpeg 4:2:0, h263 4:2:0
+ AVCHROMA_LOC_TOPLEFT = 3, ///< ITU-R 601, SMPTE 274M 296M S314M(DV 4:1:1), mpeg2 4:2:2
+ AVCHROMA_LOC_TOP = 4,
+ AVCHROMA_LOC_BOTTOMLEFT = 5,
+ AVCHROMA_LOC_BOTTOM = 6,
+ AVCHROMA_LOC_NB, ///< Not part of ABI
+};
+
+#endif /* AVUTIL_PIXFMT_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/random_seed.h b/TMessagesProj/jni/ffmpeg/include/libavutil/random_seed.h
new file mode 100644
index 000000000..0462a048e
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/random_seed.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2009 Baptiste Coudurier
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_RANDOM_SEED_H
+#define AVUTIL_RANDOM_SEED_H
+
+#include <stdint.h>
+/**
+ * @addtogroup lavu_crypto
+ * @{
+ */
+
+/**
+ * Get a seed to use in conjunction with random functions.
+ * This function tries to provide a good seed on a best-effort basis.
+ * It is possible to call this function multiple times if more bits are needed.
+ * It can be quite slow, which is why it should only be used as a seed for a faster
+ * PRNG. The quality of the seed depends on the platform.
+ */
+uint32_t av_get_random_seed(void);
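+
+/* A minimal usage sketch (assumes the AVLFG PRNG from libavutil/lfg.h): seed
+ * a fast generator once, then draw from it.
+ *
+ *     AVLFG prng;
+ *     av_lfg_init(&prng, av_get_random_seed());
+ *     unsigned r = av_lfg_get(&prng);
+ */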
+
+/**
+ * @}
+ */
+
+#endif /* AVUTIL_RANDOM_SEED_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/rational.h b/TMessagesProj/jni/ffmpeg/include/libavutil/rational.h
new file mode 100644
index 000000000..289746968
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/rational.h
@@ -0,0 +1,173 @@
+/*
+ * rational numbers
+ * Copyright (c) 2003 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * rational numbers
+ * @author Michael Niedermayer
+ */
+
+#ifndef AVUTIL_RATIONAL_H
+#define AVUTIL_RATIONAL_H
+
+#include <stdint.h>
+#include <limits.h>
+#include "attributes.h"
+
+/**
+ * @addtogroup lavu_math
+ * @{
+ */
+
+/**
+ * rational number numerator/denominator
+ */
+typedef struct AVRational{
+ int num; ///< numerator
+ int den; ///< denominator
+} AVRational;
+
+/**
+ * Create a rational.
+ * Useful for compilers that do not support compound literals.
+ * @note The return value is not reduced.
+ */
+static inline AVRational av_make_q(int num, int den)
+{
+ AVRational r = { num, den };
+ return r;
+}
+
+/**
+ * Compare two rationals.
+ * @param a first rational
+ * @param b second rational
+ * @return 0 if a==b, 1 if a>b, -1 if a<b, and INT_MIN if one of the
+ * values is of the form 0/0
+ */
+static inline int av_cmp_q(AVRational a, AVRational b){
+ const int64_t tmp= a.num * (int64_t)b.den - b.num * (int64_t)a.den;
+
+ if(tmp) return (int)((tmp ^ a.den ^ b.den)>>63)|1;
+ else if(b.den && a.den) return 0;
+ else if(a.num && b.num) return (a.num>>31) - (b.num>>31);
+ else return INT_MIN;
+}
+
+/**
+ * Convert rational to double.
+ * @param a rational to convert
+ * @return (double) a
+ */
+static inline double av_q2d(AVRational a){
+ return a.num / (double) a.den;
+}
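+
+/* Sketch: converting a timestamp expressed in a time base to seconds, here
+ * assuming the 90 kHz time base used by MPEG transport streams.
+ *
+ *     AVRational tb = av_make_q(1, 90000);
+ *     double seconds = 48600 * av_q2d(tb);  // 0.54 s
+ */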
+
+/**
+ * Reduce a fraction.
+ * This is useful for framerate calculations.
+ * @param dst_num destination numerator
+ * @param dst_den destination denominator
+ * @param num source numerator
+ * @param den source denominator
+ * @param max the maximum allowed for dst_num & dst_den
+ * @return 1 if exact, 0 otherwise
+ */
+int av_reduce(int *dst_num, int *dst_den, int64_t num, int64_t den, int64_t max);
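+
+/* Sketch: reducing a sample-rate ratio, e.g. for a resampler.
+ *
+ *     int num, den;
+ *     int exact = av_reduce(&num, &den, 48000, 44100, INT_MAX);
+ *     // exact == 1, num == 160, den == 147
+ */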
+
+/**
+ * Multiply two rationals.
+ * @param b first rational
+ * @param c second rational
+ * @return b*c
+ */
+AVRational av_mul_q(AVRational b, AVRational c) av_const;
+
+/**
+ * Divide one rational by another.
+ * @param b first rational
+ * @param c second rational
+ * @return b/c
+ */
+AVRational av_div_q(AVRational b, AVRational c) av_const;
+
+/**
+ * Add two rationals.
+ * @param b first rational
+ * @param c second rational
+ * @return b+c
+ */
+AVRational av_add_q(AVRational b, AVRational c) av_const;
+
+/**
+ * Subtract one rational from another.
+ * @param b first rational
+ * @param c second rational
+ * @return b-c
+ */
+AVRational av_sub_q(AVRational b, AVRational c) av_const;
+
+/**
+ * Invert a rational.
+ * @param q value
+ * @return 1 / q
+ */
+static av_always_inline AVRational av_inv_q(AVRational q)
+{
+ AVRational r = { q.den, q.num };
+ return r;
+}
+
+/**
+ * Convert a double precision floating point number to a rational.
+ * inf is expressed as {1,0} or {-1,0} depending on the sign.
+ *
+ * @param d double to convert
+ * @param max the maximum allowed numerator and denominator
+ * @return (AVRational) d
+ */
+AVRational av_d2q(double d, int max) av_const;
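+
+/* Sketch: a simple conversion; larger max values allow finer approximations
+ * of non-dyadic inputs.
+ *
+ *     AVRational half = av_d2q(0.5, 100);  // {1, 2}
+ */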
+
+/**
+ * @return 1 if q1 is nearer to q than q2, -1 if q2 is nearer
+ * than q1, 0 if they have the same distance.
+ */
+int av_nearer_q(AVRational q, AVRational q1, AVRational q2);
+
+/**
+ * Find the nearest value in q_list to q.
+ * @param q_list an array of rationals terminated by {0, 0}
+ * @return the index of the nearest value found in the array
+ */
+int av_find_nearest_q_idx(AVRational q, const AVRational* q_list);
+
+/**
+ * Convert an AVRational to an IEEE 32-bit float.
+ *
+ * The float is returned in a uint32_t and its value is platform-independent.
+ */
+uint32_t av_q2intfloat(AVRational q);
+
+/**
+ * @}
+ */
+
+#endif /* AVUTIL_RATIONAL_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/rc4.h b/TMessagesProj/jni/ffmpeg/include/libavutil/rc4.h
new file mode 100644
index 000000000..029cd2ad5
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/rc4.h
@@ -0,0 +1,66 @@
+/*
+ * RC4 encryption/decryption/pseudo-random number generator
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_RC4_H
+#define AVUTIL_RC4_H
+
+#include <stdint.h>
+
+/**
+ * @defgroup lavu_rc4 RC4
+ * @ingroup lavu_crypto
+ * @{
+ */
+
+typedef struct AVRC4 {
+ uint8_t state[256];
+ int x, y;
+} AVRC4;
+
+/**
+ * Allocate an AVRC4 context.
+ */
+AVRC4 *av_rc4_alloc(void);
+
+/**
+ * @brief Initializes an AVRC4 context.
+ *
+ * @param key_bits must be a multiple of 8
+ * @param decrypt 0 for encryption, 1 for decryption, currently has no effect
+ * @return zero on success, negative value otherwise
+ */
+int av_rc4_init(struct AVRC4 *d, const uint8_t *key, int key_bits, int decrypt);
+
+/**
+ * @brief Encrypts / decrypts using the RC4 algorithm.
+ *
+ * @param count number of bytes
+ * @param dst destination array, can be equal to src
+ * @param src source array, can be equal to dst, may be NULL
+ * @param iv not (yet) used for RC4, should be NULL
+ * @param decrypt 0 for encryption, 1 for decryption, not (yet) used
+ */
+void av_rc4_crypt(struct AVRC4 *d, uint8_t *dst, const uint8_t *src, int count, uint8_t *iv, int decrypt);
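+
+/* A minimal in-place encryption sketch (assumes av_free() from
+ * libavutil/mem.h; the all-zero key is a placeholder only):
+ *
+ *     AVRC4 *rc4 = av_rc4_alloc();
+ *     uint8_t key[16] = {0};
+ *     uint8_t buf[32] = {0};
+ *     if (rc4 && av_rc4_init(rc4, key, 128, 0) >= 0)
+ *         av_rc4_crypt(rc4, buf, buf, sizeof(buf), NULL, 0);
+ *     av_free(rc4);
+ */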
+
+/**
+ * @}
+ */
+
+#endif /* AVUTIL_RC4_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/replaygain.h b/TMessagesProj/jni/ffmpeg/include/libavutil/replaygain.h
new file mode 100644
index 000000000..5c03e1993
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/replaygain.h
@@ -0,0 +1,51 @@
+/*
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_REPLAYGAIN_H
+#define AVUTIL_REPLAYGAIN_H
+
+#include <stdint.h>
+
+/**
+ * ReplayGain information (see
+ * http://wiki.hydrogenaudio.org/index.php?title=ReplayGain_1.0_specification).
+ * The size of this struct is a part of the public ABI.
+ */
+typedef struct AVReplayGain {
+ /**
+ * Track replay gain in microbels (divide by 100000 to get the value in dB).
+ * Should be set to INT32_MIN when unknown.
+ */
+ int32_t track_gain;
+ /**
+ * Peak track amplitude, with 100000 representing full scale (but values
+ * may overflow). 0 when unknown.
+ */
+ uint32_t track_peak;
+ /**
+ * Same as track_gain, but for the whole album.
+ */
+ int32_t album_gain;
+ /**
+ * Same as track_peak, but for the whole album.
+ */
+ uint32_t album_peak;
+} AVReplayGain;
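+
+/* Sketch: converting the stored gain to dB per the field documentation above,
+ * where rg is an AVReplayGain obtained elsewhere (e.g. from stream side data):
+ *
+ *     double track_db = rg->track_gain / 100000.0;  // only if != INT32_MIN
+ */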
+
+#endif /* AVUTIL_REPLAYGAIN_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/ripemd.h b/TMessagesProj/jni/ffmpeg/include/libavutil/ripemd.h
new file mode 100644
index 000000000..7b0c8bc89
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/ripemd.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2007 Michael Niedermayer
+ * Copyright (C) 2013 James Almer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_RIPEMD_H
+#define AVUTIL_RIPEMD_H
+
+#include <stdint.h>
+
+#include "attributes.h"
+#include "version.h"
+
+/**
+ * @defgroup lavu_ripemd RIPEMD
+ * @ingroup lavu_crypto
+ * @{
+ */
+
+extern const int av_ripemd_size;
+
+struct AVRIPEMD;
+
+/**
+ * Allocate an AVRIPEMD context.
+ */
+struct AVRIPEMD *av_ripemd_alloc(void);
+
+/**
+ * Initialize RIPEMD hashing.
+ *
+ * @param context pointer to the function context (of size av_ripemd_size)
+ * @param bits number of bits in digest (128, 160, 256 or 320 bits)
+ * @return zero if initialization succeeded, -1 otherwise
+ */
+int av_ripemd_init(struct AVRIPEMD* context, int bits);
+
+/**
+ * Update hash value.
+ *
+ * @param context hash function context
+ * @param data input data to update hash with
+ * @param len input data length
+ */
+void av_ripemd_update(struct AVRIPEMD* context, const uint8_t* data, unsigned int len);
+
+/**
+ * Finish hashing and output digest value.
+ *
+ * @param context hash function context
+ * @param digest buffer where output digest value is stored
+ */
+void av_ripemd_final(struct AVRIPEMD* context, uint8_t *digest);
+
+/**
+ * @}
+ */
+
+#endif /* AVUTIL_RIPEMD_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/samplefmt.h b/TMessagesProj/jni/ffmpeg/include/libavutil/samplefmt.h
new file mode 100644
index 000000000..6a8a031c0
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/samplefmt.h
@@ -0,0 +1,271 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_SAMPLEFMT_H
+#define AVUTIL_SAMPLEFMT_H
+
+#include <stdint.h>
+
+#include "avutil.h"
+#include "attributes.h"
+
+/**
+ * @addtogroup lavu_audio
+ * @{
+ *
+ * @defgroup lavu_sampfmts Audio sample formats
+ *
+ * Audio sample format enumeration and related convenience functions.
+ * @{
+ *
+ */
+
+/**
+ * Audio sample formats
+ *
+ * - The data described by the sample format is always in native-endian order.
+ * Sample values can be expressed by native C types, hence the lack of a signed
+ * 24-bit sample format even though it is a common raw audio data format.
+ *
+ * - The floating-point formats are based on full volume being in the range
+ * [-1.0, 1.0]. Any values outside this range are beyond full volume level.
+ *
+ * - The data layout as used in av_samples_fill_arrays() and elsewhere in FFmpeg
+ * (such as AVFrame in libavcodec) is as follows:
+ *
+ * @par
+ * For planar sample formats, each audio channel is in a separate data plane,
+ * and linesize is the buffer size, in bytes, for a single plane. All data
+ * planes must be the same size. For packed sample formats, only the first data
+ * plane is used, and samples for each channel are interleaved. In this case,
+ * linesize is the buffer size, in bytes, for that single plane.
+ *
+ */
+enum AVSampleFormat {
+ AV_SAMPLE_FMT_NONE = -1,
+ AV_SAMPLE_FMT_U8, ///< unsigned 8 bits
+ AV_SAMPLE_FMT_S16, ///< signed 16 bits
+ AV_SAMPLE_FMT_S32, ///< signed 32 bits
+ AV_SAMPLE_FMT_FLT, ///< float
+ AV_SAMPLE_FMT_DBL, ///< double
+
+ AV_SAMPLE_FMT_U8P, ///< unsigned 8 bits, planar
+ AV_SAMPLE_FMT_S16P, ///< signed 16 bits, planar
+ AV_SAMPLE_FMT_S32P, ///< signed 32 bits, planar
+ AV_SAMPLE_FMT_FLTP, ///< float, planar
+ AV_SAMPLE_FMT_DBLP, ///< double, planar
+
+ AV_SAMPLE_FMT_NB ///< Number of sample formats. DO NOT USE if linking dynamically
+};
+
+/**
+ * Return the name of sample_fmt, or NULL if sample_fmt is not
+ * recognized.
+ */
+const char *av_get_sample_fmt_name(enum AVSampleFormat sample_fmt);
+
+/**
+ * Return a sample format corresponding to name, or AV_SAMPLE_FMT_NONE
+ * on error.
+ */
+enum AVSampleFormat av_get_sample_fmt(const char *name);
+
+/**
+ * Return the planar<->packed alternative form of the given sample format, or
+ * AV_SAMPLE_FMT_NONE on error. If the passed sample_fmt is already in the
+ * requested planar/packed format, the format returned is the same as the
+ * input.
+ */
+enum AVSampleFormat av_get_alt_sample_fmt(enum AVSampleFormat sample_fmt, int planar);
+
+/**
+ * Get the packed alternative form of the given sample format.
+ *
+ * If the passed sample_fmt is already in packed format, the format returned is
+ * the same as the input.
+ *
+ * @return the packed alternative form of the given sample format or
+ AV_SAMPLE_FMT_NONE on error.
+ */
+enum AVSampleFormat av_get_packed_sample_fmt(enum AVSampleFormat sample_fmt);
+
+/**
+ * Get the planar alternative form of the given sample format.
+ *
+ * If the passed sample_fmt is already in planar format, the format returned is
+ * the same as the input.
+ *
+ * @return the planar alternative form of the given sample format or
+ AV_SAMPLE_FMT_NONE on error.
+ */
+enum AVSampleFormat av_get_planar_sample_fmt(enum AVSampleFormat sample_fmt);
+
+/**
+ * Generate a string corresponding to the sample format with
+ * sample_fmt, or a header if sample_fmt is negative.
+ *
+ * @param buf the buffer where to write the string
+ * @param buf_size the size of buf
+ * @param sample_fmt the number of the sample format to print the
+ * corresponding info string, or a negative value to print the
+ * corresponding header.
+ * @return the pointer to the filled buffer or NULL if sample_fmt is
+ * unknown or in case of other errors
+ */
+char *av_get_sample_fmt_string(char *buf, int buf_size, enum AVSampleFormat sample_fmt);
+
+/**
+ * Return number of bytes per sample.
+ *
+ * @param sample_fmt the sample format
+ * @return number of bytes per sample or zero if unknown for the given
+ * sample format
+ */
+int av_get_bytes_per_sample(enum AVSampleFormat sample_fmt);
+
+/**
+ * Check if the sample format is planar.
+ *
+ * @param sample_fmt the sample format to inspect
+ * @return 1 if the sample format is planar, 0 if it is interleaved
+ */
+int av_sample_fmt_is_planar(enum AVSampleFormat sample_fmt);
+
+/**
+ * Get the required buffer size for the given audio parameters.
+ *
+ * @param[out] linesize calculated linesize, may be NULL
+ * @param nb_channels the number of channels
+ * @param nb_samples the number of samples in a single channel
+ * @param sample_fmt the sample format
+ * @param align buffer size alignment (0 = default, 1 = no alignment)
+ * @return required buffer size, or negative error code on failure
+ */
+int av_samples_get_buffer_size(int *linesize, int nb_channels, int nb_samples,
+ enum AVSampleFormat sample_fmt, int align);
+
+/**
+ * @}
+ *
+ * @defgroup lavu_sampmanip Samples manipulation
+ *
+ * Functions that manipulate audio samples
+ * @{
+ */
+
+/**
+ * Fill plane data pointers and linesize for samples with sample
+ * format sample_fmt.
+ *
+ * The audio_data array is filled with the pointers to the samples data planes:
+ * for planar, set the start point of each channel's data within the buffer,
+ * for packed, set the start point of the entire buffer only.
+ *
+ * The value pointed to by linesize is set to the aligned size of each
+ * channel's data buffer for planar layout, or to the aligned size of the
+ * buffer for all channels for packed layout.
+ *
+ * The buffer in buf must be big enough to contain all the samples
+ * (use av_samples_get_buffer_size() to compute its minimum size),
+ * otherwise the audio_data pointers will point to invalid data.
+ *
+ * @see enum AVSampleFormat
+ * The documentation for AVSampleFormat describes the data layout.
+ *
+ * @param[out] audio_data array to be filled with the pointer for each channel
+ * @param[out] linesize calculated linesize, may be NULL
+ * @param buf the pointer to a buffer containing the samples
+ * @param nb_channels the number of channels
+ * @param nb_samples the number of samples in a single channel
+ * @param sample_fmt the sample format
+ * @param align buffer size alignment (0 = default, 1 = no alignment)
+ * @return >=0 on success or a negative error code on failure
+ * @todo return minimum size in bytes required for the buffer in case
+ * of success at the next bump
+ */
+int av_samples_fill_arrays(uint8_t **audio_data, int *linesize,
+ const uint8_t *buf,
+ int nb_channels, int nb_samples,
+ enum AVSampleFormat sample_fmt, int align);
+
+/**
+ * Allocate a samples buffer for nb_samples samples, and fill data pointers and
+ * linesize accordingly.
+ * The allocated samples buffer can be freed by using av_freep(&audio_data[0]).
+ * Allocated data will be initialized to silence.
+ *
+ * @see enum AVSampleFormat
+ * The documentation for AVSampleFormat describes the data layout.
+ *
+ * @param[out] audio_data array to be filled with the pointer for each channel
+ * @param[out] linesize aligned size for audio buffer(s), may be NULL
+ * @param nb_channels number of audio channels
+ * @param nb_samples number of samples per channel
+ * @param align buffer size alignment (0 = default, 1 = no alignment)
+ * @return >=0 on success or a negative error code on failure
+ * @todo return the size of the allocated buffer in case of success at the next bump
+ * @see av_samples_fill_arrays()
+ * @see av_samples_alloc_array_and_samples()
+ */
+int av_samples_alloc(uint8_t **audio_data, int *linesize, int nb_channels,
+ int nb_samples, enum AVSampleFormat sample_fmt, int align);
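+
+/* Sketch: allocating a silent stereo planar-float buffer for 1024 samples and
+ * releasing it (av_freep() is assumed from libavutil/mem.h):
+ *
+ *     uint8_t *data[2];
+ *     int linesize;
+ *     if (av_samples_alloc(data, &linesize, 2, 1024, AV_SAMPLE_FMT_FLTP, 0) >= 0) {
+ *         // data[0] and data[1] are the two channel planes
+ *         av_freep(&data[0]);
+ *     }
+ */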
+
+/**
+ * Allocate a data pointers array, samples buffer for nb_samples
+ * samples, and fill data pointers and linesize accordingly.
+ *
+ * This is the same as av_samples_alloc(), but also allocates the data
+ * pointers array.
+ *
+ * @see av_samples_alloc()
+ */
+int av_samples_alloc_array_and_samples(uint8_t ***audio_data, int *linesize, int nb_channels,
+ int nb_samples, enum AVSampleFormat sample_fmt, int align);
+
+/**
+ * Copy samples from src to dst.
+ *
+ * @param dst destination array of pointers to data planes
+ * @param src source array of pointers to data planes
+ * @param dst_offset offset in samples at which the data will be written to dst
+ * @param src_offset offset in samples at which the data will be read from src
+ * @param nb_samples number of samples to be copied
+ * @param nb_channels number of audio channels
+ * @param sample_fmt audio sample format
+ */
+int av_samples_copy(uint8_t **dst, uint8_t * const *src, int dst_offset,
+ int src_offset, int nb_samples, int nb_channels,
+ enum AVSampleFormat sample_fmt);
+
+/**
+ * Fill an audio buffer with silence.
+ *
+ * @param audio_data array of pointers to data planes
+ * @param offset offset in samples at which to start filling
+ * @param nb_samples number of samples to fill
+ * @param nb_channels number of audio channels
+ * @param sample_fmt audio sample format
+ */
+int av_samples_set_silence(uint8_t **audio_data, int offset, int nb_samples,
+ int nb_channels, enum AVSampleFormat sample_fmt);
+
+/**
+ * @}
+ * @}
+ */
+#endif /* AVUTIL_SAMPLEFMT_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/sha.h b/TMessagesProj/jni/ffmpeg/include/libavutil/sha.h
new file mode 100644
index 000000000..bf4377e51
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/sha.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2007 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_SHA_H
+#define AVUTIL_SHA_H
+
+#include <stdint.h>
+
+#include "attributes.h"
+#include "version.h"
+
+/**
+ * @defgroup lavu_sha SHA
+ * @ingroup lavu_crypto
+ * @{
+ */
+
+extern const int av_sha_size;
+
+struct AVSHA;
+
+/**
+ * Allocate an AVSHA context.
+ */
+struct AVSHA *av_sha_alloc(void);
+
+/**
+ * Initialize SHA-1 or SHA-2 hashing.
+ *
+ * @param context pointer to the function context (of size av_sha_size)
+ * @param bits number of bits in digest (SHA-1 - 160 bits, SHA-2 224 or 256 bits)
+ * @return zero if initialization succeeded, -1 otherwise
+ */
+int av_sha_init(struct AVSHA* context, int bits);
+
+/**
+ * Update hash value.
+ *
+ * @param context hash function context
+ * @param data input data to update hash with
+ * @param len input data length
+ */
+void av_sha_update(struct AVSHA* context, const uint8_t* data, unsigned int len);
+
+/**
+ * Finish hashing and output digest value.
+ *
+ * @param context hash function context
+ * @param digest buffer where output digest value is stored
+ */
+void av_sha_final(struct AVSHA* context, uint8_t *digest);
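+
+/* A minimal SHA-1 sketch (av_free() assumed from libavutil/mem.h):
+ *
+ *     struct AVSHA *sha = av_sha_alloc();
+ *     uint8_t digest[20];  // 160 bits
+ *     if (sha && av_sha_init(sha, 160) >= 0) {
+ *         av_sha_update(sha, (const uint8_t *)"abc", 3);
+ *         av_sha_final(sha, digest);
+ *     }
+ *     av_free(sha);
+ */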
+
+/**
+ * @}
+ */
+
+#endif /* AVUTIL_SHA_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/sha512.h b/TMessagesProj/jni/ffmpeg/include/libavutil/sha512.h
new file mode 100644
index 000000000..7b0870147
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/sha512.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2007 Michael Niedermayer
+ * Copyright (C) 2013 James Almer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_SHA512_H
+#define AVUTIL_SHA512_H
+
+#include <stdint.h>
+
+#include "attributes.h"
+#include "version.h"
+
+/**
+ * @defgroup lavu_sha512 SHA512
+ * @ingroup lavu_crypto
+ * @{
+ */
+
+extern const int av_sha512_size;
+
+struct AVSHA512;
+
+/**
+ * Allocate an AVSHA512 context.
+ */
+struct AVSHA512 *av_sha512_alloc(void);
+
+/**
+ * Initialize SHA-2 512 hashing.
+ *
+ * @param context pointer to the function context (of size av_sha512_size)
+ * @param bits number of bits in digest (224, 256, 384 or 512 bits)
+ * @return zero if initialization succeeded, -1 otherwise
+ */
+int av_sha512_init(struct AVSHA512* context, int bits);
+
+/**
+ * Update hash value.
+ *
+ * @param context hash function context
+ * @param data input data to update hash with
+ * @param len input data length
+ */
+void av_sha512_update(struct AVSHA512* context, const uint8_t* data, unsigned int len);
+
+/**
+ * Finish hashing and output digest value.
+ *
+ * @param context hash function context
+ * @param digest buffer where output digest value is stored
+ */
+void av_sha512_final(struct AVSHA512* context, uint8_t *digest);
+
+/**
+ * @}
+ */
+
+#endif /* AVUTIL_SHA512_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/stereo3d.h b/TMessagesProj/jni/ffmpeg/include/libavutil/stereo3d.h
new file mode 100644
index 000000000..1135dc9dd
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/stereo3d.h
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2013 Vittorio Giovara
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_STEREO3D_H
+#define AVUTIL_STEREO3D_H
+
+#include <stdint.h>
+
+#include "frame.h"
+
+/**
+ * List of possible 3D Types
+ */
+enum AVStereo3DType {
+ /**
+ * Video is not stereoscopic (and metadata has to be there).
+ */
+ AV_STEREO3D_2D,
+
+ /**
+ * Views are next to each other.
+ *
+ * LLLLRRRR
+ * LLLLRRRR
+ * LLLLRRRR
+ * ...
+ */
+ AV_STEREO3D_SIDEBYSIDE,
+
+ /**
+ * Views are on top of each other.
+ *
+ * LLLLLLLL
+ * LLLLLLLL
+ * RRRRRRRR
+ * RRRRRRRR
+ */
+ AV_STEREO3D_TOPBOTTOM,
+
+ /**
+ * Views are alternated temporally.
+ *
+ * frame0 frame1 frame2 ...
+ * LLLLLLLL RRRRRRRR LLLLLLLL
+ * LLLLLLLL RRRRRRRR LLLLLLLL
+ * LLLLLLLL RRRRRRRR LLLLLLLL
+ * ... ... ...
+ */
+ AV_STEREO3D_FRAMESEQUENCE,
+
+ /**
+ * Views are packed in a checkerboard-like structure per pixel.
+ *
+ * LRLRLRLR
+ * RLRLRLRL
+ * LRLRLRLR
+ * ...
+ */
+ AV_STEREO3D_CHECKERBOARD,
+
+ /**
+ * Views are next to each other, but when upscaling
+ * apply a checkerboard pattern.
+ *
+ * LLLLRRRR L L L L R R R R
+ * LLLLRRRR => L L L L R R R R
+ * LLLLRRRR L L L L R R R R
+ * LLLLRRRR L L L L R R R R
+ */
+ AV_STEREO3D_SIDEBYSIDE_QUINCUNX,
+
+ /**
+ * Views are packed per line, as if interlaced.
+ *
+ * LLLLLLLL
+ * RRRRRRRR
+ * LLLLLLLL
+ * ...
+ */
+ AV_STEREO3D_LINES,
+
+ /**
+ * Views are packed per column.
+ *
+ * LRLRLRLR
+ * LRLRLRLR
+ * LRLRLRLR
+ * ...
+ */
+ AV_STEREO3D_COLUMNS,
+};
+
+
+/**
+ * Inverted views, Right/Bottom represents the left view.
+ */
+#define AV_STEREO3D_FLAG_INVERT (1 << 0)
+
+/**
+ * Stereo 3D type: this structure describes how two videos are packed
+ * within a single video surface, with additional information as needed.
+ *
+ * @note The struct must be allocated with av_stereo3d_alloc() and
+ * its size is not a part of the public ABI.
+ */
+typedef struct AVStereo3D {
+ /**
+ * How views are packed within the video.
+ */
+ enum AVStereo3DType type;
+
+ /**
+ * Additional information about the frame packing.
+ */
+ int flags;
+} AVStereo3D;
+
+/**
+ * Allocate an AVStereo3D structure and set its fields to default values.
+ * The resulting struct can be freed using av_freep().
+ *
+ * @return An AVStereo3D filled with default values or NULL on failure.
+ */
+AVStereo3D *av_stereo3d_alloc(void);
+
+/**
+ * Allocate a complete AVFrameSideData and add it to the frame.
+ *
+ * @param frame The frame to which the side data is added.
+ *
+ * @return The AVStereo3D structure to be filled by caller.
+ */
+AVStereo3D *av_stereo3d_create_side_data(AVFrame *frame);
+
+#endif /* AVUTIL_STEREO3D_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/tea.h b/TMessagesProj/jni/ffmpeg/include/libavutil/tea.h
new file mode 100644
index 000000000..dd929bdaf
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/tea.h
@@ -0,0 +1,71 @@
+/*
+ * A 32-bit implementation of the TEA algorithm
+ * Copyright (c) 2015 Vesselin Bontchev
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_TEA_H
+#define AVUTIL_TEA_H
+
+#include <stdint.h>
+
+/**
+ * @file
+ * @brief Public header for libavutil TEA algorithm
+ * @defgroup lavu_tea TEA
+ * @ingroup lavu_crypto
+ * @{
+ */
+
+extern const int av_tea_size;
+
+struct AVTEA;
+
+/**
+ * Allocate an AVTEA context
+ * To free the struct: av_free(ptr)
+ */
+struct AVTEA *av_tea_alloc(void);
+
+/**
+ * Initialize an AVTEA context.
+ *
+ * @param ctx an AVTEA context
+ * @param key a key of 16 bytes used for encryption/decryption
+ * @param rounds the number of rounds in TEA (64 is the "standard")
+ */
+void av_tea_init(struct AVTEA *ctx, const uint8_t key[16], int rounds);
+
+/**
+ * Encrypt or decrypt a buffer using a previously initialized context.
+ *
+ * @param ctx an AVTEA context
+ * @param dst destination array, can be equal to src
+ * @param src source array, can be equal to dst
+ * @param count number of 8 byte blocks
+ * @param iv initialization vector for CBC mode, if NULL then ECB will be used
+ * @param decrypt 0 for encryption, 1 for decryption
+ */
+void av_tea_crypt(struct AVTEA *ctx, uint8_t *dst, const uint8_t *src,
+ int count, uint8_t *iv, int decrypt);
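+
+/* Sketch: encrypting one 8-byte block in ECB mode (iv == NULL); the all-zero
+ * key is a placeholder and av_free() is assumed from libavutil/mem.h:
+ *
+ *     struct AVTEA *tea = av_tea_alloc();
+ *     uint8_t key[16] = {0}, block[8] = {0};
+ *     if (tea) {
+ *         av_tea_init(tea, key, 64);
+ *         av_tea_crypt(tea, block, block, 1, NULL, 0);
+ *     }
+ *     av_free(tea);
+ */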
+
+/**
+ * @}
+ */
+
+#endif /* AVUTIL_TEA_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/threadmessage.h b/TMessagesProj/jni/ffmpeg/include/libavutil/threadmessage.h
new file mode 100644
index 000000000..e256cae9d
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/threadmessage.h
@@ -0,0 +1,107 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_THREADMESSAGE_H
+#define AVUTIL_THREADMESSAGE_H
+
+typedef struct AVThreadMessageQueue AVThreadMessageQueue;
+
+typedef enum AVThreadMessageFlags {
+
+ /**
+ * Perform non-blocking operation.
+ * If this flag is set, send and recv operations are non-blocking and
+ * return AVERROR(EAGAIN) immediately if they cannot proceed.
+ */
+ AV_THREAD_MESSAGE_NONBLOCK = 1,
+
+} AVThreadMessageFlags;
+
+/**
+ * Allocate a new message queue.
+ *
+ * @param mq pointer to the message queue
+ * @param nelem maximum number of elements in the queue
+ * @param elsize size of each element in the queue
+ * @return >=0 for success; <0 for error, in particular AVERROR(ENOSYS) if
+ * lavu was built without thread support
+ */
+int av_thread_message_queue_alloc(AVThreadMessageQueue **mq,
+ unsigned nelem,
+ unsigned elsize);
+
+/**
+ * Free a message queue.
+ *
+ * The message queue must no longer be in use by another thread.
+ */
+void av_thread_message_queue_free(AVThreadMessageQueue **mq);
+
+/**
+ * Send a message on the queue.
+ */
+int av_thread_message_queue_send(AVThreadMessageQueue *mq,
+ void *msg,
+ unsigned flags);
+
+/**
+ * Receive a message from the queue.
+ */
+int av_thread_message_queue_recv(AVThreadMessageQueue *mq,
+ void *msg,
+ unsigned flags);
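+
+/* Sketch of the intended producer/consumer pattern (send and recv would
+ * normally run in different threads; shown sequentially for brevity):
+ *
+ *     AVThreadMessageQueue *mq;
+ *     int in = 42, out;
+ *     if (av_thread_message_queue_alloc(&mq, 16, sizeof(int)) >= 0) {
+ *         av_thread_message_queue_send(mq, &in, 0);
+ *         av_thread_message_queue_recv(mq, &out, 0);  // out == 42
+ *         av_thread_message_queue_free(&mq);
+ *     }
+ */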
+
+/**
+ * Set the sending error code.
+ *
+ * If the error code is set to non-zero, av_thread_message_queue_recv() will
+ * return it immediately when there are no longer available messages.
+ * Conventional values, such as AVERROR_EOF or AVERROR(EAGAIN), can be used
+ * to cause the receiving thread to stop or suspend its operation.
+ */
+void av_thread_message_queue_set_err_send(AVThreadMessageQueue *mq,
+ int err);
+
+/**
+ * Set the receiving error code.
+ *
+ * If the error code is set to non-zero, av_thread_message_queue_send() will
+ * return it immediately. Conventional values, such as AVERROR_EOF or
+ * AVERROR(EAGAIN), can be used to cause the sending thread to stop or
+ * suspend its operation.
+ */
+void av_thread_message_queue_set_err_recv(AVThreadMessageQueue *mq,
+ int err);
+
+/**
+ * Set the optional free message callback function which will be called if an
+ * operation is removing messages from the queue.
+ */
+void av_thread_message_queue_set_free_func(AVThreadMessageQueue *mq,
+ void (*free_func)(void *msg));
+
+/**
+ * Flush the message queue
+ *
+ * This function is mostly equivalent to reading and freeing every message
+ * except that it will be done in a single operation (no lock/unlock between
+ * reads).
+ */
+void av_thread_message_flush(AVThreadMessageQueue *mq);
+
+#endif /* AVUTIL_THREADMESSAGE_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/time.h b/TMessagesProj/jni/ffmpeg/include/libavutil/time.h
new file mode 100644
index 000000000..dc169b064
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/time.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2000-2003 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_TIME_H
+#define AVUTIL_TIME_H
+
+#include <stdint.h>
+
+/**
+ * Get the current time in microseconds.
+ */
+int64_t av_gettime(void);
+
+/**
+ * Get the current time in microseconds since some unspecified starting point.
+ * On platforms that support it, the time comes from a monotonic clock.
+ * This property makes this time source ideal for measuring relative time.
+ * The returned values may not be monotonic on platforms where a monotonic
+ * clock is not available.
+ */
+int64_t av_gettime_relative(void);
+
+/**
+ * Indicates with a boolean result if the av_gettime_relative() time source
+ * is monotonic.
+ */
+int av_gettime_relative_is_monotonic(void);
+
+/**
+ * Sleep for a period of time. Although the duration is expressed in
+ * microseconds, the actual delay may be rounded to the precision of the
+ * system timer.
+ *
+ * @param usec Number of microseconds to sleep.
+ * @return zero on success or (negative) error code.
+ */
+int av_usleep(unsigned usec);
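+
+/* Sketch: measuring elapsed wall-clock time around a sleep.
+ *
+ *     int64_t t0 = av_gettime_relative();
+ *     av_usleep(20000);                               // ~20 ms
+ *     int64_t elapsed_us = av_gettime_relative() - t0;
+ */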
+
+#endif /* AVUTIL_TIME_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/timecode.h b/TMessagesProj/jni/ffmpeg/include/libavutil/timecode.h
new file mode 100644
index 000000000..56e3975fd
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/timecode.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2006 Smartjog S.A.S, Baptiste Coudurier
+ * Copyright (c) 2011-2012 Smartjog S.A.S, Clément Bœsch
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Timecode helpers header
+ */
+
+#ifndef AVUTIL_TIMECODE_H
+#define AVUTIL_TIMECODE_H
+
+#include <stdint.h>
+#include "rational.h"
+
+#define AV_TIMECODE_STR_SIZE 16
+
+enum AVTimecodeFlag {
+ AV_TIMECODE_FLAG_DROPFRAME = 1<<0, ///< timecode is drop frame
+ AV_TIMECODE_FLAG_24HOURSMAX = 1<<1, ///< timecode wraps after 24 hours
+ AV_TIMECODE_FLAG_ALLOWNEGATIVE = 1<<2, ///< negative time values are allowed
+};
+
+typedef struct {
+ int start; ///< timecode frame start (first base frame number)
+ uint32_t flags; ///< flags such as drop frame, +24 hours support, ...
+ AVRational rate; ///< frame rate in rational form
+ unsigned fps; ///< frames per second; must be consistent with the rate field
+} AVTimecode;
+
+/**
+ * Adjust frame number for NTSC drop frame time code.
+ *
+ * @param framenum frame number to adjust
+ * @param fps frames per second, 30 or 60
+ * @return adjusted frame number
+ * @warning adjustment is only valid in NTSC 29.97 and 59.94
+ */
+int av_timecode_adjust_ntsc_framenum2(int framenum, int fps);
+
+/**
+ * Convert frame number to SMPTE 12M binary representation.
+ *
+ * @param tc timecode data correctly initialized
+ * @param framenum frame number
+ * @return the SMPTE binary representation
+ *
+ * @note Frame number adjustment is automatically done in case of drop timecode,
+ * you do NOT have to call av_timecode_adjust_ntsc_framenum2().
+ * @note The frame number is relative to tc->start.
+ * @note Color frame (CF), binary group flags (BGF) and biphase mark polarity
+ * correction (PC) bits are set to zero.
+ */
+uint32_t av_timecode_get_smpte_from_framenum(const AVTimecode *tc, int framenum);
+
+/**
+ * Load timecode string into buf.
+ *
+ * @param buf destination buffer, must be at least AV_TIMECODE_STR_SIZE long
+ * @param tc timecode data correctly initialized
+ * @param framenum frame number
+ * @return the buf parameter
+ *
+ * @note The timecode representation can be negative and can exceed 24 hours,
+ *       but such values are only honored if the corresponding flags are set.
+ * @note The frame number is relative to tc->start.
+ */
+char *av_timecode_make_string(const AVTimecode *tc, char *buf, int framenum);
+
+/**
+ * Get the timecode string from the SMPTE timecode format.
+ *
+ * @param buf destination buffer, must be at least AV_TIMECODE_STR_SIZE long
+ * @param tcsmpte the 32-bit SMPTE timecode
+ * @param prevent_df prevent the use of a drop flag when it is known the DF bit
+ * is arbitrary
+ * @return the buf parameter
+ */
+char *av_timecode_make_smpte_tc_string(char *buf, uint32_t tcsmpte, int prevent_df);
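+
+/*
+ * A minimal usage sketch (editor's illustration, not part of the original
+ * header): packing a frame number into SMPTE bits and formatting the result.
+ * The timecode "tc" is assumed to have been set up with av_timecode_init().
+ *
+ * @code
+ * char buf[AV_TIMECODE_STR_SIZE];
+ * uint32_t smpte = av_timecode_get_smpte_from_framenum(&tc, 25);
+ * av_timecode_make_smpte_tc_string(buf, smpte, 0);
+ * @endcode
+ */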
+
+/**
+ * Get the timecode string from the 25-bit timecode format (MPEG GOP format).
+ *
+ * @param buf destination buffer, must be at least AV_TIMECODE_STR_SIZE long
+ * @param tc25bit the 25-bit timecode
+ * @return the buf parameter
+ */
+char *av_timecode_make_mpeg_tc_string(char *buf, uint32_t tc25bit);
+
+/**
+ * Init a timecode struct with the passed parameters.
+ *
+ * @param log_ctx a pointer to an arbitrary struct of which the first field
+ * is a pointer to an AVClass struct (used for av_log)
+ * @param tc pointer to an allocated AVTimecode
+ * @param rate frame rate in rational form
+ * @param flags miscellaneous flags such as drop frame, +24 hours, ...
+ * (see AVTimecodeFlag)
+ * @param frame_start the first frame number
+ * @return 0 on success, AVERROR otherwise
+ */
+int av_timecode_init(AVTimecode *tc, AVRational rate, int flags, int frame_start, void *log_ctx);
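+
+/*
+ * A minimal usage sketch (editor's illustration, not part of the original
+ * header): render frame 1800 of a 29.97 fps drop-frame timecode.
+ *
+ * @code
+ * AVTimecode tc;
+ * char buf[AV_TIMECODE_STR_SIZE];
+ * AVRational rate = { 30000, 1001 };
+ * if (av_timecode_init(&tc, rate, AV_TIMECODE_FLAG_DROPFRAME, 0, NULL) >= 0)
+ *     av_timecode_make_string(&tc, buf, 1800); // e.g. "00:01:00;02"
+ * @endcode
+ */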
+
+/**
+ * Parse timecode representation (hh:mm:ss[:;.]ff).
+ *
+ * @param log_ctx a pointer to an arbitrary struct of which the first field is a
+ * pointer to an AVClass struct (used for av_log).
+ * @param tc pointer to an allocated AVTimecode
+ * @param rate frame rate in rational form
+ * @param str timecode string which will determine the frame start
+ * @return 0 on success, AVERROR otherwise
+ */
+int av_timecode_init_from_string(AVTimecode *tc, AVRational rate, const char *str, void *log_ctx);
+
+/**
+ * Check if the timecode feature is available for the given frame rate.
+ *
+ * @return 0 if supported, <0 otherwise
+ */
+int av_timecode_check_frame_rate(AVRational rate);
+
+#endif /* AVUTIL_TIMECODE_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/timestamp.h b/TMessagesProj/jni/ffmpeg/include/libavutil/timestamp.h
new file mode 100644
index 000000000..f010a7ee3
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/timestamp.h
@@ -0,0 +1,78 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * timestamp utils, mostly useful for debugging/logging purposes
+ */
+
+#ifndef AVUTIL_TIMESTAMP_H
+#define AVUTIL_TIMESTAMP_H
+
+#include "common.h"
+
+#if defined(__cplusplus) && !defined(__STDC_FORMAT_MACROS) && !defined(PRId64)
+#error missing -D__STDC_FORMAT_MACROS / #define __STDC_FORMAT_MACROS
+#endif
+
+#define AV_TS_MAX_STRING_SIZE 32
+
+/**
+ * Fill the provided buffer with a string containing a timestamp
+ * representation.
+ *
+ * @param buf a buffer with size in bytes of at least AV_TS_MAX_STRING_SIZE
+ * @param ts the timestamp to represent
+ * @return the buffer in input
+ */
+static inline char *av_ts_make_string(char *buf, int64_t ts)
+{
+ if (ts == AV_NOPTS_VALUE) snprintf(buf, AV_TS_MAX_STRING_SIZE, "NOPTS");
+ else snprintf(buf, AV_TS_MAX_STRING_SIZE, "%"PRId64, ts);
+ return buf;
+}
+
+/**
+ * Convenience macro, the return value should be used only directly in
+ * function arguments but never stand-alone.
+ */
+#define av_ts2str(ts) av_ts_make_string((char[AV_TS_MAX_STRING_SIZE]){0}, ts)
+
+/**
+ * Fill the provided buffer with a string containing a timestamp time
+ * representation.
+ *
+ * @param buf a buffer with size in bytes of at least AV_TS_MAX_STRING_SIZE
+ * @param ts the timestamp to represent
+ * @param tb the timebase of the timestamp
+ * @return the buffer in input
+ */
+static inline char *av_ts_make_time_string(char *buf, int64_t ts, AVRational *tb)
+{
+ if (ts == AV_NOPTS_VALUE) snprintf(buf, AV_TS_MAX_STRING_SIZE, "NOPTS");
+ else snprintf(buf, AV_TS_MAX_STRING_SIZE, "%.6g", av_q2d(*tb) * ts);
+ return buf;
+}
+
+/**
+ * Convenience macro; the return value should be used only directly in
+ * function arguments and never stand-alone.
+ */
+#define av_ts2timestr(ts, tb) av_ts_make_time_string((char[AV_TS_MAX_STRING_SIZE]){0}, ts, tb)
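+
+/*
+ * A minimal usage sketch (editor's illustration, not part of the original
+ * header): logging a timestamp both in time-base units and in seconds.
+ * av_log() and AV_LOG_INFO are declared in libavutil/log.h.
+ *
+ * @code
+ * AVRational time_base = { 1, 90000 };
+ * int64_t pts = 180000;
+ * av_log(NULL, AV_LOG_INFO, "pts:%s pts_time:%s\n",
+ *        av_ts2str(pts), av_ts2timestr(pts, &time_base)); // pts:180000 pts_time:2
+ * @endcode
+ */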
+
+#endif /* AVUTIL_TIMESTAMP_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/tree.h b/TMessagesProj/jni/ffmpeg/include/libavutil/tree.h
new file mode 100644
index 000000000..e1aefaa9f
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/tree.h
@@ -0,0 +1,138 @@
+/*
+ * copyright (c) 2006 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * A tree container.
+ * @author Michael Niedermayer
+ */
+
+#ifndef AVUTIL_TREE_H
+#define AVUTIL_TREE_H
+
+#include "attributes.h"
+#include "version.h"
+
+/**
+ * @addtogroup lavu_tree AVTree
+ * @ingroup lavu_data
+ *
+ * Low-complexity tree container
+ *
+ * Insertion, removal, finding an equal element, and finding the largest
+ * element smaller than or the smallest element larger than a given key all
+ * have O(log n) worst-case complexity.
+ * @{
+ */
+
+
+struct AVTreeNode;
+extern const int av_tree_node_size;
+
+/**
+ * Allocate an AVTreeNode.
+ */
+struct AVTreeNode *av_tree_node_alloc(void);
+
+/**
+ * Find an element.
+ * @param root a pointer to the root node of the tree
+ * @param next If next is not NULL, then next[0] will contain the previous
+ * element and next[1] the next element. If either does not exist,
+ * then the corresponding entry in next is unchanged.
+ * @param cmp compare function used to compare elements in the tree,
+ * API identical to that of Standard C's qsort
+ * It is guaranteed that the first and only the first argument to cmp()
+ * will be the key parameter to av_tree_find(), thus it could, if the
+ * user wants, be a different type (like an opaque context).
+ * @return An element with cmp(key, elem) == 0 or NULL if no such element
+ * exists in the tree.
+ */
+void *av_tree_find(const struct AVTreeNode *root, void *key,
+ int (*cmp)(const void *key, const void *b), void *next[2]);
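+
+/*
+ * A minimal usage sketch (editor's illustration, not part of the original
+ * header): a qsort-style comparator plus a lookup that also retrieves the
+ * neighbouring elements. "root" is assumed to be a tree previously built
+ * with av_tree_insert().
+ *
+ * @code
+ * static int cmp_int(const void *key, const void *b)
+ * {
+ *     return FFDIFFSIGN(*(const int *)key, *(const int *)b);
+ * }
+ *
+ * int key = 42;
+ * void *next[2] = { NULL, NULL };
+ * int *found = av_tree_find(root, &key, cmp_int, next);
+ * // next[0] now holds the largest element smaller than 42 and next[1] the
+ * // smallest element larger than 42, whenever such elements exist.
+ * @endcode
+ */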
+
+/**
+ * Insert or remove an element.
+ *
+ * If *next is NULL, then the supplied element will be removed if it exists.
+ * If *next is non-NULL, then the supplied element will be inserted, unless
+ * it already exists in the tree.
+ *
+ * @param rootp A pointer to a pointer to the root node of the tree; note that
+ * the root node can change during insertions, this is required
+ * to keep the tree balanced.
+ * @param key pointer to the element key to insert in the tree
+ * @param next Used to allocate and free AVTreeNodes. For insertion the user
+ * must set it to an allocated and zeroed object of at least
+ * av_tree_node_size bytes size. av_tree_insert() will set it to
+ * NULL if it has been consumed.
+ * For deleting elements *next is set to NULL by the user and
+ * av_tree_insert() will set it to the AVTreeNode which was
+ * used for the removed element.
+ * This allows the use of flat arrays, which have
+ * lower overhead compared to many malloced elements.
+ * You might want to define a function like:
+ * @code
+ * void *tree_insert(struct AVTreeNode **rootp, void *key,
+ * int (*cmp)(void *key, const void *b),
+ * AVTreeNode **next)
+ * {
+ * if (!*next)
+ * *next = av_mallocz(av_tree_node_size);
+ * return av_tree_insert(rootp, key, cmp, next);
+ * }
+ * void *tree_remove(struct AVTreeNode **rootp, void *key,
+ *                   int (*cmp)(void *key, const void *b), AVTreeNode **next)
+ * {
+ * av_freep(next);
+ * return av_tree_insert(rootp, key, cmp, next);
+ * }
+ * @endcode
+ * @param cmp compare function used to compare elements in the tree, API identical
+ * to that of Standard C's qsort
+ * @return If no insertion happened, the found element; if an insertion or
+ * removal happened, then either key or NULL will be returned.
+ *         Which one it is depends on the tree state and the implementation;
+ *         the code should make no assumptions about which one it gets.
+ */
+void *av_tree_insert(struct AVTreeNode **rootp, void *key,
+ int (*cmp)(const void *key, const void *b),
+ struct AVTreeNode **next);
+
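+/**
+ * Free all AVTreeNodes of the tree (editor's note: the elements stored in
+ * the tree are not freed, only the nodes themselves).
+ */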
+void av_tree_destroy(struct AVTreeNode *t);
+
+/**
+ * Apply enu(opaque, &elem) to all the elements in the tree in a given range.
+ *
+ * @param cmp a comparison function that returns < 0 for an element below
+ *            the range, > 0 for an element above the range and == 0 for an
+ *            element inside the range
+ *
+ * @note The cmp function should use the same ordering used to construct the
+ * tree.
+ */
+void av_tree_enumerate(struct AVTreeNode *t, void *opaque,
+ int (*cmp)(void *opaque, void *elem),
+ int (*enu)(void *opaque, void *elem));
+
+/**
+ * @}
+ */
+
+#endif /* AVUTIL_TREE_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/twofish.h b/TMessagesProj/jni/ffmpeg/include/libavutil/twofish.h
new file mode 100644
index 000000000..813cfecdf
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/twofish.h
@@ -0,0 +1,70 @@
+/*
+ * An implementation of the TwoFish algorithm
+ * Copyright (c) 2015 Supraja Meedinti
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_TWOFISH_H
+#define AVUTIL_TWOFISH_H
+
+#include <stdint.h>
+
+
+/**
+ * @file
+ * @brief Public header for libavutil TWOFISH algorithm
+ * @defgroup lavu_twofish TWOFISH
+ * @ingroup lavu_crypto
+ * @{
+ */
+
+extern const int av_twofish_size;
+
+struct AVTWOFISH;
+
+/**
+ * Allocate an AVTWOFISH context.
+ * The returned struct must be freed with av_free().
+ */
+struct AVTWOFISH *av_twofish_alloc(void);
+
+/**
+ * Initialize an AVTWOFISH context.
+ *
+ * @param ctx an AVTWOFISH context
+ * @param key a key of size ranging from 1 to 32 bytes used for encryption/decryption
+ * @param key_bits number of key bits: 128, 192 or 256; if less than required,
+ *        the key is padded with zeroes to the nearest valid value
+ * @return 0 if key_bits is 128/192/256, -1 if key_bits is less than 0,
+ *         1 otherwise
+ */
+int av_twofish_init(struct AVTWOFISH *ctx, const uint8_t *key, int key_bits);
+
+/**
+ * Encrypt or decrypt a buffer using a previously initialized context.
+ *
+ * @param ctx an AVTWOFISH context
+ * @param dst destination array, can be equal to src
+ * @param src source array, can be equal to dst
+ * @param count number of 16 byte blocks
+ * @param iv initialization vector for CBC mode, NULL for ECB mode
+ * @param decrypt 0 for encryption, 1 for decryption
+ */
+void av_twofish_crypt(struct AVTWOFISH *ctx, uint8_t *dst, const uint8_t *src, int count, uint8_t* iv, int decrypt);
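+
+/*
+ * A minimal usage sketch (editor's illustration, not part of the original
+ * header): a one-block ECB round trip with a 128-bit key. av_free() is
+ * declared in libavutil/mem.h.
+ *
+ * @code
+ * uint8_t key[16] = { 0 };             // all-zero demo key
+ * uint8_t buf[16] = "0123456789abcde"; // exactly one 16-byte block
+ * struct AVTWOFISH *ctx = av_twofish_alloc();
+ * if (ctx && av_twofish_init(ctx, key, 128) == 0) {
+ *     av_twofish_crypt(ctx, buf, buf, 1, NULL, 0); // encrypt in place, ECB
+ *     av_twofish_crypt(ctx, buf, buf, 1, NULL, 1); // decrypt back
+ * }
+ * av_free(ctx);
+ * @endcode
+ */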
+
+/**
+ * @}
+ */
+#endif /* AVUTIL_TWOFISH_H */
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/version.h b/TMessagesProj/jni/ffmpeg/include/libavutil/version.h
new file mode 100644
index 000000000..22cd66b55
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/version.h
@@ -0,0 +1,129 @@
+/*
+ * copyright (c) 2003 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_VERSION_H
+#define AVUTIL_VERSION_H
+
+#include "macros.h"
+
+/**
+ * @addtogroup version_utils
+ *
+ * Useful to check and match library version in order to maintain
+ * backward compatibility.
+ *
+ * @{
+ */
+
+#define AV_VERSION_INT(a, b, c) ((a)<<16 | (b)<<8 | (c))
+#define AV_VERSION_DOT(a, b, c) a ##.## b ##.## c
+#define AV_VERSION(a, b, c) AV_VERSION_DOT(a, b, c)
+
+/**
+ * Extract version components from the full ::AV_VERSION_INT int as returned
+ * by functions like ::avformat_version() and ::avcodec_version().
+ */
+#define AV_VERSION_MAJOR(a) ((a) >> 16)
+#define AV_VERSION_MINOR(a) (((a) & 0x00FF00) >> 8)
+#define AV_VERSION_MICRO(a) ((a) & 0xFF)
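+
+/*
+ * A minimal usage sketch (editor's illustration, not part of the original
+ * header): packing a version and splitting it back into its components.
+ *
+ * @code
+ * unsigned v = AV_VERSION_INT(55, 11, 100);
+ * int major = AV_VERSION_MAJOR(v); // 55
+ * int minor = AV_VERSION_MINOR(v); // 11
+ * int micro = AV_VERSION_MICRO(v); // 100
+ * @endcode
+ */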
+
+/**
+ * @}
+ */
+
+/**
+ * @file
+ * @ingroup lavu
+ * Libavutil version macros
+ */
+
+/**
+ * @defgroup lavu_ver Version and Build diagnostics
+ *
+ * Macros and functions useful to check at compile time and at runtime
+ * which version of libavutil is in use.
+ *
+ * @{
+ */
+
+#define LIBAVUTIL_VERSION_MAJOR 55
+#define LIBAVUTIL_VERSION_MINOR 11
+#define LIBAVUTIL_VERSION_MICRO 100
+
+#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
+ LIBAVUTIL_VERSION_MINOR, \
+ LIBAVUTIL_VERSION_MICRO)
+#define LIBAVUTIL_VERSION AV_VERSION(LIBAVUTIL_VERSION_MAJOR, \
+ LIBAVUTIL_VERSION_MINOR, \
+ LIBAVUTIL_VERSION_MICRO)
+#define LIBAVUTIL_BUILD LIBAVUTIL_VERSION_INT
+
+#define LIBAVUTIL_IDENT "Lavu" AV_STRINGIFY(LIBAVUTIL_VERSION)
+
+/**
+ * @}
+ *
+ * @defgroup depr_guards Deprecation guards
+ * FF_API_* defines may be placed below to indicate public API that will be
+ * dropped at a future version bump. The defines themselves are not part of
+ * the public API and may change, break or disappear at any time.
+ *
+ * @note When bumping the major version it is recommended to manually
+ * disable each FF_API_* in its own commit instead of disabling them all
+ * at once through the bump. This improves the git bisect-ability of the change.
+ *
+ * @{
+ */
+
+#ifndef FF_API_VDPAU
+#define FF_API_VDPAU (LIBAVUTIL_VERSION_MAJOR < 56)
+#endif
+#ifndef FF_API_XVMC
+#define FF_API_XVMC (LIBAVUTIL_VERSION_MAJOR < 56)
+#endif
+#ifndef FF_API_OPT_TYPE_METADATA
+#define FF_API_OPT_TYPE_METADATA (LIBAVUTIL_VERSION_MAJOR < 56)
+#endif
+#ifndef FF_API_DLOG
+#define FF_API_DLOG (LIBAVUTIL_VERSION_MAJOR < 56)
+#endif
+#ifndef FF_API_VAAPI
+#define FF_API_VAAPI (LIBAVUTIL_VERSION_MAJOR < 56)
+#endif
+#ifndef FF_API_FRAME_QP
+#define FF_API_FRAME_QP (LIBAVUTIL_VERSION_MAJOR < 56)
+#endif
+#ifndef FF_API_PLUS1_MINUS1
+#define FF_API_PLUS1_MINUS1 (LIBAVUTIL_VERSION_MAJOR < 56)
+#endif
+#ifndef FF_API_ERROR_FRAME
+#define FF_API_ERROR_FRAME (LIBAVUTIL_VERSION_MAJOR < 56)
+#endif
+#ifndef FF_API_CRC_BIG_TABLE
+#define FF_API_CRC_BIG_TABLE (LIBAVUTIL_VERSION_MAJOR < 56)
+#endif
+
+
+/**
+ * @}
+ */
+
+#endif /* AVUTIL_VERSION_H */
+
diff --git a/TMessagesProj/jni/ffmpeg/include/libavutil/xtea.h b/TMessagesProj/jni/ffmpeg/include/libavutil/xtea.h
new file mode 100644
index 000000000..735427c10
--- /dev/null
+++ b/TMessagesProj/jni/ffmpeg/include/libavutil/xtea.h
@@ -0,0 +1,94 @@
+/*
+ * A 32-bit implementation of the XTEA algorithm
+ * Copyright (c) 2012 Samuel Pitoiset
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_XTEA_H
+#define AVUTIL_XTEA_H
+
+#include <stdint.h>
+
+/**
+ * @file
+ * @brief Public header for libavutil XTEA algorithm
+ * @defgroup lavu_xtea XTEA
+ * @ingroup lavu_crypto
+ * @{
+ */
+
+typedef struct AVXTEA {
+ uint32_t key[16];
+} AVXTEA;
+
+/**
+ * Allocate an AVXTEA context.
+ */
+AVXTEA *av_xtea_alloc(void);
+
+/**
+ * Initialize an AVXTEA context.
+ *
+ * @param ctx an AVXTEA context
+ * @param key a key of 16 bytes used for encryption/decryption,
+ * interpreted as big endian 32 bit numbers
+ */
+void av_xtea_init(struct AVXTEA *ctx, const uint8_t key[16]);
+
+/**
+ * Initialize an AVXTEA context.
+ *
+ * @param ctx an AVXTEA context
+ * @param key a key of 16 bytes used for encryption/decryption,
+ * interpreted as little endian 32 bit numbers
+ */
+void av_xtea_le_init(struct AVXTEA *ctx, const uint8_t key[16]);
+
+/**
+ * Encrypt or decrypt a buffer using a previously initialized context,
+ * in big endian format.
+ *
+ * @param ctx an AVXTEA context
+ * @param dst destination array, can be equal to src
+ * @param src source array, can be equal to dst
+ * @param count number of 8 byte blocks
+ * @param iv initialization vector for CBC mode, if NULL then ECB will be used
+ * @param decrypt 0 for encryption, 1 for decryption
+ */
+void av_xtea_crypt(struct AVXTEA *ctx, uint8_t *dst, const uint8_t *src,
+ int count, uint8_t *iv, int decrypt);
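+
+/*
+ * A minimal usage sketch (editor's illustration, not part of the original
+ * header): a one-block ECB round trip using the big-endian key layout.
+ *
+ * @code
+ * uint8_t key[16] = { 0 };    // all-zero demo key
+ * uint8_t buf[8] = "block!!"; // exactly one 8-byte block
+ * AVXTEA ctx;
+ * av_xtea_init(&ctx, key);
+ * av_xtea_crypt(&ctx, buf, buf, 1, NULL, 0); // encrypt in place, ECB
+ * av_xtea_crypt(&ctx, buf, buf, 1, NULL, 1); // decrypt back
+ * @endcode
+ */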
+
+/**
+ * Encrypt or decrypt a buffer using a previously initialized context,
+ * in little endian format.
+ *
+ * @param ctx an AVXTEA context
+ * @param dst destination array, can be equal to src
+ * @param src source array, can be equal to dst
+ * @param count number of 8 byte blocks
+ * @param iv initialization vector for CBC mode, if NULL then ECB will be used
+ * @param decrypt 0 for encryption, 1 for decryption
+ */
+void av_xtea_le_crypt(struct AVXTEA *ctx, uint8_t *dst, const uint8_t *src,
+ int count, uint8_t *iv, int decrypt);
+
+/**
+ * @}
+ */
+
+#endif /* AVUTIL_XTEA_H */
diff --git a/TMessagesProj/jni/gif.c b/TMessagesProj/jni/gif.c
deleted file mode 100644
index 64dda4793..000000000
--- a/TMessagesProj/jni/gif.c
+++ /dev/null
@@ -1,847 +0,0 @@
-//thanks to https://github.com/koral--/android-gif-drawable
-/*
- MIT License
- Copyright (c)
-
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
-
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
-
- // Copyright (c) 2011 Google Inc. All rights reserved.
- //
- // Redistribution and use in source and binary forms, with or without
- // modification, are permitted provided that the following conditions are
- // met:
- //
- // * Redistributions of source code must retain the above copyright
- // notice, this list of conditions and the following disclaimer.
- // * Redistributions in binary form must reproduce the above
- // copyright notice, this list of conditions and the following disclaimer
- // in the documentation and/or other materials provided with the
- // distribution.
- // * Neither the name of Google Inc. nor the names of its
- // contributors may be used to endorse or promote products derived from
- // this software without specific prior written permission.
- //
- // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- The GIFLIB distribution is Copyright (c) 1997 Eric S. Raymond
- */
-
-#include <jni.h>
-#include <stdio.h>
-#include <time.h>
-#include <limits.h>
-#include "gif.h"
-#include "giflib/gif_lib.h"
-
-#define D_GIF_ERR_NO_FRAMES 1000
-#define D_GIF_ERR_INVALID_SCR_DIMS 1001
-#define D_GIF_ERR_INVALID_IMG_DIMS 1002
-#define D_GIF_ERR_IMG_NOT_CONFINED 1003
-
-typedef struct {
- uint8_t blue;
- uint8_t green;
- uint8_t red;
- uint8_t alpha;
-} argb;
-
-typedef struct {
- unsigned int duration;
- int transpIndex;
- unsigned char disposalMethod;
-} FrameInfo;
-
-typedef struct {
- GifFileType *gifFilePtr;
- unsigned long lastFrameReaminder;
- unsigned long nextStartTime;
- int currentIndex;
- unsigned int lastDrawIndex;
- FrameInfo *infos;
- argb *backupPtr;
- int startPos;
- unsigned char *rasterBits;
- char *comment;
- unsigned short loopCount;
- int currentLoop;
- jfloat speedFactor;
-} GifInfo;
-
-static ColorMapObject *defaultCmap = NULL;
-
-static ColorMapObject *genDefColorMap(void) {
- ColorMapObject *cmap = GifMakeMapObject(256, NULL);
- if (cmap != NULL) {
- int iColor;
- for (iColor = 0; iColor < 256; iColor++) {
- cmap->Colors[iColor].Red = (GifByteType) iColor;
- cmap->Colors[iColor].Green = (GifByteType) iColor;
- cmap->Colors[iColor].Blue = (GifByteType) iColor;
- }
- }
- return cmap;
-}
-
-jint gifOnJNILoad(JavaVM *vm, void *reserved, JNIEnv *env) {
- defaultCmap = genDefColorMap();
- if (defaultCmap == NULL) {
- return -1;
- }
- return JNI_VERSION_1_6;
-}
-
-void gifOnJNIUnload(JavaVM *vm, void *reserved) {
- GifFreeMapObject(defaultCmap);
-}
-
-static int fileReadFunc(GifFileType *gif, GifByteType *bytes, int size) {
- FILE *file = (FILE *)gif->UserData;
- return fread(bytes, 1, size, file);
-}
-
-static int fileRewindFun(GifInfo *info) {
- return fseek(info->gifFilePtr->UserData, info->startPos, SEEK_SET);
-}
-
-static unsigned long getRealTime() {
- struct timespec ts;
- const clockid_t id = CLOCK_MONOTONIC;
- if (id != (clockid_t) - 1 && clock_gettime(id, &ts) != -1) {
- return ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
- }
- return -1;
-}
-
-static void cleanUp(GifInfo *info) {
- if (info->backupPtr) {
- free(info->backupPtr);
- info->backupPtr = NULL;
- }
- if (info->infos) {
- free(info->infos);
- info->infos = NULL;
- }
- if (info->rasterBits) {
- free(info->rasterBits);
- info->rasterBits = NULL;
- }
- if (info->comment) {
- free(info->comment);
- info->comment = NULL;
- }
-
- GifFileType *GifFile = info->gifFilePtr;
- if (GifFile->SColorMap == defaultCmap) {
- GifFile->SColorMap = NULL;
- }
- if (GifFile->SavedImages != NULL) {
- SavedImage *sp;
- for (sp = GifFile->SavedImages; sp < GifFile->SavedImages + GifFile->ImageCount; sp++) {
- if (sp->ImageDesc.ColorMap != NULL) {
- GifFreeMapObject(sp->ImageDesc.ColorMap);
- sp->ImageDesc.ColorMap = NULL;
- }
- }
- free(GifFile->SavedImages);
- GifFile->SavedImages = NULL;
- }
- DGifCloseFile(GifFile);
- free(info);
-}
-
-static int getComment(GifByteType *Bytes, char **cmt) {
- unsigned int len = (unsigned int) Bytes[0];
- unsigned int offset = *cmt != NULL ? strlen(*cmt) : 0;
- char *ret = realloc(*cmt, (len + offset + 1) * sizeof(char));
- if (ret != NULL) {
- memcpy(ret + offset, &Bytes[1], len);
- ret[len + offset] = 0;
- *cmt = ret;
- return GIF_OK;
- }
- return GIF_ERROR;
-}
-
-static void packARGB32(argb *pixel, GifByteType alpha, GifByteType red, GifByteType green, GifByteType blue) {
- pixel->alpha = alpha;
- pixel->red = red;
- pixel->green = green;
- pixel->blue = blue;
-}
-
-static void getColorFromTable(int idx, argb *dst, const ColorMapObject *cmap) {
- int colIdx = (idx >= cmap->ColorCount) ? 0 : idx;
- GifColorType *col = &cmap->Colors[colIdx];
- packARGB32(dst, 0xFF, col->Red, col->Green, col->Blue);
-}
-
-static void eraseColor(argb *bm, int w, int h, argb color) {
- int i;
- for (i = 0; i < w * h; i++) {
- *(bm + i) = color;
- }
-}
-
-static inline bool setupBackupBmp(GifInfo *info, short transpIndex) {
- GifFileType *fGIF = info->gifFilePtr;
- info->backupPtr = calloc(fGIF->SWidth * fGIF->SHeight, sizeof(argb));
- if (!info->backupPtr) {
- info->gifFilePtr->Error = D_GIF_ERR_NOT_ENOUGH_MEM;
- return false;
- }
- argb paintingColor;
- if (transpIndex == -1) {
- getColorFromTable(fGIF->SBackGroundColor, &paintingColor, fGIF->SColorMap);
- } else {
- packARGB32(&paintingColor, 0, 0, 0, 0);
- }
- eraseColor(info->backupPtr, fGIF->SWidth, fGIF->SHeight, paintingColor);
- return true;
-}
-
-static int readExtensions(int ExtFunction, GifByteType *ExtData, GifInfo *info) {
- if (ExtData == NULL) {
- return GIF_OK;
- }
- if (ExtFunction == GRAPHICS_EXT_FUNC_CODE && ExtData[0] == 4) {
- FrameInfo *fi = &info->infos[info->gifFilePtr->ImageCount];
- fi->transpIndex = -1;
- char *b = (char*) ExtData + 1;
- short delay = ((b[2] << 8) | b[1]);
- fi->duration = delay > 1 ? delay * 10 : 100;
- fi->disposalMethod = ((b[0] >> 2) & 7);
- if (ExtData[1] & 1) {
- fi->transpIndex = 0xff & b[3];
- }
- if (fi->disposalMethod == 3 && info->backupPtr == NULL) {
- if (!setupBackupBmp(info, fi->transpIndex)) {
- return GIF_ERROR;
- }
- }
- } else if (ExtFunction == COMMENT_EXT_FUNC_CODE) {
- if (getComment(ExtData, &info->comment) == GIF_ERROR) {
- info->gifFilePtr->Error = D_GIF_ERR_NOT_ENOUGH_MEM;
- return GIF_ERROR;
- }
- } else if (ExtFunction == APPLICATION_EXT_FUNC_CODE && ExtData[0] == 11) {
- if (strncmp("NETSCAPE2.0", &ExtData[1], 11) == 0 || strncmp("ANIMEXTS1.0", &ExtData[1], 11) == 0) {
- if (DGifGetExtensionNext(info->gifFilePtr, &ExtData, &ExtFunction) == GIF_ERROR) {
- return GIF_ERROR;
- }
- if (ExtFunction == APPLICATION_EXT_FUNC_CODE && ExtData[0] == 3 && ExtData[1] == 1) {
- info->loopCount = (unsigned short) (ExtData[2] + (ExtData[3] << 8));
- }
- }
- }
- return GIF_OK;
-}
-
-static int DDGifSlurp(GifFileType *GifFile, GifInfo* info, bool shouldDecode) {
- GifRecordType RecordType;
- GifByteType *ExtData;
- int codeSize;
- int ExtFunction;
- size_t ImageSize;
- do {
- if (DGifGetRecordType(GifFile, &RecordType) == GIF_ERROR) {
- return (GIF_ERROR);
- }
- switch (RecordType) {
- case IMAGE_DESC_RECORD_TYPE:
-
- if (DGifGetImageDesc(GifFile, !shouldDecode) == GIF_ERROR) {
- return (GIF_ERROR);
- }
- int i = shouldDecode ? info->currentIndex : GifFile->ImageCount - 1;
- SavedImage *sp = &GifFile->SavedImages[i];
- ImageSize = sp->ImageDesc.Width * sp->ImageDesc.Height;
-
- if (sp->ImageDesc.Width < 1 || sp->ImageDesc.Height < 1 || ImageSize > (SIZE_MAX / sizeof(GifPixelType))) {
- GifFile->Error = D_GIF_ERR_INVALID_IMG_DIMS;
- return GIF_ERROR;
- }
- if (sp->ImageDesc.Width > GifFile->SWidth || sp->ImageDesc.Height > GifFile->SHeight) {
- GifFile->Error = D_GIF_ERR_IMG_NOT_CONFINED;
- return GIF_ERROR;
- }
- if (shouldDecode) {
- sp->RasterBits = info->rasterBits;
- if (sp->ImageDesc.Interlace) {
- int i, j;
- int InterlacedOffset[] = { 0, 4, 2, 1 };
- int InterlacedJumps[] = { 8, 8, 4, 2 };
- for (i = 0; i < 4; i++) {
- for (j = InterlacedOffset[i]; j < sp->ImageDesc.Height; j += InterlacedJumps[i]) {
- if (DGifGetLine(GifFile, sp->RasterBits + j * sp->ImageDesc.Width, sp->ImageDesc.Width) == GIF_ERROR) {
- return GIF_ERROR;
- }
- }
- }
- } else {
- if (DGifGetLine(GifFile, sp->RasterBits, ImageSize) == GIF_ERROR) {
- return (GIF_ERROR);
- }
- }
- if (info->currentIndex >= GifFile->ImageCount - 1) {
- if (info->loopCount > 0) {
- info->currentLoop++;
- }
- if (fileRewindFun(info) != 0) {
- info->gifFilePtr->Error = D_GIF_ERR_READ_FAILED;
- return GIF_ERROR;
- }
- }
- return GIF_OK;
- } else {
- if (DGifGetCode(GifFile, &codeSize, &ExtData) == GIF_ERROR) {
- return (GIF_ERROR);
- }
- while (ExtData != NULL) {
- if (DGifGetCodeNext(GifFile, &ExtData) == GIF_ERROR) {
- return (GIF_ERROR);
- }
- }
- }
- break;
-
- case EXTENSION_RECORD_TYPE:
- if (DGifGetExtension(GifFile, &ExtFunction, &ExtData) == GIF_ERROR) {
- return (GIF_ERROR);
- }
-
- if (!shouldDecode) {
- FrameInfo *tmpInfos = realloc(info->infos, (GifFile->ImageCount + 1) * sizeof(FrameInfo));
- if (tmpInfos == NULL) {
- return GIF_ERROR;
- }
- info->infos = tmpInfos;
- if (readExtensions(ExtFunction, ExtData, info) == GIF_ERROR) {
- return GIF_ERROR;
- }
- }
- while (ExtData != NULL) {
- if (DGifGetExtensionNext(GifFile, &ExtData, &ExtFunction) == GIF_ERROR) {
- return (GIF_ERROR);
- }
- if (!shouldDecode) {
- if (readExtensions(ExtFunction, ExtData, info) == GIF_ERROR) {
- return GIF_ERROR;
- }
- }
- }
- break;
-
- case TERMINATE_RECORD_TYPE:
- break;
-
- default:
- break;
- }
- } while (RecordType != TERMINATE_RECORD_TYPE);
- bool ok = true;
- if (shouldDecode) {
- ok = (fileRewindFun(info) == 0);
- }
- if (ok) {
- return (GIF_OK);
- } else {
- info->gifFilePtr->Error = D_GIF_ERR_READ_FAILED;
- return (GIF_ERROR);
- }
-}
-
-static void copyLine(argb *dst, const unsigned char *src, const ColorMapObject *cmap, int transparent, int width) {
- for (; width > 0; width--, src++, dst++) {
- if (*src != transparent) {
- getColorFromTable(*src, dst, cmap);
- }
- }
-}
-
-static argb *getAddr(argb *bm, int width, int left, int top) {
- return bm + top * width + left;
-}
-
-static void blitNormal(argb *bm, int width, int height, const SavedImage *frame, const ColorMapObject *cmap, int transparent) {
- const unsigned char* src = (unsigned char*) frame->RasterBits;
- argb *dst = getAddr(bm, width, frame->ImageDesc.Left, frame->ImageDesc.Top);
- GifWord copyWidth = frame->ImageDesc.Width;
- if (frame->ImageDesc.Left + copyWidth > width) {
- copyWidth = width - frame->ImageDesc.Left;
- }
-
- GifWord copyHeight = frame->ImageDesc.Height;
- if (frame->ImageDesc.Top + copyHeight > height) {
- copyHeight = height - frame->ImageDesc.Top;
- }
-
- for (; copyHeight > 0; copyHeight--) {
- copyLine(dst, src, cmap, transparent, copyWidth);
- src += frame->ImageDesc.Width;
- dst += width;
- }
-}
-
-static void fillRect(argb *bm, int bmWidth, int bmHeight, GifWord left, GifWord top, GifWord width, GifWord height, argb col) {
- uint32_t* dst = (uint32_t*) getAddr(bm, bmWidth, left, top);
- GifWord copyWidth = width;
- if (left + copyWidth > bmWidth) {
- copyWidth = bmWidth - left;
- }
-
- GifWord copyHeight = height;
- if (top + copyHeight > bmHeight) {
- copyHeight = bmHeight - top;
- }
- uint32_t* pColor = (uint32_t *) (&col);
- for (; copyHeight > 0; copyHeight--) {
- memset(dst, *pColor, copyWidth * sizeof(argb));
- dst += bmWidth;
- }
-}
-
-static void drawFrame(argb *bm, int bmWidth, int bmHeight, const SavedImage *frame, const ColorMapObject *cmap, short transpIndex) {
- if (frame->ImageDesc.ColorMap != NULL) {
- cmap = frame->ImageDesc.ColorMap;
- if (cmap->ColorCount != (1 << cmap->BitsPerPixel)) {
- cmap = defaultCmap;
- }
- }
- blitNormal(bm, bmWidth, bmHeight, frame, cmap, transpIndex);
-}
-
-static bool checkIfCover(const SavedImage *target, const SavedImage *covered) {
- if (target->ImageDesc.Left <= covered->ImageDesc.Left
- && covered->ImageDesc.Left + covered->ImageDesc.Width
- <= target->ImageDesc.Left + target->ImageDesc.Width
- && target->ImageDesc.Top <= covered->ImageDesc.Top
- && covered->ImageDesc.Top + covered->ImageDesc.Height
- <= target->ImageDesc.Top + target->ImageDesc.Height) {
- return true;
- }
- return false;
-}
-
-static inline void disposeFrameIfNeeded(argb *bm, GifInfo *info, unsigned int idx) {
- argb* backup = info->backupPtr;
- argb color;
- packARGB32(&color, 0, 0, 0, 0);
- GifFileType *fGif = info->gifFilePtr;
- SavedImage* cur = &fGif->SavedImages[idx - 1];
- SavedImage* next = &fGif->SavedImages[idx];
- bool curTrans = info->infos[idx - 1].transpIndex != -1;
- int curDisposal = info->infos[idx - 1].disposalMethod;
- bool nextTrans = info->infos[idx].transpIndex != -1;
- int nextDisposal = info->infos[idx].disposalMethod;
- argb *tmp;
- if ((curDisposal == 2 || curDisposal == 3) && (nextTrans || !checkIfCover(next, cur))) {
- switch (curDisposal) {
- case 2:
-
- fillRect(bm, fGif->SWidth, fGif->SHeight, cur->ImageDesc.Left, cur->ImageDesc.Top, cur->ImageDesc.Width, cur->ImageDesc.Height, color);
- break;
-
- case 3:
- tmp = bm;
- bm = backup;
- backup = tmp;
- break;
- }
- }
-
- if (nextDisposal == 3) {
- memcpy(backup, bm, fGif->SWidth * fGif->SHeight * sizeof(argb));
- }
-}
-
-static void reset(GifInfo *info) {
- if (fileRewindFun(info) != 0) {
- return;
- }
- info->nextStartTime = 0;
- info->currentLoop = -1;
- info->currentIndex = -1;
-}
-
-static void getBitmap(argb *bm, GifInfo *info) {
- GifFileType* fGIF = info->gifFilePtr;
-
- argb paintingColor;
- int i = info->currentIndex;
- if (DDGifSlurp(fGIF, info, true) == GIF_ERROR) {
- return;
- }
- SavedImage* cur = &fGIF->SavedImages[i];
- int transpIndex = info->infos[i].transpIndex;
- if (i == 0) {
- if (transpIndex == -1) {
- getColorFromTable(fGIF->SBackGroundColor, &paintingColor, fGIF->SColorMap);
- } else {
- packARGB32(&paintingColor, 0, 0, 0, 0);
- }
- eraseColor(bm, fGIF->SWidth, fGIF->SHeight, paintingColor);
- } else {
- disposeFrameIfNeeded(bm, info, i);
- }
- drawFrame(bm, fGIF->SWidth, fGIF->SHeight, cur, fGIF->SColorMap, transpIndex);
-}
-
-static void setMetaData(int width, int height, int ImageCount, int errorCode, JNIEnv *env, jintArray metaData) {
- jint *const ints = (*env)->GetIntArrayElements(env, metaData, 0);
- if (ints == NULL) {
- return;
- }
- ints[0] = width;
- ints[1] = height;
- ints[2] = ImageCount;
- ints[3] = errorCode;
- (*env)->ReleaseIntArrayElements(env, metaData, ints, 0);
-}
-
-static jint open(GifFileType *GifFileIn, int Error, int startPos, JNIEnv *env, jintArray metaData) {
- if (startPos < 0) {
- Error = D_GIF_ERR_NOT_READABLE;
- DGifCloseFile(GifFileIn);
- }
- if (Error != 0 || GifFileIn == NULL) {
- setMetaData(0, 0, 0, Error, env, metaData);
- return (jint) NULL;
- }
- int width = GifFileIn->SWidth, height = GifFileIn->SHeight;
- unsigned int wxh = width * height;
- if (wxh < 1 || wxh > INT_MAX) {
- DGifCloseFile(GifFileIn);
- setMetaData(width, height, 0, D_GIF_ERR_INVALID_SCR_DIMS, env, metaData);
- return (jint) NULL;
- }
- GifInfo *info = malloc(sizeof(GifInfo));
- if (info == NULL) {
- DGifCloseFile(GifFileIn);
- setMetaData(width, height, 0, D_GIF_ERR_NOT_ENOUGH_MEM, env, metaData);
- return (jint) NULL;
- }
- info->gifFilePtr = GifFileIn;
- info->startPos = startPos;
- info->currentIndex = -1;
- info->nextStartTime = 0;
- info->lastFrameReaminder = ULONG_MAX;
- info->comment = NULL;
- info->loopCount = 0;
- info->currentLoop = -1;
- info->speedFactor = 1.0;
- info->rasterBits = calloc(GifFileIn->SHeight * GifFileIn->SWidth, sizeof(GifPixelType));
- info->infos = malloc(sizeof(FrameInfo));
- info->backupPtr = NULL;
-
- if (info->rasterBits == NULL || info->infos == NULL) {
- cleanUp(info);
- setMetaData(width, height, 0, D_GIF_ERR_NOT_ENOUGH_MEM, env, metaData);
- return (jint) NULL;
- }
- info->infos->duration = 0;
- info->infos->disposalMethod = 0;
- info->infos->transpIndex = -1;
- if (GifFileIn->SColorMap == NULL || GifFileIn->SColorMap->ColorCount != (1 << GifFileIn->SColorMap->BitsPerPixel)) {
- GifFreeMapObject(GifFileIn->SColorMap);
- GifFileIn->SColorMap = defaultCmap;
- }
-
- DDGifSlurp(GifFileIn, info, false);
-
- int imgCount = GifFileIn->ImageCount;
-
- if (imgCount < 1) {
- Error = D_GIF_ERR_NO_FRAMES;
- }
- if (fileRewindFun(info) != 0) {
- Error = D_GIF_ERR_READ_FAILED;
- }
- if (Error != 0) {
- cleanUp(info);
- }
- setMetaData(width, height, imgCount, Error, env, metaData);
- return (jint)(Error == 0 ? info : NULL);
-}
-
-JNIEXPORT jlong JNICALL Java_org_telegram_ui_Components_GifDrawable_getAllocationByteCount(JNIEnv *env, jclass class, jobject gifInfo) {
- GifInfo *info = (GifInfo *)gifInfo;
- if (info == NULL) {
- return 0;
- }
- unsigned int pxCount = info->gifFilePtr->SWidth + info->gifFilePtr->SHeight;
- jlong sum = pxCount * sizeof(char);
- if (info->backupPtr != NULL) {
- sum += pxCount * sizeof(argb);
- }
- return sum;
-}
-
-JNIEXPORT void JNICALL Java_org_telegram_ui_Components_GifDrawable_reset(JNIEnv *env, jclass class, jobject gifInfo) {
- GifInfo *info = (GifInfo *)gifInfo;
- if (info == NULL) {
- return;
- }
- reset(info);
-}
-
-JNIEXPORT void JNICALL Java_org_telegram_ui_Components_GifDrawable_setSpeedFactor(JNIEnv *env, jclass class, jobject gifInfo, jfloat factor) {
- GifInfo *info = (GifInfo *)gifInfo;
- if (info == NULL) {
- return;
- }
- info->speedFactor = factor;
-}
-
-JNIEXPORT void JNICALL Java_org_telegram_ui_Components_GifDrawable_seekToTime(JNIEnv *env, jclass class, jobject gifInfo, jint desiredPos, jintArray jPixels) {
- GifInfo *info = (GifInfo *)gifInfo;
- if (info == NULL || jPixels == NULL) {
- return;
- }
- int imgCount = info->gifFilePtr->ImageCount;
- if (imgCount <= 1) {
- return;
- }
-
- unsigned long sum = 0;
- int i;
- for (i = 0; i < imgCount; i++) {
- unsigned long newSum = sum + info->infos[i].duration;
- if (newSum >= desiredPos) {
- break;
- }
- sum = newSum;
- }
- if (i < info->currentIndex) {
- return;
- }
-
- unsigned long lastFrameRemainder = desiredPos - sum;
- if (i == imgCount - 1 && lastFrameRemainder > info->infos[i].duration) {
- lastFrameRemainder = info->infos[i].duration;
- }
- if (i > info->currentIndex) {
- jint *const pixels = (*env)->GetIntArrayElements(env, jPixels, 0);
- if (pixels == NULL) {
- return;
- }
- while (info->currentIndex <= i) {
- info->currentIndex++;
- getBitmap((argb*) pixels, info);
- }
- (*env)->ReleaseIntArrayElements(env, jPixels, pixels, 0);
- }
- info->lastFrameReaminder = lastFrameRemainder;
-
- if (info->speedFactor == 1.0) {
- info->nextStartTime = getRealTime() + lastFrameRemainder;
- } else {
- info->nextStartTime = getRealTime() + lastFrameRemainder * info->speedFactor;
- }
-}
-
-JNIEXPORT void JNICALL Java_org_telegram_ui_Components_GifDrawable_seekToFrame(JNIEnv *env, jclass class, jobject gifInfo, jint desiredIdx, jintArray jPixels) {
- GifInfo *info = (GifInfo *)gifInfo;
- if (info == NULL|| jPixels==NULL) {
- return;
- }
- if (desiredIdx <= info->currentIndex) {
- return;
- }
-
- int imgCount = info->gifFilePtr->ImageCount;
- if (imgCount <= 1) {
- return;
- }
-
- jint *const pixels = (*env)->GetIntArrayElements(env, jPixels, 0);
- if (pixels == NULL) {
- return;
- }
-
- info->lastFrameReaminder = 0;
- if (desiredIdx >= imgCount) {
- desiredIdx = imgCount - 1;
- }
-
- while (info->currentIndex < desiredIdx) {
- info->currentIndex++;
- getBitmap((argb *) pixels, info);
- }
- (*env)->ReleaseIntArrayElements(env, jPixels, pixels, 0);
- if (info->speedFactor == 1.0) {
- info->nextStartTime = getRealTime() + info->infos[info->currentIndex].duration;
- } else {
- info->nextStartTime = getRealTime() + info->infos[info->currentIndex].duration * info->speedFactor;
- }
-}
-
-JNIEXPORT void JNICALL Java_org_telegram_ui_Components_GifDrawable_renderFrame(JNIEnv *env, jclass class, jintArray jPixels, jobject gifInfo, jintArray metaData) {
- GifInfo *info = (GifInfo *)gifInfo;
- if (info == NULL || jPixels == NULL) {
- return;
- }
- bool needRedraw = false;
- unsigned long rt = getRealTime();
-
- if (rt >= info->nextStartTime && info->currentLoop < info->loopCount) {
- if (++info->currentIndex >= info->gifFilePtr->ImageCount) {
- info->currentIndex = 0;
- }
- needRedraw = true;
- }
- jint *const rawMetaData = (*env)->GetIntArrayElements(env, metaData, 0);
- if (rawMetaData == NULL) {
- return;
- }
-
- if (needRedraw) {
- jint *const pixels = (*env)->GetIntArrayElements(env, jPixels, 0);
- if (pixels == NULL) {
- (*env)->ReleaseIntArrayElements(env, metaData, rawMetaData, 0);
- return;
- }
- getBitmap((argb *)pixels, info);
- rawMetaData[3] = info->gifFilePtr->Error;
-
- (*env)->ReleaseIntArrayElements(env, jPixels, pixels, 0);
- unsigned int scaledDuration = info->infos[info->currentIndex].duration;
- if (info->speedFactor != 1.0) {
- scaledDuration /= info->speedFactor;
- if (scaledDuration<=0) {
- scaledDuration=1;
- } else if (scaledDuration > INT_MAX) {
- scaledDuration = INT_MAX;
- }
- }
- info->nextStartTime = rt + scaledDuration;
- rawMetaData[4] = scaledDuration;
- } else {
- long delay = info->nextStartTime-rt;
- if (delay < 0) {
- rawMetaData[4] = -1;
- } else {
- rawMetaData[4] = (int) delay;
- }
- }
- (*env)->ReleaseIntArrayElements(env, metaData, rawMetaData, 0);
-}
-
-JNIEXPORT void JNICALL Java_org_telegram_ui_Components_GifDrawable_free(JNIEnv *env, jclass class, jobject gifInfo) {
- if (gifInfo == NULL) {
- return;
- }
- GifInfo *info = (GifInfo *)gifInfo;
- FILE *file = info->gifFilePtr->UserData;
- if (file) {
- fclose(file);
- }
- info->gifFilePtr->UserData = NULL;
- cleanUp(info);
-}
-
-JNIEXPORT jstring JNICALL Java_org_telegram_ui_Components_GifDrawable_getComment(JNIEnv *env, jclass class, jobject gifInfo) {
- if (gifInfo == NULL) {
- return NULL;
- }
- GifInfo *info = (GifInfo *)gifInfo;
- return (*env)->NewStringUTF(env, info->comment);
-}
-
-JNIEXPORT jint JNICALL Java_org_telegram_ui_Components_GifDrawable_getLoopCount(JNIEnv *env, jclass class, jobject gifInfo) {
- if (gifInfo == NULL) {
- return 0;
- }
- return ((GifInfo *)gifInfo)->loopCount;
-}
-
-JNIEXPORT jint JNICALL Java_org_telegram_ui_Components_GifDrawable_getDuration(JNIEnv *env, jclass class, jobject gifInfo) {
- GifInfo *info = (GifInfo *)gifInfo;
- if (info == NULL) {
- return 0;
- }
- int i;
- unsigned long sum = 0;
- for (i = 0; i < info->gifFilePtr->ImageCount; i++) {
- sum += info->infos[i].duration;
- }
- return sum;
-}
-
-JNIEXPORT jint JNICALL Java_org_telegram_ui_Components_GifDrawable_getCurrentPosition(JNIEnv *env, jclass class, jobject gifInfo) {
- GifInfo *info = (GifInfo *)gifInfo;
- if (info == NULL) {
- return 0;
- }
- int idx = info->currentIndex;
- if (idx < 0 || info->gifFilePtr->ImageCount <= 1) {
- return 0;
- }
- int i;
- unsigned int sum = 0;
- for (i = 0; i < idx; i++) {
- sum += info->infos[i].duration;
- }
- unsigned long remainder = info->lastFrameReaminder == ULONG_MAX ? getRealTime() - info->nextStartTime : info->lastFrameReaminder;
- return (int) (sum + remainder);
-}
-
-JNIEXPORT void JNICALL Java_org_telegram_ui_Components_GifDrawable_saveRemainder(JNIEnv *env, jclass class, jobject gifInfo) {
- GifInfo *info = (GifInfo *)gifInfo;
- if (info == NULL) {
- return;
- }
- info->lastFrameReaminder = getRealTime() - info->nextStartTime;
-}
-
-JNIEXPORT void JNICALL Java_org_telegram_ui_Components_GifDrawable_restoreRemainder(JNIEnv *env, jclass class, jobject gifInfo) {
- GifInfo *info = (GifInfo *)gifInfo;
- if (info == NULL || info->lastFrameReaminder == ULONG_MAX) {
- return;
- }
- info->nextStartTime = getRealTime() + info->lastFrameReaminder;
- info->lastFrameReaminder = ULONG_MAX;
-}
-
-JNIEXPORT jint JNICALL Java_org_telegram_ui_Components_GifDrawable_openFile(JNIEnv *env, jclass class, jintArray metaData, jstring jfname) {
- if (jfname == NULL) {
- setMetaData(0, 0, 0, D_GIF_ERR_OPEN_FAILED, env, metaData);
- return (jint) NULL;
- }
-
- const char *const fname = (*env)->GetStringUTFChars(env, jfname, 0);
- FILE *file = fopen(fname, "rb");
- (*env)->ReleaseStringUTFChars(env, jfname, fname);
- if (file == NULL) {
- setMetaData(0, 0, 0, D_GIF_ERR_OPEN_FAILED, env, metaData);
- return (jint) NULL;
- }
- int Error = 0;
- GifFileType *GifFileIn = DGifOpen(file, &fileReadFunc, &Error);
- return open(GifFileIn, Error, ftell(file), env, metaData);
-}
diff --git a/TMessagesProj/jni/gif.h b/TMessagesProj/jni/gif.h
deleted file mode 100644
index 96409be19..000000000
--- a/TMessagesProj/jni/gif.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef gif_h
-#define gif_h
-
-jint gifOnJNILoad(JavaVM *vm, void *reserved, JNIEnv *env);
-void gifOnJNIUnload(JavaVM *vm, void *reserved);
-
-#endif
\ No newline at end of file
diff --git a/TMessagesProj/jni/giflib/COPYING b/TMessagesProj/jni/giflib/COPYING
deleted file mode 100644
index b9c0b5012..000000000
--- a/TMessagesProj/jni/giflib/COPYING
+++ /dev/null
@@ -1,19 +0,0 @@
-The GIFLIB distribution is Copyright (c) 1997 Eric S. Raymond
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
diff --git a/TMessagesProj/jni/giflib/config.h b/TMessagesProj/jni/giflib/config.h
deleted file mode 100755
index e68b0d9ee..000000000
--- a/TMessagesProj/jni/giflib/config.h
+++ /dev/null
@@ -1,13 +0,0 @@
-
-// giflib config.h
-
-#ifndef GIF_CONFIG_H_DEFINED
-#define GIF_CONFIG_H_DEFINED
-
-#include <stdint.h>
-#define HAVE_STDINT_H
-#define HAVE_FCNTL_H
-
-typedef uint32_t UINT32;
-
-#endif
diff --git a/TMessagesProj/jni/giflib/dgif_lib.c b/TMessagesProj/jni/giflib/dgif_lib.c
deleted file mode 100755
index 92442408b..000000000
--- a/TMessagesProj/jni/giflib/dgif_lib.c
+++ /dev/null
@@ -1,1167 +0,0 @@
-/******************************************************************************
-
-dgif_lib.c - GIF decoding
-
-The functions here and in egif_lib.c are partitioned carefully so that
-if you only require one of read and write capability, only one of these
-two modules will be linked. Preserve this property!
-
-*****************************************************************************/
-
-#include <stdlib.h>
-#include <limits.h>
-#include <stdint.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <string.h>
-
-#ifdef _WIN32
-#include <io.h>
-#endif /* _WIN32 */
-
-#include "gif_lib.h"
-#include "gif_lib_private.h"
-
-/* compose unsigned little endian value */
-#define UNSIGNED_LITTLE_ENDIAN(lo, hi) ((lo) | ((hi) << 8))
-
-/* avoid extra function call in case we use fread (TVT) */
-#define READ(_gif,_buf,_len) \
- (((GifFilePrivateType*)_gif->Private)->Read ? \
- ((GifFilePrivateType*)_gif->Private)->Read(_gif,_buf,_len) : \
- fread(_buf,1,_len,((GifFilePrivateType*)_gif->Private)->File))
-
-static int DGifGetWord(GifFileType *GifFile, GifWord *Word);
-static int DGifSetupDecompress(GifFileType *GifFile);
-static int DGifDecompressLine(GifFileType *GifFile, GifPixelType *Line,
- int LineLen);
-static int DGifGetPrefixChar(GifPrefixType *Prefix, int Code, int ClearCode);
-static int DGifDecompressInput(GifFileType *GifFile, int *Code);
-static int DGifBufferedInput(GifFileType *GifFile, GifByteType *Buf,
- GifByteType *NextByte);
-
-/******************************************************************************
- Open a new GIF file for read, given by its name.
- Returns dynamically allocated GifFileType pointer which serves as the GIF
- info record.
-******************************************************************************/
-GifFileType *
-DGifOpenFileName(const char *FileName, int *Error)
-{
- int FileHandle;
- GifFileType *GifFile;
-
- if ((FileHandle = open(FileName, O_RDONLY)) == -1) {
- if (Error != NULL)
- *Error = D_GIF_ERR_OPEN_FAILED;
- return NULL;
- }
- GifFile = DGifOpenFileHandle(FileHandle, Error);
- return GifFile;
-}
-
-/******************************************************************************
- Update a new GIF file, given its file handle.
- Returns dynamically allocated GifFileType pointer which serves as the GIF
- info record.
-******************************************************************************/
-GifFileType *
-DGifOpenFileHandle(int FileHandle, int *Error)
-{
- char Buf[GIF_STAMP_LEN + 1];
- GifFileType *GifFile;
- GifFilePrivateType *Private;
- FILE *f;
-
- GifFile = (GifFileType *)malloc(sizeof(GifFileType));
- if (GifFile == NULL) {
- if (Error != NULL)
- *Error = D_GIF_ERR_NOT_ENOUGH_MEM;
- (void)close(FileHandle);
- return NULL;
- }
-
- /*@i1@*/memset(GifFile, '\0', sizeof(GifFileType));
-
- /* Belt and suspenders, in case the null pointer isn't zero */
- GifFile->SavedImages = NULL;
- GifFile->SColorMap = NULL;
-
- Private = (GifFilePrivateType *)malloc(sizeof(GifFilePrivateType));
- if (Private == NULL) {
- if (Error != NULL)
- *Error = D_GIF_ERR_NOT_ENOUGH_MEM;
- (void)close(FileHandle);
- free((char *)GifFile);
- return NULL;
- }
-#ifdef _WIN32
- _setmode(FileHandle, O_BINARY); /* Make sure it is in binary mode. */
-#endif /* _WIN32 */
-
- f = fdopen(FileHandle, "rb"); /* Make it into a stream: */
-
- /*@-mustfreeonly@*/
- GifFile->Private = (void *)Private;
- Private->FileHandle = FileHandle;
- Private->File = f;
- Private->FileState = FILE_STATE_READ;
- Private->Read = NULL; /* don't use alternate input method (TVT) */
- GifFile->UserData = NULL; /* TVT */
- /*@=mustfreeonly@*/
-
- /* Let's see if this is a GIF file: */
- if (READ(GifFile, (unsigned char *)Buf, GIF_STAMP_LEN) != GIF_STAMP_LEN) {
- if (Error != NULL)
- *Error = D_GIF_ERR_READ_FAILED;
- (void)fclose(f);
- free((char *)Private);
- free((char *)GifFile);
- return NULL;
- }
-
- /* Check for GIF prefix at start of file */
- Buf[GIF_STAMP_LEN] = 0;
- if (strncmp(GIF_STAMP, Buf, GIF_VERSION_POS) != 0) {
- if (Error != NULL)
- *Error = D_GIF_ERR_NOT_GIF_FILE;
- (void)fclose(f);
- free((char *)Private);
- free((char *)GifFile);
- return NULL;
- }
-
- if (DGifGetScreenDesc(GifFile) == GIF_ERROR) {
- (void)fclose(f);
- free((char *)Private);
- free((char *)GifFile);
- return NULL;
- }
-
- GifFile->Error = 0;
-
- /* What version of GIF? */
- Private->gif89 = (Buf[GIF_VERSION_POS] == '9');
-
- return GifFile;
-}
-
-/******************************************************************************
- GifFileType constructor with user supplied input function (TVT)
-******************************************************************************/
-GifFileType *
-DGifOpen(void *userData, InputFunc readFunc, int *Error)
-{
- char Buf[GIF_STAMP_LEN + 1];
- GifFileType *GifFile;
- GifFilePrivateType *Private;
-
- GifFile = (GifFileType *)malloc(sizeof(GifFileType));
- if (GifFile == NULL) {
- if (Error != NULL)
- *Error = D_GIF_ERR_NOT_ENOUGH_MEM;
- return NULL;
- }
-
- memset(GifFile, '\0', sizeof(GifFileType));
-
- /* Belt and suspenders, in case the null pointer isn't zero */
- GifFile->SavedImages = NULL;
- GifFile->SColorMap = NULL;
-
- Private = (GifFilePrivateType *)malloc(sizeof(GifFilePrivateType));
- if (!Private) {
- if (Error != NULL)
- *Error = D_GIF_ERR_NOT_ENOUGH_MEM;
- free((char *)GifFile);
- return NULL;
- }
-
- GifFile->Private = (void *)Private;
- Private->FileHandle = 0;
- Private->File = NULL;
- Private->FileState = FILE_STATE_READ;
-
- Private->Read = readFunc; /* TVT */
- GifFile->UserData = userData; /* TVT */
-
- /* Lets see if this is a GIF file: */
- if (READ(GifFile, (unsigned char *)Buf, GIF_STAMP_LEN) != GIF_STAMP_LEN) {
- if (Error != NULL)
- *Error = D_GIF_ERR_READ_FAILED;
- free((char *)Private);
- free((char *)GifFile);
- return NULL;
- }
-
- /* Check for GIF prefix at start of file */
- Buf[GIF_STAMP_LEN] = '\0';
- if (strncmp(GIF_STAMP, Buf, GIF_VERSION_POS) != 0) {
- if (Error != NULL)
- *Error = D_GIF_ERR_NOT_GIF_FILE;
- free((char *)Private);
- free((char *)GifFile);
- return NULL;
- }
-
- if (DGifGetScreenDesc(GifFile) == GIF_ERROR) {
- free((char *)Private);
- free((char *)GifFile);
- *Error = D_GIF_ERR_NO_SCRN_DSCR;
- return NULL;
- }
-
- GifFile->Error = 0;
-
- /* What version of GIF? */
- Private->gif89 = (Buf[GIF_VERSION_POS] == '9');
-
- return GifFile;
-}
-
-/******************************************************************************
- This routine should be called before any other DGif calls. Note that
- this routine is called automatically from DGif file open routines.
-******************************************************************************/
-int
-DGifGetScreenDesc(GifFileType *GifFile)
-{
- int BitsPerPixel;
- bool SortFlag;
- GifByteType Buf[3];
- GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private;
-
- if (!IS_READABLE(Private)) {
- /* This file was NOT open for reading: */
- GifFile->Error = D_GIF_ERR_NOT_READABLE;
- return GIF_ERROR;
- }
-
- /* Put the screen descriptor into the file: */
- if (DGifGetWord(GifFile, &GifFile->SWidth) == GIF_ERROR ||
- DGifGetWord(GifFile, &GifFile->SHeight) == GIF_ERROR)
- return GIF_ERROR;
-
- if (READ(GifFile, Buf, 3) != 3) {
- GifFile->Error = D_GIF_ERR_READ_FAILED;
- GifFreeMapObject(GifFile->SColorMap);
- GifFile->SColorMap = NULL;
- return GIF_ERROR;
- }
- GifFile->SColorResolution = (((Buf[0] & 0x70) + 1) >> 4) + 1;
- SortFlag = (Buf[0] & 0x08) != 0;
- BitsPerPixel = (Buf[0] & 0x07) + 1;
- GifFile->SBackGroundColor = Buf[1];
- GifFile->AspectByte = Buf[2];
- if (Buf[0] & 0x80) { /* Do we have global color map? */
- int i;
-
- GifFile->SColorMap = GifMakeMapObject(1 << BitsPerPixel, NULL);
- if (GifFile->SColorMap == NULL) {
- GifFile->Error = D_GIF_ERR_NOT_ENOUGH_MEM;
- return GIF_ERROR;
- }
-
- /* Get the global color map: */
- GifFile->SColorMap->SortFlag = SortFlag;
- for (i = 0; i < GifFile->SColorMap->ColorCount; i++) {
- if (READ(GifFile, Buf, 3) != 3) {
- GifFreeMapObject(GifFile->SColorMap);
- GifFile->SColorMap = NULL;
- GifFile->Error = D_GIF_ERR_READ_FAILED;
- return GIF_ERROR;
- }
- GifFile->SColorMap->Colors[i].Red = Buf[0];
- GifFile->SColorMap->Colors[i].Green = Buf[1];
- GifFile->SColorMap->Colors[i].Blue = Buf[2];
- }
- } else {
- GifFile->SColorMap = NULL;
- }
-
- return GIF_OK;
-}
-
-/******************************************************************************
- This routine should be called before any attempt to read an image.
-******************************************************************************/
-int
-DGifGetRecordType(GifFileType *GifFile, GifRecordType* Type)
-{
- GifByteType Buf;
- GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private;
-
- if (!IS_READABLE(Private)) {
- /* This file was NOT open for reading: */
- GifFile->Error = D_GIF_ERR_NOT_READABLE;
- return GIF_ERROR;
- }
-
- if (READ(GifFile, &Buf, 1) != 1) {
- GifFile->Error = D_GIF_ERR_READ_FAILED;
- return GIF_ERROR;
- }
-
- switch (Buf) {
- case DESCRIPTOR_INTRODUCER:
- *Type = IMAGE_DESC_RECORD_TYPE;
- break;
- case EXTENSION_INTRODUCER:
- *Type = EXTENSION_RECORD_TYPE;
- break;
- case TERMINATOR_INTRODUCER:
- *Type = TERMINATE_RECORD_TYPE;
- break;
- default:
- *Type = UNDEFINED_RECORD_TYPE;
- GifFile->Error = D_GIF_ERR_WRONG_RECORD;
- return GIF_ERROR;
- }
-
- return GIF_OK;
-}
-
-/******************************************************************************
- This routine should be called before any attempt to read an image.
- Note it is assumed the Image desc. header has been read.
-******************************************************************************/
-int
-DGifGetImageDesc(GifFileType *GifFile, bool changeImageCount)
-{
- unsigned int BitsPerPixel;
- GifByteType Buf[3];
- GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private;
- SavedImage *sp;
-
- if (!IS_READABLE(Private)) {
- /* This file was NOT open for reading: */
- GifFile->Error = D_GIF_ERR_NOT_READABLE;
- return GIF_ERROR;
- }
-
- if (DGifGetWord(GifFile, &GifFile->Image.Left) == GIF_ERROR ||
- DGifGetWord(GifFile, &GifFile->Image.Top) == GIF_ERROR ||
- DGifGetWord(GifFile, &GifFile->Image.Width) == GIF_ERROR ||
- DGifGetWord(GifFile, &GifFile->Image.Height) == GIF_ERROR)
- return GIF_ERROR;
- if (READ(GifFile, Buf, 1) != 1) {
- GifFile->Error = D_GIF_ERR_READ_FAILED;
- GifFreeMapObject(GifFile->Image.ColorMap);
- GifFile->Image.ColorMap = NULL;
- return GIF_ERROR;
- }
- BitsPerPixel = (Buf[0] & 0x07) + 1;
- GifFile->Image.Interlace = (Buf[0] & 0x40) ? true : false;
-
- /* Setup the colormap */
- if (GifFile->Image.ColorMap) {
- GifFreeMapObject(GifFile->Image.ColorMap);
- GifFile->Image.ColorMap = NULL;
- }
- /* Does this image have local color map? */
- if (Buf[0] & 0x80) {
- unsigned int i;
-
- GifFile->Image.ColorMap = GifMakeMapObject(1 << BitsPerPixel, NULL);
- if (GifFile->Image.ColorMap == NULL) {
- GifFile->Error = D_GIF_ERR_NOT_ENOUGH_MEM;
- return GIF_ERROR;
- }
-
- /* Get the image local color map: */
- for (i = 0; i < GifFile->Image.ColorMap->ColorCount; i++) {
- if (READ(GifFile, Buf, 3) != 3) {
- GifFreeMapObject(GifFile->Image.ColorMap);
- GifFile->Error = D_GIF_ERR_READ_FAILED;
- GifFile->Image.ColorMap = NULL;
- return GIF_ERROR;
- }
- GifFile->Image.ColorMap->Colors[i].Red = Buf[0];
- GifFile->Image.ColorMap->Colors[i].Green = Buf[1];
- GifFile->Image.ColorMap->Colors[i].Blue = Buf[2];
- }
- }
- // if (changeImageCount)
- {
- if (GifFile->SavedImages) {
- if ((GifFile->SavedImages = (SavedImage *)realloc(GifFile->SavedImages,
- sizeof(SavedImage) *
- (GifFile->ImageCount + 1))) == NULL) {
- GifFile->Error = D_GIF_ERR_NOT_ENOUGH_MEM;
- return GIF_ERROR;
- }
- } else {
- if ((GifFile->SavedImages =
- (SavedImage *) malloc(sizeof(SavedImage))) == NULL) {
- GifFile->Error = D_GIF_ERR_NOT_ENOUGH_MEM;
- return GIF_ERROR;
- }
- }
- }
- sp = &GifFile->SavedImages[GifFile->ImageCount];
- memcpy(&sp->ImageDesc, &GifFile->Image, sizeof(GifImageDesc));
- if (GifFile->Image.ColorMap != NULL) {
- sp->ImageDesc.ColorMap = GifMakeMapObject(
- GifFile->Image.ColorMap->ColorCount,
- GifFile->Image.ColorMap->Colors);
- if (sp->ImageDesc.ColorMap == NULL) {
- GifFile->Error = D_GIF_ERR_NOT_ENOUGH_MEM;
- return GIF_ERROR;
- }
- }
- sp->RasterBits = (unsigned char *)NULL;
- sp->ExtensionBlockCount = 0;
- sp->ExtensionBlocks = (ExtensionBlock *) NULL;
- if (changeImageCount)
- GifFile->ImageCount++;
-
- Private->PixelCount = (long)GifFile->Image.Width *
- (long)GifFile->Image.Height;
-
- /* Reset decompress algorithm parameters. */
- (void)DGifSetupDecompress(GifFile);
-
- return GIF_OK;
-}
-
-/******************************************************************************
- Get one full scanned line (Line) of length LineLen from GIF file.
-******************************************************************************/
-int
-DGifGetLine(GifFileType *GifFile, GifPixelType *Line, int LineLen)
-{
- GifByteType *Dummy;
- GifFilePrivateType *Private = (GifFilePrivateType *) GifFile->Private;
-
- if (!IS_READABLE(Private)) {
- /* This file was NOT open for reading: */
- GifFile->Error = D_GIF_ERR_NOT_READABLE;
- return GIF_ERROR;
- }
-
- if (!LineLen)
- LineLen = GifFile->Image.Width;
-
- if ((Private->PixelCount -= LineLen) > 0xffff0000UL) {
- GifFile->Error = D_GIF_ERR_DATA_TOO_BIG;
- return GIF_ERROR;
- }
-
- if (DGifDecompressLine(GifFile, Line, LineLen) == GIF_OK) {
- if (Private->PixelCount == 0) {
- /* We probably won't be called any more, so let's clean up
- * everything before we return: we need to flush out all the
- * rest of the image until an empty block (size 0) is
- * detected. We use GetCodeNext.
- */
- do
- if (DGifGetCodeNext(GifFile, &Dummy) == GIF_ERROR)
- return GIF_ERROR;
- while (Dummy != NULL) ;
- }
- return GIF_OK;
- } else
- return GIF_ERROR;
-}
-
-/******************************************************************************
- Get one pixel (Pixel) from the GIF file.
-******************************************************************************/
-int
-DGifGetPixel(GifFileType *GifFile, GifPixelType Pixel)
-{
- GifByteType *Dummy;
- GifFilePrivateType *Private = (GifFilePrivateType *) GifFile->Private;
-
- if (!IS_READABLE(Private)) {
- /* This file was NOT open for reading: */
- GifFile->Error = D_GIF_ERR_NOT_READABLE;
- return GIF_ERROR;
- }
- if (--Private->PixelCount > 0xffff0000UL)
- {
- GifFile->Error = D_GIF_ERR_DATA_TOO_BIG;
- return GIF_ERROR;
- }
-
- if (DGifDecompressLine(GifFile, &Pixel, 1) == GIF_OK) {
- if (Private->PixelCount == 0) {
- /* We probably won't be called any more, so let's clean up
- * everything before we return: we need to flush out all the
- * rest of the image until an empty block (size 0) is
- * detected. We use GetCodeNext.
- */
- do
- if (DGifGetCodeNext(GifFile, &Dummy) == GIF_ERROR)
- return GIF_ERROR;
- while (Dummy != NULL) ;
- }
- return GIF_OK;
- } else
- return GIF_ERROR;
-}
-
-/******************************************************************************
- Get an extension block (see GIF manual) from GIF file. This routine only
- returns the first data block, and DGifGetExtensionNext should be called
- after this one until NULL extension is returned.
- The Extension should NOT be freed by the user (not dynamically allocated).
- Note it is assumed the Extension description header has been read.
-******************************************************************************/
-int
-DGifGetExtension(GifFileType *GifFile, int *ExtCode, GifByteType **Extension)
-{
- GifByteType Buf;
- GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private;
-
- if (!IS_READABLE(Private)) {
- /* This file was NOT open for reading: */
- GifFile->Error = D_GIF_ERR_NOT_READABLE;
- return GIF_ERROR;
- }
-
- if (READ(GifFile, &Buf, 1) != 1) {
- GifFile->Error = D_GIF_ERR_READ_FAILED;
- return GIF_ERROR;
- }
- *ExtCode = Buf;
-
- return DGifGetExtensionNext(GifFile, Extension, ExtCode);
-}
-
-/******************************************************************************
- Get a following extension block (see GIF manual) from GIF file. This
- routine should be called until NULL Extension is returned.
- The Extension should NOT be freed by the user (not dynamically allocated).
-******************************************************************************/
-int
-DGifGetExtensionNext(GifFileType *GifFile, GifByteType ** Extension, int* ExtCode)
-{
- GifByteType Buf;
- GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private;
-
- if (READ(GifFile, &Buf, 1) != 1) {
- GifFile->Error = D_GIF_ERR_READ_FAILED;
- return GIF_ERROR;
- }
- if (Buf > 0)
- {
- if (*ExtCode==GRAPHICS_EXT_FUNC_CODE)
- Buf=4;
- *Extension = Private->Buf; /* Use private unused buffer. */
- (*Extension)[0] = Buf; /* Pascal strings notation (pos. 0 is len.). */
- /* coverity[tainted_data] */
- if (READ(GifFile, &((*Extension)[1]), Buf) != Buf) {
- GifFile->Error = D_GIF_ERR_READ_FAILED;
- return GIF_ERROR;
- }
- } else
- *Extension = NULL;
-
- return GIF_OK;
-}
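
For reference, the intended calling pattern for the two routines above is a simple drain loop. An illustrative sketch, not library code (handleSubBlock is a hypothetical callback):

    int ExtCode;
    GifByteType *ExtData;

    if (DGifGetExtension(GifFile, &ExtCode, &ExtData) == GIF_ERROR)
        return GIF_ERROR;
    while (ExtData != NULL) {
        /* ExtData[0] is the sub-block length; the payload starts at ExtData[1] */
        handleSubBlock(ExtCode, &ExtData[1], ExtData[0]);
        if (DGifGetExtensionNext(GifFile, &ExtData, &ExtCode) == GIF_ERROR)
            return GIF_ERROR;
    }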
-
-/******************************************************************************
- Extract a Graphics Control Block from raw extension data
-******************************************************************************/
-
-int DGifExtensionToGCB(const size_t GifExtensionLength,
- const GifByteType *GifExtension,
- GraphicsControlBlock *GCB)
-{
- if (GifExtensionLength != 4) {
- return GIF_ERROR;
- }
-
- GCB->DisposalMode = (GifExtension[0] >> 2) & 0x07;
- GCB->UserInputFlag = (GifExtension[0] & 0x02) != 0;
- GCB->DelayTime = UNSIGNED_LITTLE_ENDIAN(GifExtension[1], GifExtension[2]);
- if (GifExtension[0] & 0x01)
- GCB->TransparentColor = (int)GifExtension[3];
- else
- GCB->TransparentColor = NO_TRANSPARENT_COLOR;
-
- return GIF_OK;
-}
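
A worked example of the bit unpacking above, assuming a typical animation GCB payload:

    /* GifExtension = { 0x09, 0x0A, 0x00, 0x05 } decodes as:
     *   DisposalMode     = (0x09 >> 2) & 0x07 = 2 (DISPOSE_BACKGROUND)
     *   UserInputFlag    = (0x09 & 0x02) != 0 = false
     *   DelayTime        = 0x0A | (0x00 << 8) = 10 (i.e. 100 ms)
     *   TransparentColor = 5 (bit 0 of byte 0 is set, so byte 3 is the index)
     */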
-
-/******************************************************************************
- Extract the Graphics Control Block for a saved image, if it exists.
-******************************************************************************/
-
-int DGifSavedExtensionToGCB(GifFileType *GifFile,
- int ImageIndex, GraphicsControlBlock *GCB)
-{
- int i;
-
- if (ImageIndex < 0 || ImageIndex > GifFile->ImageCount - 1)
- return GIF_ERROR;
-
- GCB->DisposalMode = DISPOSAL_UNSPECIFIED;
- GCB->UserInputFlag = false;
- GCB->DelayTime = 0;
- GCB->TransparentColor = NO_TRANSPARENT_COLOR;
-
- for (i = 0; i < GifFile->SavedImages[ImageIndex].ExtensionBlockCount; i++) {
- ExtensionBlock *ep = &GifFile->SavedImages[ImageIndex].ExtensionBlocks[i];
- if (ep->Function == GRAPHICS_EXT_FUNC_CODE)
- return DGifExtensionToGCB(ep->ByteCount, ep->Bytes, GCB);
- }
-
- return GIF_ERROR;
-}
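
A sketch of typical use once SavedImages has been populated (illustrative only; the rendering step is elided):

    GraphicsControlBlock gcb;
    int idx, delayMs;
    bool transparent;

    for (idx = 0; idx < GifFile->ImageCount; idx++) {
        if (DGifSavedExtensionToGCB(GifFile, idx, &gcb) == GIF_ERROR)
            continue;                      /* frame has no GCB; use defaults */
        delayMs = gcb.DelayTime * 10;      /* DelayTime is in 0.01 s units */
        transparent = (gcb.TransparentColor != NO_TRANSPARENT_COLOR);
        /* ... render GifFile->SavedImages[idx] using delayMs / transparent ... */
    }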
-
-/******************************************************************************
- This routine should be called last, to close the GIF file.
-******************************************************************************/
-int
-DGifCloseFile(GifFileType *GifFile)
-{
- GifFilePrivateType *Private;
-
- if (GifFile == NULL || GifFile->Private == NULL)
- return GIF_ERROR;
-
- if (GifFile->Image.ColorMap) {
- GifFreeMapObject(GifFile->Image.ColorMap);
- GifFile->Image.ColorMap = NULL;
- }
-
- if (GifFile->SColorMap) {
- GifFreeMapObject(GifFile->SColorMap);
- GifFile->SColorMap = NULL;
- }
-
- if (GifFile->SavedImages) {
- GifFreeSavedImages(GifFile);
- GifFile->SavedImages = NULL;
- }
-
- GifFreeExtensions(&GifFile->ExtensionBlockCount, &GifFile->ExtensionBlocks);
-
- Private = (GifFilePrivateType *) GifFile->Private;
-
- if (!IS_READABLE(Private)) {
- /* This file was NOT open for reading: */
- GifFile->Error = D_GIF_ERR_NOT_READABLE;
- return GIF_ERROR;
- }
-
- if (Private->File && (fclose(Private->File) != 0)) {
- GifFile->Error = D_GIF_ERR_CLOSE_FAILED;
- return GIF_ERROR;
- }
-
- free((char *)GifFile->Private);
-
- /*
- * Without the #ifndef, we get spurious warnings because Coverity mistakenly
- * thinks the GIF structure is freed on an error return.
- */
-#ifndef __COVERITY__
- free(GifFile);
-#endif /* __COVERITY__ */
-
- return GIF_OK;
-}
-
-/******************************************************************************
- Get 2 bytes (word) from the given file:
-******************************************************************************/
-static int
-DGifGetWord(GifFileType *GifFile, GifWord *Word)
-{
- unsigned char c[2];
-
- if (READ(GifFile, c, 2) != 2) {
- GifFile->Error = D_GIF_ERR_READ_FAILED;
- return GIF_ERROR;
- }
-
- *Word = (GifWord)UNSIGNED_LITTLE_ENDIAN(c[0], c[1]);
- return GIF_OK;
-}
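
UNSIGNED_LITTLE_ENDIAN is defined near the top of this file, outside this hunk; its effect is equivalent to the following (shown for reference, since GIF stores 16-bit words low byte first — the exact macro text is an assumption):

    #define UNSIGNED_LITTLE_ENDIAN(lo, hi) ((lo) | ((hi) << 8))
    /* e.g. c[0] = 0x40, c[1] = 0x01  ->  0x40 | (0x01 << 8) = 0x0140 = 320 */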
-
-/******************************************************************************
- Get the image code in compressed form. This routine can be called if the
- information needs to be piped out as is, which is obviously much faster
- than decoding and re-encoding. This routine should be followed by calls
- to DGifGetCodeNext until a NULL block is returned.
- The block should NOT be freed by the user (not dynamically allocated).
-******************************************************************************/
-int
-DGifGetCode(GifFileType *GifFile, int *CodeSize, GifByteType **CodeBlock)
-{
- GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private;
-
- if (!IS_READABLE(Private)) {
- /* This file was NOT open for reading: */
- GifFile->Error = D_GIF_ERR_NOT_READABLE;
- return GIF_ERROR;
- }
-
- *CodeSize = Private->BitsPerPixel;
-
- return DGifGetCodeNext(GifFile, CodeBlock);
-}
-
-/******************************************************************************
- Continue to get the image code in compressed form. This routine should be
- called until NULL block is returned.
- The block should NOT be freed by the user (not dynamically allocated).
-******************************************************************************/
-int
-DGifGetCodeNext(GifFileType *GifFile, GifByteType **CodeBlock)
-{
- GifByteType Buf;
- GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private;
-
- /* coverity[tainted_data_argument] */
- if (READ(GifFile, &Buf, 1) != 1) {
- GifFile->Error = D_GIF_ERR_READ_FAILED;
- return GIF_ERROR;
- }
-
- /* coverity[lower_bounds] */
- if (Buf > 0) {
- *CodeBlock = Private->Buf; /* Use private unused buffer. */
- (*CodeBlock)[0] = Buf; /* Pascal strings notation (pos. 0 is len.). */
- /* coverity[tainted_data] */
- if (READ(GifFile, &((*CodeBlock)[1]), Buf) != Buf) {
- GifFile->Error = D_GIF_ERR_READ_FAILED;
- return GIF_ERROR;
- }
- } else {
- *CodeBlock = NULL;
- Private->Buf[0] = 0; /* Make sure the buffer is empty! */
- Private->PixelCount = 0; /* And local info. indicate image read. */
- }
-
- return GIF_OK;
-}
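
The matching drain loop for these two routines, sketched for illustration (writeBlock is a hypothetical sink, not library code):

    int CodeSize;
    GifByteType *CodeBlock;

    if (DGifGetCode(GifFile, &CodeSize, &CodeBlock) == GIF_ERROR)
        return GIF_ERROR;
    while (CodeBlock != NULL) {
        /* CodeBlock[0] is the length; raw LZW bytes follow at CodeBlock[1] */
        writeBlock(&CodeBlock[1], CodeBlock[0]);
        if (DGifGetCodeNext(GifFile, &CodeBlock) == GIF_ERROR)
            return GIF_ERROR;
    }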
-
-/******************************************************************************
- Setup the LZ decompression for this image:
-******************************************************************************/
-static int
-DGifSetupDecompress(GifFileType *GifFile)
-{
- int i, BitsPerPixel;
- GifByteType CodeSize;
- GifPrefixType *Prefix;
- GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private;
-
- READ(GifFile, &CodeSize, 1); /* Read Code size from file. */
- BitsPerPixel = CodeSize;
-
- Private->Buf[0] = 0; /* Input Buffer empty. */
- Private->BitsPerPixel = BitsPerPixel;
- Private->ClearCode = (1 << BitsPerPixel);
- Private->EOFCode = Private->ClearCode + 1;
- Private->RunningCode = Private->EOFCode + 1;
- Private->RunningBits = BitsPerPixel + 1; /* Number of bits per code. */
- Private->MaxCode1 = 1 << Private->RunningBits; /* Max. code + 1. */
- Private->StackPtr = 0; /* No pixels on the pixel stack. */
- Private->LastCode = NO_SUCH_CODE;
- Private->CrntShiftState = 0; /* No information in CrntShiftDWord. */
- Private->CrntShiftDWord = 0;
-
- Prefix = Private->Prefix;
- for (i = 0; i <= LZ_MAX_CODE; i++)
- Prefix[i] = NO_SUCH_CODE;
-
- return GIF_OK;
-}
-
-/******************************************************************************
- The LZ decompression routine:
- This version decompresses the given GIF file into Line of length LineLen.
- This routine can be called several times (one call per scan line, for
- example) in order to complete the whole image.
-******************************************************************************/
-static int
-DGifDecompressLine(GifFileType *GifFile, GifPixelType *Line, int LineLen)
-{
- int i = 0;
- int j, CrntCode, EOFCode, ClearCode, CrntPrefix, LastCode, StackPtr;
- GifByteType *Stack, *Suffix;
- GifPrefixType *Prefix;
- GifFilePrivateType *Private = (GifFilePrivateType *) GifFile->Private;
-
- StackPtr = Private->StackPtr;
- Prefix = Private->Prefix;
- Suffix = Private->Suffix;
- Stack = Private->Stack;
- EOFCode = Private->EOFCode;
- ClearCode = Private->ClearCode;
- LastCode = Private->LastCode;
-
- if (StackPtr > LZ_MAX_CODE) {
- return GIF_ERROR;
- }
-
- if (StackPtr != 0) {
- /* Let's pop the stack off before continuing to read the GIF file: */
- while (StackPtr != 0 && i < LineLen)
- Line[i++] = Stack[--StackPtr];
- }
-
- while (i < LineLen) { /* Decode LineLen items. */
- if (DGifDecompressInput(GifFile, &CrntCode) == GIF_ERROR)
- return GIF_ERROR;
-
- if (CrntCode == EOFCode) {
- /* Note, however, that usually we will not get here, as we stop
- * decoding as soon as we have all the pixels; the EOF code is then
- * not read at all, and DGifGetLine/Pixel clean everything up. */
- GifFile->Error = D_GIF_ERR_EOF_TOO_SOON;
- return GIF_ERROR;
- } else if (CrntCode == ClearCode) {
- /* We need to start over again: */
- for (j = 0; j <= LZ_MAX_CODE; j++)
- Prefix[j] = NO_SUCH_CODE;
- Private->RunningCode = Private->EOFCode + 1;
- Private->RunningBits = Private->BitsPerPixel + 1;
- Private->MaxCode1 = 1 << Private->RunningBits;
- LastCode = Private->LastCode = NO_SUCH_CODE;
- } else {
- /* It's a regular code: if it is in pixel range, simply add it to the
- * output stream; otherwise trace the codes' linked list until the
- * prefix is in pixel range: */
- if (CrntCode < ClearCode) {
- /* This is simple: it's a pixel scalar, so add it to the output: */
- Line[i++] = CrntCode;
- } else {
- /* It's a code that needs to be traced: follow the linked list
- * until the prefix is a pixel, while pushing the suffix
- * pixels on our stack. When done, pop the stack into the
- * output in reverse order (that's what a stack is good for!). */
- if (Prefix[CrntCode] == NO_SUCH_CODE) {
- /* Only allowed if CrntCode is exactly the running code:
- * In that case CrntCode = XXXCode, CrntCode or the
- * prefix code is last code and the suffix char is
- * exactly the prefix of last code! */
- if (CrntCode == Private->RunningCode - 2) {
- CrntPrefix = LastCode;
- Suffix[Private->RunningCode - 2] =
- Stack[StackPtr++] = DGifGetPrefixChar(Prefix,
- LastCode,
- ClearCode);
- } else {
- GifFile->Error = D_GIF_ERR_IMAGE_DEFECT;
- return GIF_ERROR;
- }
- } else
- CrntPrefix = CrntCode;
-
- /* Now (if image is O.K.) we should not get a NO_SUCH_CODE
- * during the trace. As we might loop forever, in case of
- * defective image, we use StackPtr as loop counter and stop
- * before overflowing Stack[]. */
- while (StackPtr < LZ_MAX_CODE &&
- CrntPrefix > ClearCode && CrntPrefix <= LZ_MAX_CODE) {
- Stack[StackPtr++] = Suffix[CrntPrefix];
- CrntPrefix = Prefix[CrntPrefix];
- }
- if (StackPtr >= LZ_MAX_CODE || CrntPrefix > LZ_MAX_CODE) {
- GifFile->Error = D_GIF_ERR_IMAGE_DEFECT;
- return GIF_ERROR;
- }
- /* Push the last character on stack: */
- Stack[StackPtr++] = CrntPrefix;
-
- /* Now let's pop all the stack into the output: */
- while (StackPtr != 0 && i < LineLen)
- Line[i++] = Stack[--StackPtr];
- }
- if (LastCode != NO_SUCH_CODE && Prefix[Private->RunningCode - 2] == NO_SUCH_CODE) {
- Prefix[Private->RunningCode - 2] = LastCode;
-
- if (CrntCode == Private->RunningCode - 2) {
- /* Only allowed if CrntCode is exactly the running code:
- * In that case CrntCode = XXXCode, CrntCode or the
- * prefix code is last code and the suffix char is
- * exactly the prefix of last code! */
- Suffix[Private->RunningCode - 2] =
- DGifGetPrefixChar(Prefix, LastCode, ClearCode);
- } else {
- Suffix[Private->RunningCode - 2] =
- DGifGetPrefixChar(Prefix, CrntCode, ClearCode);
- }
- }
- LastCode = CrntCode;
- }
- }
-
- Private->LastCode = LastCode;
- Private->StackPtr = StackPtr;
-
- return GIF_OK;
-}
-
-/******************************************************************************
- Routine to trace the Prefix linked list until we get a prefix which is
- not a code, but a pixel value (less than ClearCode). Returns that pixel value.
- If the image is defective, we might loop here forever, so we limit the loops
- to the maximum possible for a valid image: LZ_MAX_CODE times.
-******************************************************************************/
-static int
-DGifGetPrefixChar(GifPrefixType *Prefix, int Code, int ClearCode)
-{
- int i = 0;
-
- while (Code > ClearCode && i++ <= LZ_MAX_CODE) {
- if (Code > LZ_MAX_CODE) {
- return NO_SUCH_CODE;
- }
- Code = Prefix[Code];
- }
- return Code;
-}
-
-/******************************************************************************
- Interface for accessing the LZ codes directly. Set Code to the real code
- (12bits), or to -1 if EOF code is returned.
-******************************************************************************/
-int
-DGifGetLZCodes(GifFileType *GifFile, int *Code)
-{
- GifByteType *CodeBlock;
- GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private;
-
- if (!IS_READABLE(Private)) {
- /* This file was NOT open for reading: */
- GifFile->Error = D_GIF_ERR_NOT_READABLE;
- return GIF_ERROR;
- }
-
- if (DGifDecompressInput(GifFile, Code) == GIF_ERROR)
- return GIF_ERROR;
-
- if (*Code == Private->EOFCode) {
- /* Skip rest of codes (hopefully only NULL terminating block): */
- do {
- if (DGifGetCodeNext(GifFile, &CodeBlock) == GIF_ERROR)
- return GIF_ERROR;
- } while (CodeBlock != NULL) ;
-
- *Code = -1;
- } else if (*Code == Private->ClearCode) {
- /* We need to start over again: */
- Private->RunningCode = Private->EOFCode + 1;
- Private->RunningBits = Private->BitsPerPixel + 1;
- Private->MaxCode1 = 1 << Private->RunningBits;
- }
-
- return GIF_OK;
-}
-
-/******************************************************************************
- The LZ decompression input routine:
- This routine is responsible for decomposing the bit stream from
- 8-bit (byte) packets into the real codes.
- Returns GIF_OK if read successfully.
-******************************************************************************/
-static int
-DGifDecompressInput(GifFileType *GifFile, int *Code)
-{
- static const unsigned short CodeMasks[] = {
- 0x0000, 0x0001, 0x0003, 0x0007,
- 0x000f, 0x001f, 0x003f, 0x007f,
- 0x00ff, 0x01ff, 0x03ff, 0x07ff,
- 0x0fff
- };
-
- GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private;
-
- GifByteType NextByte;
-
- /* The image can't contain more than LZ_BITS bits per code. */
- if (Private->RunningBits > LZ_BITS) {
- GifFile->Error = D_GIF_ERR_IMAGE_DEFECT;
- return GIF_ERROR;
- }
-
- while (Private->CrntShiftState < Private->RunningBits) {
- /* Needs to get more bytes from input stream for next code: */
- if (DGifBufferedInput(GifFile, Private->Buf, &NextByte) == GIF_ERROR) {
- return GIF_ERROR;
- }
- Private->CrntShiftDWord |=
- ((unsigned long)NextByte) << Private->CrntShiftState;
- Private->CrntShiftState += 8;
- }
- *Code = Private->CrntShiftDWord & CodeMasks[Private->RunningBits];
-
- Private->CrntShiftDWord >>= Private->RunningBits;
- Private->CrntShiftState -= Private->RunningBits;
-
- /* If code cannot fit into RunningBits bits, must raise its size. Note
- * however that codes above 4095 are used for special signaling.
- * If we're using LZ_BITS bits already and we're at the max code, just
- * keep using the table as it is, don't increment Private->RunningCode.
- */
- if (Private->RunningCode < LZ_MAX_CODE + 2 &&
- ++Private->RunningCode > Private->MaxCode1 &&
- Private->RunningBits < LZ_BITS) {
- Private->MaxCode1 <<= 1;
- Private->RunningBits++;
- }
- return GIF_OK;
-}
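
A worked trace of the shift-register logic above, assuming RunningBits == 9 and an initially empty register:

    /* Two buffered bytes 0x34 then 0x12 are shifted in low-first:
     *   CrntShiftDWord = 0x34 | (0x12 << 8) = 0x1234, CrntShiftState = 16
     * The extracted 9-bit code is
     *   0x1234 & CodeMasks[9] = 0x1234 & 0x01FF = 0x034
     * leaving CrntShiftDWord = 0x1234 >> 9 = 0x9 and CrntShiftState = 7.
     */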
-
-/******************************************************************************
- This routine reads one GIF data block at a time and buffers it internally
- so that the decompression routine can access it.
- The routine returns the next byte from its internal buffer (or reads the
- next block in if the buffer is empty) and returns GIF_OK if successful.
-******************************************************************************/
-static int
-DGifBufferedInput(GifFileType *GifFile, GifByteType *Buf, GifByteType *NextByte)
-{
- if (Buf[0] == 0) {
- /* Needs to read the next buffer - this one is empty: */
- if (READ(GifFile, Buf, 1) != 1) {
- GifFile->Error = D_GIF_ERR_READ_FAILED;
- return GIF_ERROR;
- }
- /* There shouldn't be any empty data blocks here as the LZW spec
- * says the LZW termination code should come first. Therefore we
- * shouldn't be inside this routine at that point.
- */
- if (Buf[0] == 0) {
- GifFile->Error = D_GIF_ERR_IMAGE_DEFECT;
- return GIF_ERROR;
- }
- if (READ(GifFile, &Buf[1], Buf[0]) != Buf[0]) {
- GifFile->Error = D_GIF_ERR_READ_FAILED;
- return GIF_ERROR;
- }
- *NextByte = Buf[1];
- Buf[1] = 2; /* We now use the second place as the last-char-read index! */
- Buf[0]--;
- } else {
- *NextByte = Buf[Buf[1]++];
- Buf[0]--;
- }
-
- return GIF_OK;
-}
-
-/******************************************************************************
- This routine reads an entire GIF into core, hanging all its state info off
- the GifFileType pointer. Call DGifOpenFileName() or DGifOpenFileHandle()
- first to initialize I/O. Its inverse is EGifSpew().
-
-int
-DGifSlurp(GifFileType *GifFile)
-{
- size_t ImageSize;
- GifRecordType RecordType;
- SavedImage *sp;
- GifByteType *ExtData;
- int ExtFunction;
-
- GifFile->ExtensionBlocks = NULL;
- GifFile->ExtensionBlockCount = 0;
-
- do {
- if (DGifGetRecordType(GifFile, &RecordType) == GIF_ERROR)
- return (GIF_ERROR);
- switch (RecordType) {
- case IMAGE_DESC_RECORD_TYPE:
- if (DGifGetImageDesc(GifFile) == GIF_ERROR)
- return (GIF_ERROR);
-
- sp = &GifFile->SavedImages[GifFile->ImageCount - 1];
- if (GifFile->ExtensionBlocks) {
- sp->ExtensionBlocks = GifFile->ExtensionBlocks;
- sp->ExtensionBlockCount = GifFile->ExtensionBlockCount;
-
- GifFile->ExtensionBlocks = NULL;
- GifFile->ExtensionBlockCount = 0;
- }
- // Allocate memory for the image
- if (sp->ImageDesc.Width <= 0 || sp->ImageDesc.Height <= 0 ||
- sp->ImageDesc.Width > (INT_MAX / sp->ImageDesc.Height)) {
- return GIF_ERROR;
- }
- ImageSize = sp->ImageDesc.Width * sp->ImageDesc.Height;
-
- if (ImageSize > (SIZE_MAX / sizeof(GifPixelType))) {
- return GIF_ERROR;
- }
- sp->RasterBits = (unsigned char *)malloc(ImageSize *
- sizeof(GifPixelType));
-
- if (sp->RasterBits == NULL) {
- return GIF_ERROR;
- }
-
- if (sp->ImageDesc.Interlace)
- {
- int i, j;
- // The way an interlaced image should be read - offsets and jumps...
-
- int InterlacedOffset[] = { 0, 4, 2, 1 };
- int InterlacedJumps[] = { 8, 8, 4, 2 };
- // Need to perform 4 passes on the image
- for (i = 0; i < 4; i++)
- for (j = InterlacedOffset[i];
- j < sp->ImageDesc.Height;
- j += InterlacedJumps[i]) {
- if (DGifGetLine(GifFile,
- sp->RasterBits+j*sp->ImageDesc.Width,
- sp->ImageDesc.Width) == GIF_ERROR)
- return GIF_ERROR;
- }
- }
- else
- {
- if (DGifGetLine(GifFile,sp->RasterBits,ImageSize)==GIF_ERROR)
- return (GIF_ERROR);
- }
- break;
-
- case EXTENSION_RECORD_TYPE:
- if (DGifGetExtension(GifFile,&ExtFunction,&ExtData) == GIF_ERROR)
- return (GIF_ERROR);
- // Create an extension block with our data
- if (GifAddExtensionBlock(&GifFile->ExtensionBlockCount,
- &GifFile->ExtensionBlocks,
- ExtFunction, ExtData[0], &ExtData[1])
- == GIF_ERROR)
- return (GIF_ERROR);
- while (ExtData != NULL) {
- if (DGifGetExtensionNext(GifFile, &ExtData, &ExtFunction) == GIF_ERROR)
- return (GIF_ERROR);
- // Continue the extension block
- if (ExtData != NULL)
- if (GifAddExtensionBlock(&GifFile->ExtensionBlockCount,
- &GifFile->ExtensionBlocks,
- CONTINUE_EXT_FUNC_CODE,
- ExtData[0], &ExtData[1]) == GIF_ERROR)
- return (GIF_ERROR);
- }
- break;
-
- case TERMINATE_RECORD_TYPE:
- break;
-
- default: // Should be trapped by DGifGetRecordType
- break;
- }
- } while (RecordType != TERMINATE_RECORD_TYPE);
-
- return (GIF_OK);
-}
-
- end */
diff --git a/TMessagesProj/jni/giflib/gif_hash.c b/TMessagesProj/jni/giflib/gif_hash.c
deleted file mode 100755
index 61a4d139c..000000000
--- a/TMessagesProj/jni/giflib/gif_hash.c
+++ /dev/null
@@ -1,132 +0,0 @@
-/*****************************************************************************
-
-gif_hash.c -- module to support the following operations:
-
-1. InitHashTable - initialize hash table.
-2. ClearHashTable - clear the hash table to an empty state.
-3. InsertHashTable - insert one item into data structure.
-4. ExistsHashTable - test if item exists in data structure.
-
-This module is used to hash the GIF codes during encoding.
-
-*****************************************************************************/
-
-#include <unistd.h>
-#include <stdlib.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <string.h>
-#include <stdint.h>
-
-#include "gif_lib.h"
-#include "gif_hash.h"
-#include "gif_lib_private.h"
-
-/* #define DEBUG_HIT_RATE Debug number of misses per hash Insert/Exists. */
-
-#ifdef DEBUG_HIT_RATE
-static long NumberOfTests = 0,
- NumberOfMisses = 0;
-#endif /* DEBUG_HIT_RATE */
-
-static int KeyItem(uint32_t Item);
-
-/******************************************************************************
- Initialize HashTable - allocate the memory needed and clear it. *
-******************************************************************************/
-GifHashTableType *_InitHashTable(void)
-{
- GifHashTableType *HashTable;
-
- if ((HashTable = (GifHashTableType *) malloc(sizeof(GifHashTableType)))
- == NULL)
- return NULL;
-
- _ClearHashTable(HashTable);
-
- return HashTable;
-}
-
-/******************************************************************************
- Routine to clear the HashTable to an empty state. *
- This part is a little machine dependent. *
-******************************************************************************/
-void _ClearHashTable(GifHashTableType *HashTable)
-{
- memset(HashTable -> HTable, 0xFF, HT_SIZE * sizeof(uint32_t));
-}
-
-/******************************************************************************
- Routine to insert a new Item into the HashTable. The data is assumed to *
- be new. *
-******************************************************************************/
-void _InsertHashTable(GifHashTableType *HashTable, uint32_t Key, int Code)
-{
- int HKey = KeyItem(Key);
- uint32_t *HTable = HashTable -> HTable;
-
-#ifdef DEBUG_HIT_RATE
- NumberOfTests++;
- NumberOfMisses++;
-#endif /* DEBUG_HIT_RATE */
-
- while (HT_GET_KEY(HTable[HKey]) != 0xFFFFFL) {
-#ifdef DEBUG_HIT_RATE
- NumberOfMisses++;
-#endif /* DEBUG_HIT_RATE */
- HKey = (HKey + 1) & HT_KEY_MASK;
- }
- HTable[HKey] = HT_PUT_KEY(Key) | HT_PUT_CODE(Code);
-}
-
-/******************************************************************************
- Routine to test if given Key exists in HashTable and if so returns its code *
- Returns the Code if key was found, -1 if not. *
-******************************************************************************/
-int _ExistsHashTable(GifHashTableType *HashTable, uint32_t Key)
-{
- int HKey = KeyItem(Key);
- uint32_t *HTable = HashTable -> HTable, HTKey;
-
-#ifdef DEBUG_HIT_RATE
- NumberOfTests++;
- NumberOfMisses++;
-#endif /* DEBUG_HIT_RATE */
-
- while ((HTKey = HT_GET_KEY(HTable[HKey])) != 0xFFFFFL) {
-#ifdef DEBUG_HIT_RATE
- NumberOfMisses++;
-#endif /* DEBUG_HIT_RATE */
- if (Key == HTKey) return HT_GET_CODE(HTable[HKey]);
- HKey = (HKey + 1) & HT_KEY_MASK;
- }
-
- return -1;
-}
-
-/******************************************************************************
- Routine to generate an HKey for the hashtable out of the given unique key. *
- The given Key is assumed to be 20 bits as follows: lower 8 bits are the *
- new postfix character, while the upper 12 bits are the prefix code. *
- Because the average hit ratio is only 2 (2 hash references per entry), *
- evaluating more complex keys (such as twin prime keys) is not worth it! *
-******************************************************************************/
-static int KeyItem(uint32_t Item)
-{
- return ((Item >> 12) ^ Item) & HT_KEY_MASK;
-}
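
For concreteness: per the comment above, the 20-bit key is the prefix code in the upper 12 bits and the new byte in the lower 8, so a prefix code of 0x123 followed by byte 0xAB hashes as follows (worked example, not library code):

    /*   Item          = (0x123 << 8) | 0xAB = 0x123AB
     *   KeyItem(Item) = ((0x123AB >> 12) ^ 0x123AB) & HT_KEY_MASK
     *                 = (0x00012 ^ 0x123AB) & 0x1FFF
     *                 = 0x123B9 & 0x1FFF = 0x03B9
     */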
-
-#ifdef DEBUG_HIT_RATE
-/******************************************************************************
- Debugging routine to print the hit ratio - number of times the hash table *
- was tested per operation. This routine was used to test the KeyItem routine *
-******************************************************************************/
-void HashTablePrintHitRatio(void)
-{
- printf("Hash Table Hit Ratio is %ld/%ld = %ld%%.\n",
- NumberOfMisses, NumberOfTests,
- NumberOfMisses * 100 / NumberOfTests);
-}
-#endif /* DEBUG_HIT_RATE */
-
-/* end */
diff --git a/TMessagesProj/jni/giflib/gif_hash.h b/TMessagesProj/jni/giflib/gif_hash.h
deleted file mode 100755
index ac20a43cb..000000000
--- a/TMessagesProj/jni/giflib/gif_hash.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/******************************************************************************
-
-gif_hash.h - magic constants and declarations for GIF LZW
-
-******************************************************************************/
-
-#ifndef _GIF_HASH_H_
-#define _GIF_HASH_H_
-
-#include <stdint.h>
-#include <stdbool.h>
-
-#define HT_SIZE 8192 /* 12bits = 4096 or twice as big! */
-#define HT_KEY_MASK 0x1FFF /* 13bits keys */
-#define HT_KEY_NUM_BITS 13 /* 13bits keys */
-#define HT_MAX_KEY 8191 /* 13bits - 1, maximal code possible */
-#define HT_MAX_CODE 4095 /* Biggest code possible in 12 bits. */
-
-/* The 32 bits of the long are divided into two parts for the key & code: */
-/* 1. The code is 12 bits as our compression algorithm is limited to 12bits */
-/* 2. The key is 12 bits Prefix code + 8 bit new char or 20 bits. */
-/* The key is the upper 20 bits. The code is the lower 12. */
-#define HT_GET_KEY(l) (l >> 12)
-#define HT_GET_CODE(l) (l & 0x0FFF)
-#define HT_PUT_KEY(l) (l << 12)
-#define HT_PUT_CODE(l) (l & 0x0FFF)
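
A worked example of the packing, using the same 20-bit key layout (illustrative):

    /* Prefix code 0x123 + suffix byte 0xAB, mapped to LZW code 0x456:
     *   Key  = (0x123 << 8) | 0xAB     = 0x123AB (20 bits)
     *   Slot = HT_PUT_KEY(Key) | HT_PUT_CODE(0x456)
     *        = (0x123AB << 12) | 0x456 = 0x123AB456
     *   HT_GET_KEY(Slot)  = 0x123AB
     *   HT_GET_CODE(Slot) = 0x456
     */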
-
-typedef struct GifHashTableType {
- uint32_t HTable[HT_SIZE];
-} GifHashTableType;
-
-GifHashTableType *_InitHashTable(void);
-void _ClearHashTable(GifHashTableType *HashTable);
-void _InsertHashTable(GifHashTableType *HashTable, uint32_t Key, int Code);
-int _ExistsHashTable(GifHashTableType *HashTable, uint32_t Key);
-
-#endif /* _GIF_HASH_H_ */
-
-/* end */
diff --git a/TMessagesProj/jni/giflib/gif_lib.h b/TMessagesProj/jni/giflib/gif_lib.h
deleted file mode 100755
index b7aa9301d..000000000
--- a/TMessagesProj/jni/giflib/gif_lib.h
+++ /dev/null
@@ -1,307 +0,0 @@
-/******************************************************************************
-
-gif_lib.h - service library for decoding and encoding GIF images
-
-*****************************************************************************/
-
-#ifndef _GIF_LIB_H_
-#define _GIF_LIB_H_ 1
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-#define GIFLIB_MAJOR 5
-#define GIFLIB_MINOR 0
-#define GIFLIB_RELEASE 5
-
-#define GIF_ERROR 0
-#define GIF_OK 1
-
-#include <stddef.h>
-#include <stdbool.h>
-
-#define GIF_STAMP "GIFVER" /* First chars in file - GIF stamp. */
-#define GIF_STAMP_LEN (sizeof(GIF_STAMP) - 1)
-#define GIF_VERSION_POS 3 /* Version first character in stamp. */
-#define GIF87_STAMP "GIF87a" /* First chars in file - GIF stamp. */
-#define GIF89_STAMP "GIF89a" /* First chars in file - GIF stamp. */
-
-typedef unsigned char GifPixelType;
-typedef unsigned char *GifRowType;
-typedef unsigned char GifByteType;
-typedef unsigned int GifPrefixType;
-typedef int GifWord;
-
-typedef struct GifColorType {
- GifByteType Red, Green, Blue;
-} GifColorType;
-
-typedef struct ColorMapObject {
- int ColorCount;
- int BitsPerPixel;
- bool SortFlag;
- GifColorType *Colors; /* on malloc(3) heap */
-} ColorMapObject;
-
-typedef struct GifImageDesc {
- GifWord Left, Top, Width, Height; /* Current image dimensions. */
- bool Interlace; /* Sequential/Interlaced lines. */
- ColorMapObject *ColorMap; /* The local color map */
-} GifImageDesc;
-
-typedef struct ExtensionBlock {
- int ByteCount;
- GifByteType *Bytes; /* on malloc(3) heap */
- int Function; /* The block function code */
-#define CONTINUE_EXT_FUNC_CODE 0x00 /* continuation subblock */
-#define COMMENT_EXT_FUNC_CODE 0xfe /* comment */
-#define GRAPHICS_EXT_FUNC_CODE 0xf9 /* graphics control (GIF89) */
-#define PLAINTEXT_EXT_FUNC_CODE 0x01 /* plaintext */
-#define APPLICATION_EXT_FUNC_CODE 0xff /* application block */
-} ExtensionBlock;
-
-typedef struct SavedImage {
- GifImageDesc ImageDesc;
- GifByteType *RasterBits; /* on malloc(3) heap */
- int ExtensionBlockCount; /* Count of extensions before image */
- ExtensionBlock *ExtensionBlocks; /* Extensions before image */
-} SavedImage;
-
-typedef struct GifFileType {
- GifWord SWidth, SHeight; /* Size of virtual canvas */
- GifWord SColorResolution; /* How many colors can we generate? */
- GifWord SBackGroundColor; /* Background color for virtual canvas */
- GifByteType AspectByte; /* Used to compute pixel aspect ratio */
- ColorMapObject *SColorMap; /* Global colormap, NULL if nonexistent. */
- int ImageCount; /* Number of current image (both APIs) */
- GifImageDesc Image; /* Current image (low-level API) */
- SavedImage *SavedImages; /* Image sequence (high-level API) */
- int ExtensionBlockCount; /* Count extensions past last image */
- ExtensionBlock *ExtensionBlocks; /* Extensions past last image */
- int Error; /* Last error condition reported */
- void *UserData; /* hook to attach user data (TVT) */
- void *Private; /* Don't mess with this! */
-} GifFileType;
-
-#define GIF_ASPECT_RATIO(n) (((n)+15.0)/64.0) /* GIF89a: (AspectByte+15)/64 */
-
-typedef enum {
- UNDEFINED_RECORD_TYPE,
- SCREEN_DESC_RECORD_TYPE,
- IMAGE_DESC_RECORD_TYPE, /* Begin with ',' */
- EXTENSION_RECORD_TYPE, /* Begin with '!' */
- TERMINATE_RECORD_TYPE /* Begin with ';' */
-} GifRecordType;
-
-/* func type to read gif data from arbitrary sources (TVT) */
-typedef int (*InputFunc) (GifFileType *, GifByteType *, int);
-
-/* func type to write gif data to arbitrary targets.
- * Returns count of bytes written. (MRB)
- */
-typedef int (*OutputFunc) (GifFileType *, const GifByteType *, int);
-
-/******************************************************************************
- GIF89 structures
-******************************************************************************/
-
-typedef struct GraphicsControlBlock {
- int DisposalMode;
-#define DISPOSAL_UNSPECIFIED 0 /* No disposal specified. */
-#define DISPOSE_DO_NOT 1 /* Leave image in place */
-#define DISPOSE_BACKGROUND 2 /* Set area to background color */
-#define DISPOSE_PREVIOUS 3 /* Restore to previous content */
- bool UserInputFlag; /* User confirmation required before disposal */
- int DelayTime; /* pre-display delay in 0.01sec units */
- int TransparentColor; /* Palette index for transparency, -1 if none */
-#define NO_TRANSPARENT_COLOR -1
-} GraphicsControlBlock;
-
-/******************************************************************************
- GIF encoding routines
-******************************************************************************/
-
-/* Main entry points */
-GifFileType *EGifOpenFileName(const char *GifFileName,
- const bool GifTestExistence, int *Error);
-GifFileType *EGifOpenFileHandle(const int GifFileHandle, int *Error);
-GifFileType *EGifOpen(void *userPtr, OutputFunc writeFunc, int *Error);
-int EGifSpew(GifFileType * GifFile);
-char *EGifGetGifVersion(GifFileType *GifFile); /* new in 5.x */
-int EGifCloseFile(GifFileType * GifFile);
-
-#define E_GIF_ERR_OPEN_FAILED 1 /* And EGif possible errors. */
-#define E_GIF_ERR_WRITE_FAILED 2
-#define E_GIF_ERR_HAS_SCRN_DSCR 3
-#define E_GIF_ERR_HAS_IMAG_DSCR 4
-#define E_GIF_ERR_NO_COLOR_MAP 5
-#define E_GIF_ERR_DATA_TOO_BIG 6
-#define E_GIF_ERR_NOT_ENOUGH_MEM 7
-#define E_GIF_ERR_DISK_IS_FULL 8
-#define E_GIF_ERR_CLOSE_FAILED 9
-#define E_GIF_ERR_NOT_WRITEABLE 10
-
-/* These are legacy. You probably do not want to call them directly */
-int EGifPutScreenDesc(GifFileType *GifFile,
- const int GifWidth, const int GifHeight,
- const int GifColorRes,
- const int GifBackGround,
- const ColorMapObject *GifColorMap);
-int EGifPutImageDesc(GifFileType *GifFile,
- const int GifLeft, const int GifTop,
- const int GifWidth, const int GifHeight,
- const bool GifInterlace,
- const ColorMapObject *GifColorMap);
-void EGifSetGifVersion(GifFileType *GifFile, const bool gif89);
-int EGifPutLine(GifFileType *GifFile, GifPixelType *GifLine,
- int GifLineLen);
-int EGifPutPixel(GifFileType *GifFile, const GifPixelType GifPixel);
-int EGifPutComment(GifFileType *GifFile, const char *GifComment);
-int EGifPutExtensionLeader(GifFileType *GifFile, const int GifExtCode);
-int EGifPutExtensionBlock(GifFileType *GifFile,
- const int GifExtLen, const void *GifExtension);
-int EGifPutExtensionTrailer(GifFileType *GifFile);
-int EGifPutExtension(GifFileType *GifFile, const int GifExtCode,
- const int GifExtLen,
- const void *GifExtension);
-int EGifPutCode(GifFileType *GifFile, int GifCodeSize,
- const GifByteType *GifCodeBlock);
-int EGifPutCodeNext(GifFileType *GifFile,
- const GifByteType *GifCodeBlock);
-
-/******************************************************************************
- GIF decoding routines
-******************************************************************************/
-
-/* Main entry points */
-GifFileType *DGifOpenFileName(const char *GifFileName, int *Error);
-GifFileType *DGifOpenFileHandle(int GifFileHandle, int *Error);
-int DGifSlurp(GifFileType * GifFile);
-GifFileType *DGifOpen(void *userPtr, InputFunc readFunc, int *Error); /* new one (TVT) */
-int DGifCloseFile(GifFileType * GifFile);
-
-#define D_GIF_ERR_OPEN_FAILED 101 /* And DGif possible errors. */
-#define D_GIF_ERR_READ_FAILED 102
-#define D_GIF_ERR_NOT_GIF_FILE 103
-#define D_GIF_ERR_NO_SCRN_DSCR 104
-#define D_GIF_ERR_NO_IMAG_DSCR 105
-#define D_GIF_ERR_NO_COLOR_MAP 106
-#define D_GIF_ERR_WRONG_RECORD 107
-#define D_GIF_ERR_DATA_TOO_BIG 108
-#define D_GIF_ERR_NOT_ENOUGH_MEM 109
-#define D_GIF_ERR_CLOSE_FAILED 110
-#define D_GIF_ERR_NOT_READABLE 111
-#define D_GIF_ERR_IMAGE_DEFECT 112
-#define D_GIF_ERR_EOF_TOO_SOON 113
-
-/* These are legacy. You probably do not want to call them directly */
-int DGifGetScreenDesc(GifFileType *GifFile);
-int DGifGetRecordType(GifFileType *GifFile, GifRecordType *GifType);
-int DGifGetImageDesc(GifFileType *GifFile, bool changeImageCount);
-int DGifGetLine(GifFileType *GifFile, GifPixelType *GifLine, int GifLineLen);
-int DGifGetPixel(GifFileType *GifFile, GifPixelType GifPixel);
-int DGifGetComment(GifFileType *GifFile, char *GifComment);
-int DGifGetExtension(GifFileType *GifFile, int *GifExtCode,
- GifByteType **GifExtension);
-int DGifGetExtensionNext(GifFileType *GifFile, GifByteType **GifExtension,int* ExtCode);
-int DGifGetCode(GifFileType *GifFile, int *GifCodeSize,
- GifByteType **GifCodeBlock);
-int DGifGetCodeNext(GifFileType *GifFile, GifByteType **GifCodeBlock);
-int DGifGetLZCodes(GifFileType *GifFile, int *GifCode);
-
-
-/******************************************************************************
- Color table quantization (deprecated)
-******************************************************************************/
-int GifQuantizeBuffer(unsigned int Width, unsigned int Height,
- int *ColorMapSize, GifByteType * RedInput,
- GifByteType * GreenInput, GifByteType * BlueInput,
- GifByteType * OutputBuffer,
- GifColorType * OutputColorMap);
-
-/******************************************************************************
- Error handling and reporting.
-******************************************************************************/
-extern char *GifErrorString(int ErrorCode); /* new in 2012 - ESR */
-
-/*****************************************************************************
- Everything below this point is new after version 1.2, supporting `slurp
- mode' for doing I/O in two big belts with all the image-bashing in core.
-******************************************************************************/
-
-/******************************************************************************
- Color map handling from gif_alloc.c
-******************************************************************************/
-
-extern ColorMapObject *GifMakeMapObject(int ColorCount,
- const GifColorType *ColorMap);
-extern void GifFreeMapObject(ColorMapObject *Object);
-extern ColorMapObject *GifUnionColorMap(const ColorMapObject *ColorIn1,
- const ColorMapObject *ColorIn2,
- GifPixelType ColorTransIn2[]);
-extern int GifBitSize(int n);
-
-/******************************************************************************
- Support for the in-core structures allocation (slurp mode).
-******************************************************************************/
-
-extern void GifApplyTranslation(SavedImage *Image, GifPixelType Translation[]);
-extern int GifAddExtensionBlock(int *ExtensionBlock_Count,
- ExtensionBlock **ExtensionBlocks,
- int Function,
- unsigned int Len, unsigned char ExtData[]);
-extern void GifFreeExtensions(int *ExtensionBlock_Count,
- ExtensionBlock **ExtensionBlocks);
-extern SavedImage *GifMakeSavedImage(GifFileType *GifFile,
- const SavedImage *CopyFrom);
-extern void GifFreeSavedImages(GifFileType *GifFile);
-
-/******************************************************************************
- 5.x functions for GIF89 graphics control blocks
-******************************************************************************/
-
-int DGifExtensionToGCB(const size_t GifExtensionLength,
- const GifByteType *GifExtension,
- GraphicsControlBlock *GCB);
-size_t EGifGCBToExtension(const GraphicsControlBlock *GCB,
- GifByteType *GifExtension);
-
-int DGifSavedExtensionToGCB(GifFileType *GifFile,
- int ImageIndex,
- GraphicsControlBlock *GCB);
-int EGifGCBToSavedExtension(const GraphicsControlBlock *GCB,
- GifFileType *GifFile,
- int ImageIndex);
-
-/******************************************************************************
- The library's internal utility font
-******************************************************************************/
-
-#define GIF_FONT_WIDTH 8
-#define GIF_FONT_HEIGHT 8
-extern const unsigned char GifAsciiTable8x8[][GIF_FONT_WIDTH];
-
-extern void GifDrawText8x8(SavedImage *Image,
- const int x, const int y,
- const char *legend, const int color);
-
-extern void GifDrawBox(SavedImage *Image,
- const int x, const int y,
- const int w, const int d, const int color);
-
-extern void GifDrawRectangle(SavedImage *Image,
- const int x, const int y,
- const int w, const int d, const int color);
-
-extern void GifDrawBoxedText8x8(SavedImage *Image,
- const int x, const int y,
- const char *legend,
- const int border, const int bg, const int fg);
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-#endif /* _GIF_LIB_H_ */
-
-/* end */
diff --git a/TMessagesProj/jni/giflib/gif_lib_private.h b/TMessagesProj/jni/giflib/gif_lib_private.h
deleted file mode 100755
index adaf5571e..000000000
--- a/TMessagesProj/jni/giflib/gif_lib_private.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/****************************************************************************
-
-gif_lib_private.h - internal giflib routines and structures
-
-****************************************************************************/
-
-#ifndef _GIF_LIB_PRIVATE_H
-#define _GIF_LIB_PRIVATE_H
-
-#include "gif_lib.h"
-#include "gif_hash.h"
-
-#define EXTENSION_INTRODUCER 0x21
-#define DESCRIPTOR_INTRODUCER 0x2c
-#define TERMINATOR_INTRODUCER 0x3b
-
-#define LZ_MAX_CODE 4095 /* Biggest code possible in 12 bits. */
-#define LZ_BITS 12
-
-#define FLUSH_OUTPUT 4096 /* Impossible code, to signal flush. */
-#define FIRST_CODE 4097 /* Impossible code, to signal first. */
-#define NO_SUCH_CODE 4098 /* Impossible code, to signal empty. */
-
-#define FILE_STATE_WRITE 0x01
-#define FILE_STATE_SCREEN 0x02
-#define FILE_STATE_IMAGE 0x04
-#define FILE_STATE_READ 0x08
-
-#define IS_READABLE(Private) (Private->FileState & FILE_STATE_READ)
-#define IS_WRITEABLE(Private) (Private->FileState & FILE_STATE_WRITE)
-
-typedef struct GifFilePrivateType {
- GifWord FileState, FileHandle, /* Where all this data goes to! */
- BitsPerPixel, /* Bits per pixel (Codes uses at least this + 1). */
- ClearCode, /* The CLEAR LZ code. */
- EOFCode, /* The EOF LZ code. */
- RunningCode, /* The next code algorithm can generate. */
- RunningBits, /* The number of bits required to represent RunningCode. */
- MaxCode1, /* 1 bigger than max. possible code, in RunningBits bits. */
- LastCode, /* The code before the current code. */
- CrntCode, /* Current algorithm code. */
- StackPtr, /* For character stack (see below). */
- CrntShiftState; /* Number of bits in CrntShiftDWord. */
- unsigned long CrntShiftDWord; /* For bytes decomposition into codes. */
- unsigned long PixelCount; /* Number of pixels in image. */
- FILE *File; /* File as stream. */
- InputFunc Read; /* function to read gif input (TVT) */
- OutputFunc Write; /* function to write gif output (MRB) */
- GifByteType Buf[256]; /* Compressed input is buffered here. */
- GifByteType Stack[LZ_MAX_CODE]; /* Decoded pixels are stacked here. */
- GifByteType Suffix[LZ_MAX_CODE + 1]; /* So we can trace the codes. */
- GifPrefixType Prefix[LZ_MAX_CODE + 1];
- GifHashTableType *HashTable;
- bool gif89;
-} GifFilePrivateType;
-
-#endif /* _GIF_LIB_PRIVATE_H */
-
-/* end */
diff --git a/TMessagesProj/jni/giflib/gifalloc.c b/TMessagesProj/jni/giflib/gifalloc.c
deleted file mode 100755
index 5726e7681..000000000
--- a/TMessagesProj/jni/giflib/gifalloc.c
+++ /dev/null
@@ -1,400 +0,0 @@
-/*****************************************************************************
-
- GIF construction tools
-
-****************************************************************************/
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-
-#include "gif_lib.h"
-
-#define MAX(x, y) (((x) > (y)) ? (x) : (y))
-
-/******************************************************************************
- Miscellaneous utility functions
-******************************************************************************/
-
-/* return smallest bitfield size n will fit in */
-int
-GifBitSize(int n)
-{
- register int i;
-
- for (i = 1; i <= 8; i++)
- if ((1 << i) >= n)
- break;
- return (i);
-}
-
-/******************************************************************************
- Color map object functions
-******************************************************************************/
-
-/*
- * Allocate a color map of given size; initialize with contents of
- * ColorMap if that pointer is non-NULL.
- */
-ColorMapObject *
-GifMakeMapObject(int ColorCount, const GifColorType *ColorMap)
-{
- ColorMapObject *Object;
-
- /*** FIXME: Our ColorCount has to be a power of two. Do we need to make
- * the user aware of that, or should we automatically round up instead? */
- if (ColorCount != (1 << GifBitSize(ColorCount))) {
- return ((ColorMapObject *) NULL);
- }
-
- Object = (ColorMapObject *)malloc(sizeof(ColorMapObject));
- if (Object == (ColorMapObject *) NULL) {
- return ((ColorMapObject *) NULL);
- }
-
- Object->Colors = (GifColorType *)calloc(ColorCount, sizeof(GifColorType));
- if (Object->Colors == (GifColorType *) NULL) {
- free(Object);
- return ((ColorMapObject *) NULL);
- }
-
- Object->ColorCount = ColorCount;
- Object->BitsPerPixel = GifBitSize(ColorCount);
-
- if (ColorMap != NULL) {
- memcpy((char *)Object->Colors,
- (char *)ColorMap, ColorCount * sizeof(GifColorType));
- }
-
- return (Object);
-}
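
A short usage sketch (not library code); note that ColorCount must already be a power of two, per the FIXME above:

    /* Build a 256-entry grayscale palette. */
    GifColorType gray[256];
    ColorMapObject *map;
    int i;

    for (i = 0; i < 256; i++)
        gray[i].Red = gray[i].Green = gray[i].Blue = (GifByteType)i;

    map = GifMakeMapObject(256, gray);    /* 256 == 1 << GifBitSize(256) */
    if (map != NULL) {
        /* ... use map as SColorMap or as a local ColorMap ... */
        GifFreeMapObject(map);
    }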
-
-/*******************************************************************************
-Free a color map object
-*******************************************************************************/
-void
-GifFreeMapObject(ColorMapObject *Object)
-{
- if (Object != NULL) {
- (void)free(Object->Colors);
- (void)free(Object);
- }
-}
-
-#ifdef DEBUG
-void
-DumpColorMap(ColorMapObject *Object,
- FILE * fp)
-{
- if (Object != NULL) {
- int i, j, Len = Object->ColorCount;
-
- for (i = 0; i < Len; i += 4) {
- for (j = 0; j < 4 && i + j < Len; j++) {
- (void)fprintf(fp, "%3d: %02x %02x %02x ", i + j,
- Object->Colors[i + j].Red,
- Object->Colors[i + j].Green,
- Object->Colors[i + j].Blue);
- }
- (void)fprintf(fp, "\n");
- }
- }
-}
-#endif /* DEBUG */
-
-/*******************************************************************************
- Compute the union of two given color maps and return it. If the result
- can't fit into 256 colors, NULL is returned; otherwise the allocated union
- is. ColorIn1 is copied as is to ColorUnion, while colors from ColorIn2 are
- copied iff they didn't exist before. ColorTransIn2 maps the old
- ColorIn2 indices into the ColorUnion color map table.
-*******************************************************************************/
-ColorMapObject *
-GifUnionColorMap(const ColorMapObject *ColorIn1,
- const ColorMapObject *ColorIn2,
- GifPixelType ColorTransIn2[])
-{
- int i, j, CrntSlot, RoundUpTo, NewGifBitSize;
- ColorMapObject *ColorUnion;
-
- /*
- * We don't worry about duplicates within either color map; if
- * the caller wants to resolve those, he can perform unions
- * with an empty color map.
- */
-
- /* Allocate table which will hold the result for sure. */
- ColorUnion = GifMakeMapObject(MAX(ColorIn1->ColorCount,
- ColorIn2->ColorCount) * 2, NULL);
-
- if (ColorUnion == NULL)
- return (NULL);
-
- /*
- * Copy ColorIn1 to ColorUnion.
- */
- for (i = 0; i < ColorIn1->ColorCount; i++)
- ColorUnion->Colors[i] = ColorIn1->Colors[i];
- CrntSlot = ColorIn1->ColorCount;
-
- /*
- * Potentially obnoxious hack:
- *
- * Back CrntSlot down past all contiguous {0, 0, 0} slots at the end
- * of table 1. This is very useful if your display is limited to
- * 16 colors.
- */
- while (ColorIn1->Colors[CrntSlot - 1].Red == 0
- && ColorIn1->Colors[CrntSlot - 1].Green == 0
- && ColorIn1->Colors[CrntSlot - 1].Blue == 0)
- CrntSlot--;
-
- /* Copy ColorIn2 to ColorUnion (use old colors if they exist): */
- for (i = 0; i < ColorIn2->ColorCount && CrntSlot <= 256; i++) {
- /* Let's see if this color already exists: */
- for (j = 0; j < ColorIn1->ColorCount; j++)
- if (memcmp (&ColorIn1->Colors[j], &ColorIn2->Colors[i],
- sizeof(GifColorType)) == 0)
- break;
-
- if (j < ColorIn1->ColorCount)
- ColorTransIn2[i] = j; /* color exists in Color1 */
- else {
- /* Color is new - copy it to a new slot: */
- ColorUnion->Colors[CrntSlot] = ColorIn2->Colors[i];
- ColorTransIn2[i] = CrntSlot++;
- }
- }
-
- if (CrntSlot > 256) {
- GifFreeMapObject(ColorUnion);
- return ((ColorMapObject *) NULL);
- }
-
- NewGifBitSize = GifBitSize(CrntSlot);
- RoundUpTo = (1 << NewGifBitSize);
-
- if (RoundUpTo != ColorUnion->ColorCount) {
- register GifColorType *Map = ColorUnion->Colors;
-
- /*
- * Zero out slots up to next power of 2.
- * We know these slots exist because of the way ColorUnion's
- * start dimension was computed.
- */
- for (j = CrntSlot; j < RoundUpTo; j++)
- Map[j].Red = Map[j].Green = Map[j].Blue = 0;
-
- /* perhaps we can shrink the map? */
- if (RoundUpTo < ColorUnion->ColorCount)
- ColorUnion->Colors = (GifColorType *)realloc(Map,
- sizeof(GifColorType) * RoundUpTo);
- }
-
- ColorUnion->ColorCount = RoundUpTo;
- ColorUnion->BitsPerPixel = NewGifBitSize;
-
- return (ColorUnion);
-}
-
-/*******************************************************************************
- Apply a given color translation to the raster bits of an image
-*******************************************************************************/
-void
-GifApplyTranslation(SavedImage *Image, GifPixelType Translation[])
-{
- register int i;
- register int RasterSize = Image->ImageDesc.Height * Image->ImageDesc.Width;
-
- for (i = 0; i < RasterSize; i++)
- Image->RasterBits[i] = Translation[Image->RasterBits[i]];
-}
-
-/******************************************************************************
- Extension record functions
-******************************************************************************/
-int
-GifAddExtensionBlock(int *ExtensionBlockCount,
- ExtensionBlock **ExtensionBlocks,
- int Function,
- unsigned int Len,
- unsigned char ExtData[])
-{
- ExtensionBlock *ep;
-
- if (*ExtensionBlocks == NULL)
- *ExtensionBlocks=(ExtensionBlock *)malloc(sizeof(ExtensionBlock));
- else
- *ExtensionBlocks = (ExtensionBlock *)realloc(*ExtensionBlocks,
- sizeof(ExtensionBlock) *
- (*ExtensionBlockCount + 1));
-
- if (*ExtensionBlocks == NULL)
- return (GIF_ERROR);
-
- ep = &(*ExtensionBlocks)[(*ExtensionBlockCount)++];
-
- ep->Function = Function;
- ep->ByteCount=Len;
- ep->Bytes = (GifByteType *)malloc(ep->ByteCount);
- if (ep->Bytes == NULL)
- return (GIF_ERROR);
-
- if (ExtData != NULL) {
- memcpy(ep->Bytes, ExtData, Len);
- }
-
- return (GIF_OK);
-}
-
-void
-GifFreeExtensions(int *ExtensionBlockCount,
- ExtensionBlock **ExtensionBlocks)
-{
- ExtensionBlock *ep;
-
- if (*ExtensionBlocks == NULL)
- return;
-
- for (ep = *ExtensionBlocks;
- ep < (*ExtensionBlocks + *ExtensionBlockCount);
- ep++)
- (void)free((char *)ep->Bytes);
- (void)free((char *)*ExtensionBlocks);
- *ExtensionBlocks = NULL;
- *ExtensionBlockCount = 0;
-}
-
-/******************************************************************************
- Image block allocation functions
-******************************************************************************/
-
-/* Private Function:
- * Frees the last image in the GifFile->SavedImages array
- */
-void
-FreeLastSavedImage(GifFileType *GifFile)
-{
- SavedImage *sp;
-
- if ((GifFile == NULL) || (GifFile->SavedImages == NULL))
- return;
-
- /* Remove one SavedImage from the GifFile */
- GifFile->ImageCount--;
- sp = &GifFile->SavedImages[GifFile->ImageCount];
-
- /* Deallocate its Colormap */
- if (sp->ImageDesc.ColorMap != NULL) {
- GifFreeMapObject(sp->ImageDesc.ColorMap);
- sp->ImageDesc.ColorMap = NULL;
- }
-
- /* Deallocate the image data */
- if (sp->RasterBits != NULL)
- free((char *)sp->RasterBits);
-
- /* Deallocate any extensions */
- GifFreeExtensions(&sp->ExtensionBlockCount, &sp->ExtensionBlocks);
-
- /*** FIXME: We could realloc the GifFile->SavedImages structure but is
- * there a point to it? Saves some memory but we'd have to do it every
- * time. If this is used in GifFreeSavedImages then it would be inefficient
- * (The whole array is going to be deallocated.) If we just use it when
- * we want to free the last Image it's convenient to do it here.
- */
-}
-
-/*
- * Append an image block to the SavedImages array
- */
-SavedImage *
-GifMakeSavedImage(GifFileType *GifFile, const SavedImage *CopyFrom)
-{
- if (GifFile->SavedImages == NULL)
- GifFile->SavedImages = (SavedImage *)malloc(sizeof(SavedImage));
- else
- GifFile->SavedImages = (SavedImage *)realloc(GifFile->SavedImages,
- sizeof(SavedImage) * (GifFile->ImageCount + 1));
-
- if (GifFile->SavedImages == NULL)
- return ((SavedImage *)NULL);
- else {
- SavedImage *sp = &GifFile->SavedImages[GifFile->ImageCount++];
- memset((char *)sp, '\0', sizeof(SavedImage));
-
- if (CopyFrom != NULL) {
- memcpy((char *)sp, CopyFrom, sizeof(SavedImage));
-
- /*
- * Make our own allocated copies of the heap fields in the
- * copied record. This guards against potential aliasing
- * problems.
- */
-
- /* first, the local color map */
- if (sp->ImageDesc.ColorMap != NULL) {
- sp->ImageDesc.ColorMap = GifMakeMapObject(
- CopyFrom->ImageDesc.ColorMap->ColorCount,
- CopyFrom->ImageDesc.ColorMap->Colors);
- if (sp->ImageDesc.ColorMap == NULL) {
- FreeLastSavedImage(GifFile);
- return (SavedImage *)(NULL);
- }
- }
-
- /* next, the raster */
- sp->RasterBits = (unsigned char *)malloc(sizeof(GifPixelType) *
- CopyFrom->ImageDesc.Height *
- CopyFrom->ImageDesc.Width);
- if (sp->RasterBits == NULL) {
- FreeLastSavedImage(GifFile);
- return (SavedImage *)(NULL);
- }
- memcpy(sp->RasterBits, CopyFrom->RasterBits,
- sizeof(GifPixelType) * CopyFrom->ImageDesc.Height *
- CopyFrom->ImageDesc.Width);
-
- /* finally, the extension blocks */
- if (sp->ExtensionBlocks != NULL) {
- sp->ExtensionBlocks = (ExtensionBlock *)malloc(
- sizeof(ExtensionBlock) *
- CopyFrom->ExtensionBlockCount);
- if (sp->ExtensionBlocks == NULL) {
- FreeLastSavedImage(GifFile);
- return (SavedImage *)(NULL);
- }
- memcpy(sp->ExtensionBlocks, CopyFrom->ExtensionBlocks,
- sizeof(ExtensionBlock) * CopyFrom->ExtensionBlockCount);
- }
- }
-
- return (sp);
- }
-}
-
-void
-GifFreeSavedImages(GifFileType *GifFile)
-{
- SavedImage *sp;
-
- if ((GifFile == NULL) || (GifFile->SavedImages == NULL)) {
- return;
- }
- for (sp = GifFile->SavedImages;
- sp < GifFile->SavedImages + GifFile->ImageCount; sp++) {
- if (sp->ImageDesc.ColorMap != NULL) {
- GifFreeMapObject(sp->ImageDesc.ColorMap);
- sp->ImageDesc.ColorMap = NULL;
- }
-
- if (sp->RasterBits != NULL)
- free((char *)sp->RasterBits);
-
- GifFreeExtensions(&sp->ExtensionBlockCount, &sp->ExtensionBlocks);
- }
- free((char *)GifFile->SavedImages);
- GifFile->SavedImages = NULL;
-}
-
-/* end */
diff --git a/TMessagesProj/jni/gifvideo.cpp b/TMessagesProj/jni/gifvideo.cpp
new file mode 100644
index 000000000..10e4c2d02
--- /dev/null
+++ b/TMessagesProj/jni/gifvideo.cpp
@@ -0,0 +1,266 @@
+#include <jni.h>
+#include <cstring>
+#include <string>
+#include <limits>
+#include <android/bitmap.h>
+#include <libyuv.h>
+#include "utils.h"
+extern "C" {
+#include <libavformat/avformat.h>
+
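+// Note: FFmpeg's stock av_err2str macro expands to a C99 compound literal,
+// which is not valid C++, so it is re-implemented here on top of std::string.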
+static const std::string av_make_error_str(int errnum) {
+ char errbuf[AV_ERROR_MAX_STRING_SIZE];
+ av_strerror(errnum, errbuf, AV_ERROR_MAX_STRING_SIZE);
+ return (std::string) errbuf;
+}
+
+#undef av_err2str
+#define av_err2str(errnum) av_make_error_str(errnum).c_str()
+
+struct VideoInfo {
+
+ ~VideoInfo() {
+ if (video_dec_ctx) {
+ avcodec_close(video_dec_ctx);
+ video_dec_ctx = nullptr;
+ }
+ if (fmt_ctx) {
+ avformat_close_input(&fmt_ctx);
+ fmt_ctx = nullptr;
+ }
+ if (frame) {
+ av_frame_free(&frame);
+ frame = nullptr;
+ }
+ if (src) {
+ delete [] src;
+ src = nullptr;
+ }
+ av_free_packet(&orig_pkt);
+
+ video_stream_idx = -1;
+ video_stream = nullptr;
+ }
+
+ AVFormatContext *fmt_ctx = nullptr;
+ char *src = nullptr;
+ int video_stream_idx = -1;
+ AVStream *video_stream = nullptr;
+ AVCodecContext *video_dec_ctx = nullptr;
+ AVFrame *frame = nullptr;
+ bool has_decoded_frames = false;
+ AVPacket pkt = {};      // zero-initialized so the destructor can safely
+ AVPacket orig_pkt = {}; // call av_free_packet even if decoding never started
+};
+
+jobject makeGlobarRef(JNIEnv *env, jobject object) {
+ if (object) {
+ return env->NewGlobalRef(object);
+ }
+ return 0;
+}
+
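+// Called once from JNI_OnLoad; av_register_all() registers every FFmpeg
+// demuxer and codec so the decoder below can open arbitrary video files.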
+int gifvideoOnJNILoad(JavaVM *vm, JNIEnv *env) {
+ av_register_all();
+ return 0;
+}
+
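+// Finds the best stream of the given type in fmt_ctx and opens a decoder for
+// it with refcounted frames enabled; stores the stream index on success.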
+int open_codec_context(int *stream_idx, AVFormatContext *fmt_ctx, enum AVMediaType type) {
+ int ret;
+ AVStream *st;
+ AVCodecContext *dec_ctx = NULL;
+ AVCodec *dec = NULL;
+ AVDictionary *opts = NULL;
+
+ ret = av_find_best_stream(fmt_ctx, type, -1, -1, NULL, 0);
+ if (ret < 0) {
+ LOGE("can't find %s stream in input file\n", av_get_media_type_string(type));
+ return ret;
+ } else {
+ *stream_idx = ret;
+ st = fmt_ctx->streams[*stream_idx];
+
+ dec_ctx = st->codec;
+ dec = avcodec_find_decoder(dec_ctx->codec_id);
+ if (!dec) {
+ LOGE("failed to find %s codec\n", av_get_media_type_string(type));
+ return ret;
+ }
+
+ av_dict_set(&opts, "refcounted_frames", "1", 0);
+ if ((ret = avcodec_open2(dec_ctx, dec, &opts)) < 0) {
+ LOGE("failed to open %s codec\n", av_get_media_type_string(type));
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
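+// Feeds the current packet to the video decoder. Returns how many bytes of
+// the packet were consumed (the caller advances pkt.data/pkt.size by that
+// amount) or a negative AVERROR; *got_frame is set once a frame is ready.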
+int decode_packet(VideoInfo *info, int *got_frame) {
+ int ret = 0;
+ int decoded = info->pkt.size;
+
+ *got_frame = 0;
+ if (info->pkt.stream_index == info->video_stream_idx) {
+ ret = avcodec_decode_video2(info->video_dec_ctx, info->frame, got_frame, &info->pkt);
+ if (ret != 0) {
+ return ret;
+ }
+ }
+
+ return decoded;
+}
+
+jint Java_org_telegram_ui_Components_AnimatedFileDrawable_createDecoder(JNIEnv *env, jclass clazz, jstring src, jintArray data) {
+ VideoInfo *info = new VideoInfo();
+
+ char const *srcString = env->GetStringUTFChars(src, 0);
+ if (srcString != 0) {
+ int len = strlen(srcString);
+ info->src = new char[len + 1];
+ memcpy(info->src, srcString, len);
+ info->src[len] = '\0';
+ env->ReleaseStringUTFChars(src, srcString);
+ }
+
+ int ret;
+ if ((ret = avformat_open_input(&info->fmt_ctx, info->src, NULL, NULL)) < 0) {
+ LOGE("can't open source file %s, %s", info->src, av_err2str(ret));
+ delete info;
+ return 0;
+ }
+
+ if ((ret = avformat_find_stream_info(info->fmt_ctx, NULL)) < 0) {
+ LOGE("can't find stream information %s, %s", info->src, av_err2str(ret));
+ delete info;
+ return 0;
+ }
+
+ if (open_codec_context(&info->video_stream_idx, info->fmt_ctx, AVMEDIA_TYPE_VIDEO) >= 0) {
+ info->video_stream = info->fmt_ctx->streams[info->video_stream_idx];
+ info->video_dec_ctx = info->video_stream->codec;
+ }
+
+ if (info->video_stream == nullptr) {
+ LOGE("can't find video stream in the input, aborting %s", info->src);
+ delete info;
+ return 0;
+ }
+
+ info->frame = av_frame_alloc();
+ if (info->frame == nullptr) {
+ LOGE("can't allocate frame %s", info->src);
+ delete info;
+ return 0;
+ }
+
+ av_init_packet(&info->pkt);
+ info->pkt.data = NULL;
+ info->pkt.size = 0;
+
+ jint *dataArr = env->GetIntArrayElements(data, 0);
+ if (dataArr != nullptr) {
+ dataArr[0] = info->video_dec_ctx->width;
+ dataArr[1] = info->video_dec_ctx->height;
+ env->ReleaseIntArrayElements(data, dataArr, 0);
+ }
+
+ //LOGD("successfully opened file %s", info->src);
+
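+ // NOTE: the VideoInfo pointer is returned to Java truncated to a jint; on
+ // a 64-bit ABI a jlong handle would be required to round-trip it safely.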
+ return (int) info;
+}
+
+void Java_org_telegram_ui_Components_AnimatedFileDrawable_destroyDecoder(JNIEnv *env, jclass clazz, jint ptr) {
+ if (ptr == 0) {
+ return;
+ }
+ VideoInfo *info = (VideoInfo *) ptr;
+ delete info;
+}
+
+
+jint Java_org_telegram_ui_Components_AnimatedFileDrawable_getVideoFrame(JNIEnv *env, jclass clazz, jint ptr, jobject bitmap, jintArray data) {
+ if (ptr == 0 || bitmap == nullptr) {
+ return 0;
+ }
+ VideoInfo *info = (VideoInfo *) ptr;
+ int ret = 0;
+ int got_frame = 0;
+
+ while (true) {
+ if (info->pkt.size == 0) {
+ ret = av_read_frame(info->fmt_ctx, &info->pkt);
+ //LOGD("got packet with size %d", info->pkt.size);
+ if (ret >= 0) {
+ info->orig_pkt = info->pkt;
+ }
+ }
+
+ if (info->pkt.size > 0) {
+ ret = decode_packet(info, &got_frame);
+ if (ret < 0) {
+ if (info->has_decoded_frames) {
+ ret = 0;
+ }
+ info->pkt.size = 0;
+ } else {
+ //LOGD("read size %d from packet", ret);
+ info->pkt.data += ret;
+ info->pkt.size -= ret;
+ }
+
+ if (info->pkt.size == 0) {
+ av_free_packet(&info->orig_pkt);
+ }
+ } else {
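+ // Demuxer is drained: feed an empty packet to flush any frames still
+ // buffered inside the decoder, then seek back to the start to loop.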
+ info->pkt.data = NULL;
+ info->pkt.size = 0;
+ ret = decode_packet(info, &got_frame);
+ if (ret < 0) {
+ LOGE("can't decode packet flushed %s", info->src);
+ return 0;
+ }
+ if (got_frame == 0) {
+ if (info->has_decoded_frames) {
+ //LOGD("file end reached %s", info->src);
+ if ((ret = avformat_seek_file(info->fmt_ctx, -1, std::numeric_limits<int64_t>::min(), 0, std::numeric_limits<int64_t>::max(), 0)) < 0) {
+ LOGE("can't seek to begin of file %s, %s", info->src, av_err2str(ret));
+ return 0;
+ } else {
+ avcodec_flush_buffers(info->video_dec_ctx);
+ }
+ }
+ }
+ }
+ if (ret < 0) {
+ return 0;
+ }
+ if (got_frame) {
+ //LOGD("decoded frame with w = %d, h = %d, format = %d", info->frame->width, info->frame->height, info->frame->format);
+ if (info->frame->format == AV_PIX_FMT_YUV420P || info->frame->format == AV_PIX_FMT_BGRA) {
+ jint *dataArr = env->GetIntArrayElements(data, 0);
+ if (dataArr != nullptr) {
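+ // The frame PTS is in stream time_base units; av_q2d() converts the
+ // rational time base to seconds, which is then scaled to milliseconds.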
+ dataArr[2] = (int) (1000 * info->frame->pkt_pts * av_q2d(info->video_stream->time_base));
+ env->ReleaseIntArrayElements(data, dataArr, 0);
+ }
+
+ void *pixels;
+ if (AndroidBitmap_lockPixels(env, bitmap, &pixels) >= 0) {
+ if (info->frame->format == AV_PIX_FMT_YUV420P) {
+ //LOGD("y %d, u %d, v %d, width %d, height %d", info->frame->linesize[0], info->frame->linesize[2], info->frame->linesize[1], info->frame->width, info->frame->height);
+ libyuv::I420ToARGB(info->frame->data[0], info->frame->linesize[0], info->frame->data[2], info->frame->linesize[2], info->frame->data[1], info->frame->linesize[1], (uint8_t *) pixels, info->frame->width * 4, info->frame->width, info->frame->height);
+ } else if (info->frame->format == AV_PIX_FMT_BGRA) {
+ libyuv::ABGRToARGB(info->frame->data[0], info->frame->linesize[0], (uint8_t *) pixels, info->frame->width * 4, info->frame->width, info->frame->height);
+ }
+ AndroidBitmap_unlockPixels(env, bitmap);
+ }
+ }
+ info->has_decoded_frames = true;
+ av_frame_unref(info->frame);
+ return 1;
+ }
+ }
+ return 0;
+}
+}
diff --git a/TMessagesProj/jni/image.c b/TMessagesProj/jni/image.c
index 5445075b7..f7a2cfefa 100644
--- a/TMessagesProj/jni/image.c
+++ b/TMessagesProj/jni/image.c
@@ -68,7 +68,7 @@ static void fastBlurMore(int imageWidth, int imageHeight, int imageStride, void
const int r1 = radius + 1;
const int div = radius * 2 + 1;
- if (radius > 15 || div >= w || div >= h || w * h > 128 * 128 || imageStride > imageWidth * 4) {
+ if (radius > 15 || div >= w || div >= h || w * h > 150 * 150 || imageStride > imageWidth * 4) {
return;
}
@@ -151,6 +151,9 @@ static void fastBlurMore(int imageWidth, int imageHeight, int imageStride, void
static void fastBlur(int imageWidth, int imageHeight, int imageStride, void *pixels, int radius) {
uint8_t *pix = (uint8_t *)pixels;
+ if (pix == NULL) {
+ return;
+ }
const int w = imageWidth;
const int h = imageHeight;
const int stride = imageStride;
@@ -169,7 +172,7 @@ static void fastBlur(int imageWidth, int imageHeight, int imageStride, void *pix
return;
}
- if (radius > 15 || div >= w || div >= h || w * h > 128 * 128 || imageStride > imageWidth * 4) {
+ if (radius > 15 || div >= w || div >= h || w * h > 150 * 150 || imageStride > imageWidth * 4) {
return;
}
@@ -265,18 +268,12 @@ METHODDEF(void) my_error_exit(j_common_ptr cinfo) {
longjmp(myerr->setjmp_buffer, 1);
}
-JNIEXPORT void Java_org_telegram_messenger_Utilities_blurBitmap(JNIEnv *env, jclass class, jobject bitmap, int radius, int unpin) {
+JNIEXPORT void Java_org_telegram_messenger_Utilities_blurBitmap(JNIEnv *env, jclass class, jobject bitmap, int radius, int unpin, int width, int height, int stride) {
if (!bitmap) {
return;
}
- AndroidBitmapInfo info;
-
- if (AndroidBitmap_getInfo(env, bitmap, &info) < 0) {
- return;
- }
-
- if (info.format != ANDROID_BITMAP_FORMAT_RGBA_8888 || !info.width || !info.height || !info.stride) {
+ if (!width || !height || !stride) {
return;
}
@@ -285,9 +282,9 @@ JNIEXPORT void Java_org_telegram_messenger_Utilities_blurBitmap(JNIEnv *env, jcl
return;
}
if (radius <= 3) {
- fastBlur(info.width, info.height, info.stride, pixels, radius);
+ fastBlur(width, height, stride, pixels, radius);
} else {
- fastBlurMore(info.width, info.height, info.stride, pixels, radius);
+ fastBlurMore(width, height, stride, pixels, radius);
}
if (unpin) {
AndroidBitmap_unlockPixels(env, bitmap);
@@ -399,10 +396,20 @@ JNIEXPORT void Java_org_telegram_messenger_Utilities_calcCDT(JNIEnv *env, jclass
}
JNIEXPORT int Java_org_telegram_messenger_Utilities_pinBitmap(JNIEnv *env, jclass class, jobject bitmap) {
+ if (bitmap == NULL) {
+ return 0;
+ }
unsigned char *pixels;
return AndroidBitmap_lockPixels(env, bitmap, &pixels) >= 0 ? 1 : 0;
}
+JNIEXPORT void Java_org_telegram_messenger_Utilities_unpinBitmap(JNIEnv *env, jclass class, jobject bitmap) {
+ if (bitmap == NULL) {
+ return;
+ }
+ AndroidBitmap_unlockPixels(env, bitmap);
+}
+
JNIEXPORT void Java_org_telegram_messenger_Utilities_loadBitmap(JNIEnv *env, jclass class, jstring path, jobject bitmap, int scale, int width, int height, int stride) {
AndroidBitmapInfo info;
diff --git a/TMessagesProj/jni/jni.c b/TMessagesProj/jni/jni.c
index 2bf216898..f300f8e71 100644
--- a/TMessagesProj/jni/jni.c
+++ b/TMessagesProj/jni/jni.c
@@ -7,10 +7,10 @@
#include
#include "utils.h"
#include "sqlite.h"
-#include "gif.h"
#include "image.h"
int registerNativeTgNetFunctions(JavaVM *vm, JNIEnv *env);
+int gifvideoOnJNILoad(JavaVM *vm, JNIEnv *env);
jint JNI_OnLoad(JavaVM *vm, void *reserved) {
JNIEnv *env = 0;
@@ -27,18 +27,20 @@ jint JNI_OnLoad(JavaVM *vm, void *reserved) {
if (imageOnJNILoad(vm, reserved, env) == -1) {
return -1;
}
+
+ if (gifvideoOnJNILoad(vm, env) == -1) {
+ return -1;
+ }
if (registerNativeTgNetFunctions(vm, env) != JNI_TRUE) {
return -1;
}
- gifOnJNILoad(vm, reserved, env);
-
return JNI_VERSION_1_6;
}
void JNI_OnUnload(JavaVM *vm, void *reserved) {
- gifOnJNIUnload(vm, reserved);
+
}
JNIEXPORT void Java_org_telegram_messenger_Utilities_aesIgeEncryption(JNIEnv *env, jclass class, jobject buffer, jbyteArray key, jbyteArray iv, jboolean encrypt, int offset, int length) {
diff --git a/TMessagesProj/jni/libyuv/include/libyuv.h b/TMessagesProj/jni/libyuv/include/libyuv.h
index 3bebe642c..de652836e 100644
--- a/TMessagesProj/jni/libyuv/include/libyuv.h
+++ b/TMessagesProj/jni/libyuv/include/libyuv.h
@@ -18,7 +18,6 @@
#include "libyuv/convert_from.h"
#include "libyuv/convert_from_argb.h"
#include "libyuv/cpu_id.h"
-#include "libyuv/format_conversion.h"
#include "libyuv/mjpeg_decoder.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
diff --git a/TMessagesProj/jni/libyuv/include/libyuv/compare.h b/TMessagesProj/jni/libyuv/include/libyuv/compare.h
index 5dfac7c86..08b2bb2ec 100644
--- a/TMessagesProj/jni/libyuv/include/libyuv/compare.h
+++ b/TMessagesProj/jni/libyuv/include/libyuv/compare.h
@@ -22,6 +22,11 @@ extern "C" {
LIBYUV_API
uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed);
+// Scan an opaque argb image and return fourcc based on alpha offset.
+// Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown.
+LIBYUV_API
+uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height);
+
// Sum Square Error - used to compute Mean Square Error or PSNR.
LIBYUV_API
uint64 ComputeSumSquareError(const uint8* src_a,
diff --git a/TMessagesProj/jni/libyuv/include/libyuv/compare_row.h b/TMessagesProj/jni/libyuv/include/libyuv/compare_row.h
new file mode 100644
index 000000000..4562da047
--- /dev/null
+++ b/TMessagesProj/jni/libyuv/include/libyuv/compare_row.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2013 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_COMPARE_ROW_H_ // NOLINT
+#define INCLUDE_LIBYUV_COMPARE_ROW_H_
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#if defined(__pnacl__) || defined(__CLR_VER) || \
+ (defined(__i386__) && !defined(__SSE2__))
+#define LIBYUV_DISABLE_X86
+#endif
+
+// Visual C 2012 required for AVX2.
+#if defined(_M_IX86) && !defined(__clang__) && \
+ defined(_MSC_VER) && _MSC_VER >= 1700
+#define VISUALC_HAS_AVX2 1
+#endif // VisualStudio >= 2012
+
+// clang >= 3.4.0 required for AVX2.
+#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
+#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
+#define CLANG_HAS_AVX2 1
+#endif // clang >= 3.4
+#endif // __clang__
+
+#if defined(_M_IX86) && (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2))
+#define HAS_HASHDJB2_AVX2
+#endif
+
+// The following are available for Visual C and GCC:
+#if !defined(LIBYUV_DISABLE_X86) && \
+ (defined(__x86_64__) || (defined(__i386__) || defined(_M_IX86)))
+#define HAS_HASHDJB2_SSE41
+#define HAS_SUMSQUAREERROR_SSE2
+#endif
+
+// The following are available for Visual C and clangcl 32 bit:
+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
+ (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2))
+#define HAS_HASHDJB2_AVX2
+#define HAS_SUMSQUAREERROR_AVX2
+#endif
+
+// The following are available for Neon:
+#if !defined(LIBYUV_DISABLE_NEON) && \
+ (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
+#define HAS_SUMSQUAREERROR_NEON
+#endif
+
+uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count);
+uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count);
+uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count);
+uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count);
+
+uint32 HashDjb2_C(const uint8* src, int count, uint32 seed);
+uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed);
+uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed);
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
+
+#endif // INCLUDE_LIBYUV_COMPARE_ROW_H_ NOLINT
diff --git a/TMessagesProj/jni/libyuv/include/libyuv/convert.h b/TMessagesProj/jni/libyuv/include/libyuv/convert.h
index 1bd45c837..a8d3fa07a 100644
--- a/TMessagesProj/jni/libyuv/include/libyuv/convert.h
+++ b/TMessagesProj/jni/libyuv/include/libyuv/convert.h
@@ -71,6 +71,8 @@ int I400ToI420(const uint8* src_y, int src_stride_y,
uint8* dst_v, int dst_stride_v,
int width, int height);
+#define J400ToJ420 I400ToI420
+
// Convert NV12 to I420.
LIBYUV_API
int NV12ToI420(const uint8* src_y, int src_stride_y,
@@ -113,15 +115,6 @@ int M420ToI420(const uint8* src_m420, int src_stride_m420,
uint8* dst_v, int dst_stride_v,
int width, int height);
-// Convert Q420 to I420.
-LIBYUV_API
-int Q420ToI420(const uint8* src_y, int src_stride_y,
- const uint8* src_yuy2, int src_stride_yuy2,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
// ARGB little endian (bgra in memory) to I420.
LIBYUV_API
int ARGBToI420(const uint8* src_frame, int src_stride_frame,
@@ -211,8 +204,6 @@ int MJPGSize(const uint8* sample, size_t sample_size,
int* width, int* height);
#endif
-// Note Bayer formats (BGGR) To I420 are in format_conversion.h
-
// Convert camera sample to I420 with cropping, rotation and vertical flip.
// "src_size" is needed to parse MJPG.
// "dst_stride_y" number of bytes in a row of the dst_y plane.
diff --git a/TMessagesProj/jni/libyuv/include/libyuv/convert_argb.h b/TMessagesProj/jni/libyuv/include/libyuv/convert_argb.h
index a18014ca2..ce4e3d075 100644
--- a/TMessagesProj/jni/libyuv/include/libyuv/convert_argb.h
+++ b/TMessagesProj/jni/libyuv/include/libyuv/convert_argb.h
@@ -18,7 +18,6 @@
#include "libyuv/rotate.h"
// TODO(fbarchard): This set of functions should exactly match convert.h
-// Add missing Q420.
// TODO(fbarchard): Add tests. Create random content of right size and convert
// with C vs Opt and or to I420 and compare.
// TODO(fbarchard): Some of these functions lack parameter setting.
@@ -61,6 +60,22 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
+// Convert J444 to ARGB.
+LIBYUV_API
+int J444ToARGB(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_argb, int dst_stride_argb,
+ int width, int height);
+
+// Convert I444 to ABGR.
+LIBYUV_API
+int I444ToABGR(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_abgr, int dst_stride_abgr,
+ int width, int height);
+
// Convert I411 to ARGB.
LIBYUV_API
int I411ToARGB(const uint8* src_y, int src_stride_y,
@@ -69,20 +84,38 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
-// Convert I400 (grey) to ARGB.
+// Convert I420 with Alpha to preattenuated ARGB.
+LIBYUV_API
+int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ const uint8* src_a, int src_stride_a,
+ uint8* dst_argb, int dst_stride_argb,
+ int width, int height, int attenuate);
+
+// Convert I420 with Alpha to preattenuated ABGR.
+LIBYUV_API
+int I420AlphaToABGR(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ const uint8* src_a, int src_stride_a,
+ uint8* dst_abgr, int dst_stride_abgr,
+ int width, int height, int attenuate);
+
+// Convert I400 (grey) to ARGB. Reverse of ARGBToI400.
LIBYUV_API
int I400ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
-// Alias.
-#define YToARGB I400ToARGB_Reference
-
-// Convert I400 to ARGB. Reverse of ARGBToI400.
+// Convert J400 (jpeg grey) to ARGB.
LIBYUV_API
-int I400ToARGB_Reference(const uint8* src_y, int src_stride_y,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
+int J400ToARGB(const uint8* src_y, int src_stride_y,
+ uint8* dst_argb, int dst_stride_argb,
+ int width, int height);
+
+// Alias.
+#define YToARGB I400ToARGB
// Convert NV12 to ARGB.
LIBYUV_API
@@ -104,13 +137,6 @@ int M420ToARGB(const uint8* src_m420, int src_stride_m420,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
-// TODO(fbarchard): Convert Q420 to ARGB.
-// LIBYUV_API
-// int Q420ToARGB(const uint8* src_y, int src_stride_y,
-// const uint8* src_yuy2, int src_stride_yuy2,
-// uint8* dst_argb, int dst_stride_argb,
-// int width, int height);
-
// Convert YUY2 to ARGB.
LIBYUV_API
int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
@@ -123,6 +149,70 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
+// Convert J420 to ARGB.
+LIBYUV_API
+int J420ToARGB(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_argb, int dst_stride_argb,
+ int width, int height);
+
+// Convert J422 to ARGB.
+LIBYUV_API
+int J422ToARGB(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_argb, int dst_stride_argb,
+ int width, int height);
+
+// Convert J420 to ABGR.
+LIBYUV_API
+int J420ToABGR(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_abgr, int dst_stride_abgr,
+ int width, int height);
+
+// Convert J422 to ABGR.
+LIBYUV_API
+int J422ToABGR(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_abgr, int dst_stride_abgr,
+ int width, int height);
+
+// Convert H420 to ARGB.
+LIBYUV_API
+int H420ToARGB(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_argb, int dst_stride_argb,
+ int width, int height);
+
+// Convert H422 to ARGB.
+LIBYUV_API
+int H422ToARGB(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_argb, int dst_stride_argb,
+ int width, int height);
+
+// Convert H420 to ABGR.
+LIBYUV_API
+int H420ToABGR(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_abgr, int dst_stride_abgr,
+ int width, int height);
+
+// Convert H422 to ABGR.
+LIBYUV_API
+int H422ToABGR(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_abgr, int dst_stride_abgr,
+ int width, int height);
+
// BGRA little endian (argb in memory) to ARGB.
LIBYUV_API
int BGRAToARGB(const uint8* src_frame, int src_stride_frame,
@@ -184,8 +274,6 @@ int MJPGToARGB(const uint8* sample, size_t sample_size,
int dst_width, int dst_height);
#endif
-// Note Bayer formats (BGGR) to ARGB are in format_conversion.h.
-
// Convert camera sample to ARGB with cropping, rotation and vertical flip.
// "src_size" is needed to parse MJPG.
// "dst_stride_argb" number of bytes in a row of the dst_argb plane.
diff --git a/TMessagesProj/jni/libyuv/include/libyuv/convert_from.h b/TMessagesProj/jni/libyuv/include/libyuv/convert_from.h
index b1cf57f7d..9fd8d4de5 100644
--- a/TMessagesProj/jni/libyuv/include/libyuv/convert_from.h
+++ b/TMessagesProj/jni/libyuv/include/libyuv/convert_from.h
@@ -57,7 +57,6 @@ int I400Copy(const uint8* src_y, int src_stride_y,
int width, int height);
// TODO(fbarchard): I420ToM420
-// TODO(fbarchard): I420ToQ420
LIBYUV_API
int I420ToNV12(const uint8* src_y, int src_stride_y,
@@ -138,6 +137,17 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
+// Convert I420 To RGB565 with 4x4 dither matrix (16 bytes).
+// Values in dither matrix from 0 to 7 recommended.
+// The order of the dither matrix is first byte is upper left.
+
+LIBYUV_API
+int I420ToRGB565Dither(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_frame, int dst_stride_frame,
+ const uint8* dither4x4, int width, int height);
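+// A typical 4x4 ordered-dither matrix (a Bayer pattern scaled to 0..7), for
+// illustration:
+//   static const uint8 kDither4x4[16] = {
+//     0, 4, 1, 5,
+//     6, 2, 7, 3,
+//     1, 5, 0, 4,
+//     7, 3, 6, 2,
+//   };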
+
LIBYUV_API
int I420ToARGB1555(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
@@ -152,8 +162,6 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
-// Note Bayer formats (BGGR) To I420 are in format_conversion.h.
-
// Convert I420 to specified format.
// "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the
// buffer has contiguous rows. Can be negative. A multiple of 16 is optimal.
diff --git a/TMessagesProj/jni/libyuv/include/libyuv/convert_from_argb.h b/TMessagesProj/jni/libyuv/include/libyuv/convert_from_argb.h
index 90f43af04..1df53200d 100644
--- a/TMessagesProj/jni/libyuv/include/libyuv/convert_from_argb.h
+++ b/TMessagesProj/jni/libyuv/include/libyuv/convert_from_argb.h
@@ -61,6 +61,16 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height);
+// Convert ARGB To RGB565 with 4x4 dither matrix (16 bytes).
+// Values in dither matrix from 0 to 7 recommended.
+// The order of the dither matrix is first byte is upper left.
+// TODO(fbarchard): Consider pointer to 2d array for dither4x4.
+// const uint8(*dither)[4][4];
+LIBYUV_API
+int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb,
+ uint8* dst_rgb565, int dst_stride_rgb565,
+ const uint8* dither4x4, int width, int height);
+
// Convert ARGB To ARGB1555.
LIBYUV_API
int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
@@ -105,6 +115,14 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
uint8* dst_v, int dst_stride_v,
int width, int height);
+// Convert ARGB to J422.
+LIBYUV_API
+int ARGBToJ422(const uint8* src_argb, int src_stride_argb,
+ uint8* dst_yj, int dst_stride_yj,
+ uint8* dst_u, int dst_stride_u,
+ uint8* dst_v, int dst_stride_v,
+ int width, int height);
+
// Convert ARGB To I411.
LIBYUV_API
int ARGBToI411(const uint8* src_argb, int src_stride_argb,
@@ -125,6 +143,12 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
int width, int height);
+// Convert ARGB to G. (Reverse of J400toARGB, which replicates G back to ARGB)
+LIBYUV_API
+int ARGBToG(const uint8* src_argb, int src_stride_argb,
+ uint8* dst_g, int dst_stride_g,
+ int width, int height);
+
// Convert ARGB To NV12.
LIBYUV_API
int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
diff --git a/TMessagesProj/jni/libyuv/include/libyuv/cpu_id.h b/TMessagesProj/jni/libyuv/include/libyuv/cpu_id.h
index dc858a814..a83edd501 100644
--- a/TMessagesProj/jni/libyuv/include/libyuv/cpu_id.h
+++ b/TMessagesProj/jni/libyuv/include/libyuv/cpu_id.h
@@ -18,9 +18,8 @@ namespace libyuv {
extern "C" {
#endif
-// TODO(fbarchard): Consider overlapping bits for different architectures.
// Internal flag to indicate cpuid requires initialization.
-#define kCpuInit 0x1
+static const int kCpuInitialized = 0x1;
// These flags are only valid on ARM processors.
static const int kCpuHasARM = 0x2;
@@ -37,12 +36,12 @@ static const int kCpuHasAVX = 0x200;
static const int kCpuHasAVX2 = 0x400;
static const int kCpuHasERMS = 0x800;
static const int kCpuHasFMA3 = 0x1000;
+static const int kCpuHasAVX3 = 0x2000;
// 0x2000, 0x4000, 0x8000 reserved for future X86 flags.
// These flags are only valid on MIPS processors.
static const int kCpuHasMIPS = 0x10000;
-static const int kCpuHasMIPS_DSP = 0x20000;
-static const int kCpuHasMIPS_DSPR2 = 0x40000;
+static const int kCpuHasMIPS_DSPR2 = 0x20000;
// Internal function used to auto-init.
LIBYUV_API
@@ -57,7 +56,7 @@ int ArmCpuCaps(const char* cpuinfo_name);
// returns non-zero if instruction set is detected
static __inline int TestCpuFlag(int test_flag) {
LIBYUV_API extern int cpu_info_;
- return (cpu_info_ == kCpuInit ? InitCpuFlags() : cpu_info_) & test_flag;
+ return (!cpu_info_ ? InitCpuFlags() : cpu_info_) & test_flag;
}
// For testing, allow CPU flags to be disabled.
diff --git a/TMessagesProj/jni/libyuv/include/libyuv/format_conversion.h b/TMessagesProj/jni/libyuv/include/libyuv/format_conversion.h
deleted file mode 100644
index b18bf0534..000000000
--- a/TMessagesProj/jni/libyuv/include/libyuv/format_conversion.h
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_FORMATCONVERSION_H_ // NOLINT
-#define INCLUDE_LIBYUV_FORMATCONVERSION_H_
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Convert Bayer RGB formats to I420.
-LIBYUV_API
-int BayerBGGRToI420(const uint8* src_bayer, int src_stride_bayer,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-LIBYUV_API
-int BayerGBRGToI420(const uint8* src_bayer, int src_stride_bayer,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-LIBYUV_API
-int BayerGRBGToI420(const uint8* src_bayer, int src_stride_bayer,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-LIBYUV_API
-int BayerRGGBToI420(const uint8* src_bayer, int src_stride_bayer,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Temporary API mapper.
-#define BayerRGBToI420(b, bs, f, y, ys, u, us, v, vs, w, h) \
- BayerToI420(b, bs, y, ys, u, us, v, vs, w, h, f)
-
-LIBYUV_API
-int BayerToI420(const uint8* src_bayer, int src_stride_bayer,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height,
- uint32 src_fourcc_bayer);
-
-// Convert I420 to Bayer RGB formats.
-LIBYUV_API
-int I420ToBayerBGGR(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_frame, int dst_stride_frame,
- int width, int height);
-
-LIBYUV_API
-int I420ToBayerGBRG(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_frame, int dst_stride_frame,
- int width, int height);
-
-LIBYUV_API
-int I420ToBayerGRBG(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_frame, int dst_stride_frame,
- int width, int height);
-
-LIBYUV_API
-int I420ToBayerRGGB(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_frame, int dst_stride_frame,
- int width, int height);
-
-// Temporary API mapper.
-#define I420ToBayerRGB(y, ys, u, us, v, vs, b, bs, f, w, h) \
- I420ToBayer(y, ys, u, us, v, vs, b, bs, w, h, f)
-
-LIBYUV_API
-int I420ToBayer(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_frame, int dst_stride_frame,
- int width, int height,
- uint32 dst_fourcc_bayer);
-
-// Convert Bayer RGB formats to ARGB.
-LIBYUV_API
-int BayerBGGRToARGB(const uint8* src_bayer, int src_stride_bayer,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-LIBYUV_API
-int BayerGBRGToARGB(const uint8* src_bayer, int src_stride_bayer,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-LIBYUV_API
-int BayerGRBGToARGB(const uint8* src_bayer, int src_stride_bayer,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-LIBYUV_API
-int BayerRGGBToARGB(const uint8* src_bayer, int src_stride_bayer,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Temporary API mapper.
-#define BayerRGBToARGB(b, bs, f, a, as, w, h) BayerToARGB(b, bs, a, as, w, h, f)
-
-LIBYUV_API
-int BayerToARGB(const uint8* src_bayer, int src_stride_bayer,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height,
- uint32 src_fourcc_bayer);
-
-// Converts ARGB to Bayer RGB formats.
-LIBYUV_API
-int ARGBToBayerBGGR(const uint8* src_argb, int src_stride_argb,
- uint8* dst_bayer, int dst_stride_bayer,
- int width, int height);
-
-LIBYUV_API
-int ARGBToBayerGBRG(const uint8* src_argb, int src_stride_argb,
- uint8* dst_bayer, int dst_stride_bayer,
- int width, int height);
-
-LIBYUV_API
-int ARGBToBayerGRBG(const uint8* src_argb, int src_stride_argb,
- uint8* dst_bayer, int dst_stride_bayer,
- int width, int height);
-
-LIBYUV_API
-int ARGBToBayerRGGB(const uint8* src_argb, int src_stride_argb,
- uint8* dst_bayer, int dst_stride_bayer,
- int width, int height);
-
-// Temporary API mapper.
-#define ARGBToBayerRGB(a, as, b, bs, f, w, h) ARGBToBayer(b, bs, a, as, w, h, f)
-
-LIBYUV_API
-int ARGBToBayer(const uint8* src_argb, int src_stride_argb,
- uint8* dst_bayer, int dst_stride_bayer,
- int width, int height,
- uint32 dst_fourcc_bayer);
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-#endif // INCLUDE_LIBYUV_FORMATCONVERSION_H_ NOLINT
diff --git a/TMessagesProj/jni/libyuv/include/libyuv/planar_functions.h b/TMessagesProj/jni/libyuv/include/libyuv/planar_functions.h
index d10a16985..9d30225d4 100644
--- a/TMessagesProj/jni/libyuv/include/libyuv/planar_functions.h
+++ b/TMessagesProj/jni/libyuv/include/libyuv/planar_functions.h
@@ -45,6 +45,7 @@ int I400ToI400(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
int width, int height);
+#define J400ToJ400 I400ToI400
// Copy I422 to I422.
#define I422ToI422 I422Copy
@@ -84,6 +85,18 @@ int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_v, int dst_stride_v,
int width, int height);
+LIBYUV_API
+int YUY2ToNV12(const uint8* src_yuy2, int src_stride_yuy2,
+ uint8* dst_y, int dst_stride_y,
+ uint8* dst_uv, int dst_stride_uv,
+ int width, int height);
+
+LIBYUV_API
+int UYVYToNV12(const uint8* src_uyvy, int src_stride_uyvy,
+ uint8* dst_y, int dst_stride_y,
+ uint8* dst_uv, int dst_stride_uv,
+ int width, int height);
+
// Convert I420 to I400. (calls CopyPlane ignoring u/v).
LIBYUV_API
int I420ToI400(const uint8* src_y, int src_stride_y,
@@ -93,6 +106,7 @@ int I420ToI400(const uint8* src_y, int src_stride_y,
int width, int height);
// Alias
+#define J420ToJ400 I420ToI400
#define I420ToI420Mirror I420Mirror
// I420 mirror.
@@ -131,13 +145,6 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height);
-// Convert NV21 to RGB565.
-LIBYUV_API
-int NV21ToRGB565(const uint8* src_y, int src_stride_y,
- const uint8* src_uv, int src_stride_uv,
- uint8* dst_rgb565, int dst_stride_rgb565,
- int width, int height);
-
// I422ToARGB is in convert_argb.h
// Convert I422 to BGRA.
LIBYUV_API
@@ -163,6 +170,14 @@ int I422ToRGBA(const uint8* src_y, int src_stride_y,
uint8* dst_rgba, int dst_stride_rgba,
int width, int height);
+// Alias
+#define RGB24ToRAW RAWToRGB24
+
+LIBYUV_API
+int RAWToRGB24(const uint8* src_raw, int src_stride_raw,
+ uint8* dst_rgb24, int dst_stride_rgb24,
+ int width, int height);
+
// Draw a rectangle into I420.
LIBYUV_API
int I420Rect(uint8* dst_y, int dst_stride_y,
@@ -267,13 +282,13 @@ int ARGBCopy(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
-// Copy ARGB to ARGB.
+// Copy Alpha channel of ARGB to alpha of ARGB.
LIBYUV_API
int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
-// Copy ARGB to ARGB.
+// Copy Y channel to Alpha of ARGB.
LIBYUV_API
int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
@@ -287,6 +302,7 @@ LIBYUV_API
ARGBBlendRow GetARGBBlend();
// Alpha Blend ARGB images and store to destination.
+// Source is pre-multiplied by alpha using ARGBAttenuate.
// Alpha of destination is set to 255.
LIBYUV_API
int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
@@ -294,6 +310,31 @@ int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
+// Alpha Blend plane and store to destination.
+// Source is not pre-multiplied by alpha.
+LIBYUV_API
+int BlendPlane(const uint8* src_y0, int src_stride_y0,
+ const uint8* src_y1, int src_stride_y1,
+ const uint8* alpha, int alpha_stride,
+ uint8* dst_y, int dst_stride_y,
+ int width, int height);
+
+// Alpha Blend YUV images and store to destination.
+// Source is not pre-multiplied by alpha.
+// Alpha is full width x height and subsampled to half size to apply to UV.
+LIBYUV_API
+int I420Blend(const uint8* src_y0, int src_stride_y0,
+ const uint8* src_u0, int src_stride_u0,
+ const uint8* src_v0, int src_stride_v0,
+ const uint8* src_y1, int src_stride_y1,
+ const uint8* src_u1, int src_stride_u1,
+ const uint8* src_v1, int src_stride_v1,
+ const uint8* alpha, int alpha_stride,
+ uint8* dst_y, int dst_stride_y,
+ uint8* dst_u, int dst_stride_u,
+ uint8* dst_v, int dst_stride_v,
+ int width, int height);
+
// Multiply ARGB image by ARGB image. Shifted down by 8. Saturates to 255.
LIBYUV_API
int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
@@ -375,36 +416,57 @@ int ARGBShade(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height, uint32 value);
-// Interpolate between two ARGB images using specified amount of interpolation
+// Interpolate between two images using specified amount of interpolation
// (0 to 255) and store to destination.
-// 'interpolation' is specified as 8 bit fraction where 0 means 100% src_argb0
-// and 255 means 1% src_argb0 and 99% src_argb1.
-// Internally uses ARGBScale bilinear filtering.
-// Caveat: This function will write up to 16 bytes beyond the end of dst_argb.
+// 'interpolation' is specified as 8 bit fraction where 0 means 100% src0
+// and 255 means 1% src0 and 99% src1.
+LIBYUV_API
+int InterpolatePlane(const uint8* src0, int src_stride0,
+ const uint8* src1, int src_stride1,
+ uint8* dst, int dst_stride,
+ int width, int height, int interpolation);
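+// For example, interpolation 128 gives a roughly equal blend of src0 and src1.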
+
+// Interpolate between two ARGB images using specified amount of interpolation
+// Internally calls InterpolatePlane with width * 4 (bpp).
LIBYUV_API
int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
const uint8* src_argb1, int src_stride_argb1,
uint8* dst_argb, int dst_stride_argb,
int width, int height, int interpolation);
-#if defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \
- defined(TARGET_IPHONE_SIMULATOR)
+// Interpolate between two YUV images using specified amount of interpolation
+// Internally calls InterpolatePlane on each plane where the U and V planes
+// are half width and half height.
+LIBYUV_API
+int I420Interpolate(const uint8* src0_y, int src0_stride_y,
+ const uint8* src0_u, int src0_stride_u,
+ const uint8* src0_v, int src0_stride_v,
+ const uint8* src1_y, int src1_stride_y,
+ const uint8* src1_u, int src1_stride_u,
+ const uint8* src1_v, int src1_stride_v,
+ uint8* dst_y, int dst_stride_y,
+ uint8* dst_u, int dst_stride_u,
+ uint8* dst_v, int dst_stride_v,
+ int width, int height, int interpolation);
+
+#if defined(__pnacl__) || defined(__CLR_VER) || \
+ (defined(__i386__) && !defined(__SSE2__))
#define LIBYUV_DISABLE_X86
#endif
+// The following are available on all x86 platforms:
+#if !defined(LIBYUV_DISABLE_X86) && \
+ (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
+#define HAS_ARGBAFFINEROW_SSE2
+#endif
-// Row functions for copying a pixels from a source with a slope to a row
+// Row function for copying pixels from a source with a slope to a row
// of destination. Useful for scaling, rotation, mirror, texture mapping.
LIBYUV_API
void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
uint8* dst_argb, const float* uv_dudv, int width);
-// The following are available on all x86 platforms:
-#if !defined(LIBYUV_DISABLE_X86) && \
- (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
LIBYUV_API
void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
uint8* dst_argb, const float* uv_dudv, int width);
-#define HAS_ARGBAFFINEROW_SSE2
-#endif // LIBYUV_DISABLE_X86
// Shuffle ARGB channel order. e.g. BGRA to ARGB.
// shuffler is 16 bytes and must be aligned.
diff --git a/TMessagesProj/jni/libyuv/include/libyuv/rotate_row.h b/TMessagesProj/jni/libyuv/include/libyuv/rotate_row.h
new file mode 100644
index 000000000..e3838295c
--- /dev/null
+++ b/TMessagesProj/jni/libyuv/include/libyuv/rotate_row.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright 2013 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_ROTATE_ROW_H_ // NOLINT
+#define INCLUDE_LIBYUV_ROTATE_ROW_H_
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#if defined(__pnacl__) || defined(__CLR_VER) || \
+ (defined(__i386__) && !defined(__SSE2__))
+#define LIBYUV_DISABLE_X86
+#endif
+
+// The following are available for Visual C and clangcl 32 bit:
+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
+#define HAS_TRANSPOSEWX8_SSSE3
+#define HAS_TRANSPOSEUVWX8_SSE2
+#endif
+
+// The following are available for GCC 32 or 64 bit but not NaCL for 64 bit:
+#if !defined(LIBYUV_DISABLE_X86) && \
+ (defined(__i386__) || (defined(__x86_64__) && !defined(__native_client__)))
+#define HAS_TRANSPOSEWX8_SSSE3
+#endif
+
+// The following are available for 64 bit GCC but not NaCL:
+#if !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \
+ defined(__x86_64__)
+#define HAS_TRANSPOSEWX8_FAST_SSSE3
+#define HAS_TRANSPOSEUVWX8_SSE2
+#endif
+
+#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
+ (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
+#define HAS_TRANSPOSEWX8_NEON
+#define HAS_TRANSPOSEUVWX8_NEON
+#endif
+
+#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
+ defined(__mips__) && \
+ defined(__mips_dsp) && (__mips_dsp_rev >= 2)
+#define HAS_TRANSPOSEWX8_MIPS_DSPR2
+#define HAS_TRANSPOSEUVWX8_MIPS_DSPR2
+#endif // defined(__mips__)
+
+void TransposeWxH_C(const uint8* src, int src_stride,
+ uint8* dst, int dst_stride, int width, int height);
+
+void TransposeWx8_C(const uint8* src, int src_stride,
+ uint8* dst, int dst_stride, int width);
+void TransposeWx8_NEON(const uint8* src, int src_stride,
+ uint8* dst, int dst_stride, int width);
+void TransposeWx8_SSSE3(const uint8* src, int src_stride,
+ uint8* dst, int dst_stride, int width);
+void TransposeWx8_Fast_SSSE3(const uint8* src, int src_stride,
+ uint8* dst, int dst_stride, int width);
+void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
+ uint8* dst, int dst_stride, int width);
+void TransposeWx8_Fast_MIPS_DSPR2(const uint8* src, int src_stride,
+ uint8* dst, int dst_stride, int width);
+
+void TransposeWx8_Any_NEON(const uint8* src, int src_stride,
+ uint8* dst, int dst_stride, int width);
+void TransposeWx8_Any_SSSE3(const uint8* src, int src_stride,
+ uint8* dst, int dst_stride, int width);
+void TransposeWx8_Fast_Any_SSSE3(const uint8* src, int src_stride,
+ uint8* dst, int dst_stride, int width);
+void TransposeWx8_Any_MIPS_DSPR2(const uint8* src, int src_stride,
+ uint8* dst, int dst_stride, int width);
+
+void TransposeUVWxH_C(const uint8* src, int src_stride,
+ uint8* dst_a, int dst_stride_a,
+ uint8* dst_b, int dst_stride_b,
+ int width, int height);
+
+void TransposeUVWx8_C(const uint8* src, int src_stride,
+ uint8* dst_a, int dst_stride_a,
+ uint8* dst_b, int dst_stride_b, int width);
+void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
+ uint8* dst_a, int dst_stride_a,
+ uint8* dst_b, int dst_stride_b, int width);
+void TransposeUVWx8_NEON(const uint8* src, int src_stride,
+ uint8* dst_a, int dst_stride_a,
+ uint8* dst_b, int dst_stride_b, int width);
+void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride,
+ uint8* dst_a, int dst_stride_a,
+ uint8* dst_b, int dst_stride_b, int width);
+
+void TransposeUVWx8_Any_SSE2(const uint8* src, int src_stride,
+ uint8* dst_a, int dst_stride_a,
+ uint8* dst_b, int dst_stride_b, int width);
+void TransposeUVWx8_Any_NEON(const uint8* src, int src_stride,
+ uint8* dst_a, int dst_stride_a,
+ uint8* dst_b, int dst_stride_b, int width);
+void TransposeUVWx8_Any_MIPS_DSPR2(const uint8* src, int src_stride,
+ uint8* dst_a, int dst_stride_a,
+ uint8* dst_b, int dst_stride_b, int width);
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
+
+#endif // INCLUDE_LIBYUV_ROTATE_ROW_H_ NOLINT
diff --git a/TMessagesProj/jni/libyuv/include/libyuv/row.h b/TMessagesProj/jni/libyuv/include/libyuv/row.h
index 477b27447..f7620eea1 100644
--- a/TMessagesProj/jni/libyuv/include/libyuv/row.h
+++ b/TMessagesProj/jni/libyuv/include/libyuv/row.h
@@ -15,10 +15,6 @@
#include "libyuv/basic_types.h"
-#if defined(__native_client__)
-#include "ppapi/c/pp_macros.h" // For PPAPI_RELEASE
-#endif
-
#ifdef __cplusplus
namespace libyuv {
extern "C" {
@@ -41,9 +37,8 @@ extern "C" {
free(var##_mem); \
var = 0
-#if defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \
- defined(TARGET_IPHONE_SIMULATOR) || \
- (defined(_MSC_VER) && defined(__clang__))
+#if defined(__pnacl__) || defined(__CLR_VER) || \
+ (defined(__i386__) && !defined(__SSE2__))
#define LIBYUV_DISABLE_X86
#endif
// True if compiling for SSSE3 as a requirement.
@@ -51,121 +46,15 @@ extern "C" {
#define LIBYUV_SSSE3_ONLY
#endif
-// Enable for NaCL pepper 33 for bundle and AVX2 support.
-#if defined(__native_client__) && PPAPI_RELEASE >= 33
-#define NEW_BINUTILS
-#endif
-#if defined(__native_client__) && defined(__arm__) && PPAPI_RELEASE < 37
+#if defined(__native_client__)
#define LIBYUV_DISABLE_NEON
#endif
-
-// The following are available on all x86 platforms:
-#if !defined(LIBYUV_DISABLE_X86) && \
- (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
-// Effects:
-#define HAS_ARGBADDROW_SSE2
-#define HAS_ARGBAFFINEROW_SSE2
-#define HAS_ARGBATTENUATEROW_SSSE3
-#define HAS_ARGBBLENDROW_SSSE3
-#define HAS_ARGBCOLORMATRIXROW_SSSE3
-#define HAS_ARGBCOLORTABLEROW_X86
-#define HAS_ARGBCOPYALPHAROW_SSE2
-#define HAS_ARGBCOPYYTOALPHAROW_SSE2
-#define HAS_ARGBGRAYROW_SSSE3
-#define HAS_ARGBLUMACOLORTABLEROW_SSSE3
-#define HAS_ARGBMIRRORROW_SSSE3
-#define HAS_ARGBMULTIPLYROW_SSE2
-#define HAS_ARGBPOLYNOMIALROW_SSE2
-#define HAS_ARGBQUANTIZEROW_SSE2
-#define HAS_ARGBSEPIAROW_SSSE3
-#define HAS_ARGBSHADEROW_SSE2
-#define HAS_ARGBSUBTRACTROW_SSE2
-#define HAS_ARGBTOUVROW_SSSE3
-#define HAS_ARGBUNATTENUATEROW_SSE2
-#define HAS_COMPUTECUMULATIVESUMROW_SSE2
-#define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
-#define HAS_INTERPOLATEROW_SSE2
-#define HAS_INTERPOLATEROW_SSSE3
-#define HAS_RGBCOLORTABLEROW_X86
-#define HAS_SOBELROW_SSE2
-#define HAS_SOBELTOPLANEROW_SSE2
-#define HAS_SOBELXROW_SSE2
-#define HAS_SOBELXYROW_SSE2
-#define HAS_SOBELYROW_SSE2
-
-// Conversions:
-#define HAS_ABGRTOUVROW_SSSE3
-#define HAS_ABGRTOYROW_SSSE3
-#define HAS_ARGB1555TOARGBROW_SSE2
-#define HAS_ARGB4444TOARGBROW_SSE2
-#define HAS_ARGBSHUFFLEROW_SSE2
-#define HAS_ARGBSHUFFLEROW_SSSE3
-#define HAS_ARGBTOARGB1555ROW_SSE2
-#define HAS_ARGBTOARGB4444ROW_SSE2
-#define HAS_ARGBTOBAYERGGROW_SSE2
-#define HAS_ARGBTOBAYERROW_SSSE3
-#define HAS_ARGBTORAWROW_SSSE3
-#define HAS_ARGBTORGB24ROW_SSSE3
-#define HAS_ARGBTORGB565ROW_SSE2
-#define HAS_ARGBTOUV422ROW_SSSE3
-#define HAS_ARGBTOUV444ROW_SSSE3
-#define HAS_ARGBTOUVJROW_SSSE3
-#define HAS_ARGBTOYJROW_SSSE3
-#define HAS_ARGBTOYROW_SSSE3
-#define HAS_BGRATOUVROW_SSSE3
-#define HAS_BGRATOYROW_SSSE3
-#define HAS_COPYROW_ERMS
-#define HAS_COPYROW_SSE2
-#define HAS_COPYROW_X86
-#define HAS_HALFROW_SSE2
-#define HAS_I400TOARGBROW_SSE2
-#define HAS_I411TOARGBROW_SSSE3
-#define HAS_I422TOARGB1555ROW_SSSE3
-#define HAS_I422TOABGRROW_SSSE3
-#define HAS_I422TOARGB1555ROW_SSSE3
-#define HAS_I422TOARGB4444ROW_SSSE3
-#define HAS_I422TOARGBROW_SSSE3
-#define HAS_I422TOBGRAROW_SSSE3
-#define HAS_I422TORAWROW_SSSE3
-#define HAS_I422TORGB24ROW_SSSE3
-#define HAS_I422TORGB565ROW_SSSE3
-#define HAS_I422TORGBAROW_SSSE3
-#define HAS_I422TOUYVYROW_SSE2
-#define HAS_I422TOYUY2ROW_SSE2
-#define HAS_I444TOARGBROW_SSSE3
-#define HAS_MERGEUVROW_SSE2
-#define HAS_MIRRORROW_SSE2
-#define HAS_MIRRORROW_SSSE3
-#define HAS_MIRRORROW_UV_SSSE3
-#define HAS_MIRRORUVROW_SSSE3
-#define HAS_NV12TOARGBROW_SSSE3
-#define HAS_NV12TORGB565ROW_SSSE3
-#define HAS_NV21TOARGBROW_SSSE3
-#define HAS_NV21TORGB565ROW_SSSE3
-#define HAS_RAWTOARGBROW_SSSE3
-#define HAS_RAWTOYROW_SSSE3
-#define HAS_RGB24TOARGBROW_SSSE3
-#define HAS_RGB24TOYROW_SSSE3
-#define HAS_RGB565TOARGBROW_SSE2
-#define HAS_RGBATOUVROW_SSSE3
-#define HAS_RGBATOYROW_SSSE3
-#define HAS_SETROW_X86
-#define HAS_SPLITUVROW_SSE2
-#define HAS_UYVYTOARGBROW_SSSE3
-#define HAS_UYVYTOUV422ROW_SSE2
-#define HAS_UYVYTOUVROW_SSE2
-#define HAS_UYVYTOYROW_SSE2
-#define HAS_YTOARGBROW_SSE2
-#define HAS_YUY2TOARGBROW_SSSE3
-#define HAS_YUY2TOUV422ROW_SSE2
-#define HAS_YUY2TOUVROW_SSE2
-#define HAS_YUY2TOYROW_SSE2
-#endif
-
-// The following are available on x64 Visual C:
-#if !defined(LIBYUV_DISABLE_X86) && defined (_M_X64)
-#define HAS_I422TOARGBROW_SSSE3
-#endif
+// clang >= 3.5.0 required for Arm64.
+#if defined(__clang__) && defined(__aarch64__) && !defined(LIBYUV_DISABLE_NEON)
+#if (__clang_major__ < 3) || (__clang_major__ == 3 && (__clang_minor__ < 5))
+#define LIBYUV_DISABLE_NEON
+#endif // clang >= 3.5
+#endif // __clang__
// GCC >= 4.7.0 required for AVX2.
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
@@ -182,37 +71,158 @@ extern "C" {
#endif // __clang__
// Visual C 2012 required for AVX2.
-#if defined(_M_IX86) && defined(_MSC_VER) && _MSC_VER >= 1700
+#if defined(_M_IX86) && !defined(__clang__) && \
+ defined(_MSC_VER) && _MSC_VER >= 1700
#define VISUALC_HAS_AVX2 1
#endif // VisualStudio >= 2012
+// The following are available on all x86 platforms:
+#if !defined(LIBYUV_DISABLE_X86) && \
+ (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
+// Conversions:
+#define HAS_ABGRTOUVROW_SSSE3
+#define HAS_ABGRTOYROW_SSSE3
+#define HAS_ARGB1555TOARGBROW_SSE2
+#define HAS_ARGB4444TOARGBROW_SSE2
+#define HAS_ARGBSETROW_X86
+#define HAS_ARGBSHUFFLEROW_SSE2
+#define HAS_ARGBSHUFFLEROW_SSSE3
+#define HAS_ARGBTOARGB1555ROW_SSE2
+#define HAS_ARGBTOARGB4444ROW_SSE2
+#define HAS_ARGBTORAWROW_SSSE3
+#define HAS_ARGBTORGB24ROW_SSSE3
+#define HAS_ARGBTORGB565DITHERROW_SSE2
+#define HAS_ARGBTORGB565ROW_SSE2
+#define HAS_ARGBTOUV422ROW_SSSE3
+#define HAS_ARGBTOUV444ROW_SSSE3
+#define HAS_ARGBTOUVJROW_SSSE3
+#define HAS_ARGBTOUVROW_SSSE3
+#define HAS_ARGBTOYJROW_SSSE3
+#define HAS_ARGBTOYROW_SSSE3
+#define HAS_BGRATOUVROW_SSSE3
+#define HAS_BGRATOYROW_SSSE3
+#define HAS_COPYROW_ERMS
+#define HAS_COPYROW_SSE2
+#define HAS_H422TOARGBROW_SSSE3
+#define HAS_I400TOARGBROW_SSE2
+// The following functions fail on gcc/clang 32 bit with fpic and framepointer.
+// caveat: clangcl uses row_win.cc which works.
+#if defined(NDEBUG) || !(defined(_DEBUG) && defined(__i386__)) || \
+ !defined(__i386__) || defined(_MSC_VER)
+// TODO(fbarchard): fix build error on x86 debug
+// https://code.google.com/p/libyuv/issues/detail?id=524
+#define HAS_I411TOARGBROW_SSSE3
+// TODO(fbarchard): fix build error on android_full_debug=1
+// https://code.google.com/p/libyuv/issues/detail?id=517
+#define HAS_I422ALPHATOARGBROW_SSSE3
+#endif
+#define HAS_I422TOARGB1555ROW_SSSE3
+#define HAS_I422TOARGB4444ROW_SSSE3
+#define HAS_I422TOARGBROW_SSSE3
+#define HAS_I422TORGB24ROW_SSSE3
+#define HAS_I422TORGB565ROW_SSSE3
+#define HAS_I422TORGBAROW_SSSE3
+#define HAS_I422TOUYVYROW_SSE2
+#define HAS_I422TOYUY2ROW_SSE2
+#define HAS_I444TOARGBROW_SSSE3
+#define HAS_J400TOARGBROW_SSE2
+#define HAS_J422TOARGBROW_SSSE3
+#define HAS_MERGEUVROW_SSE2
+#define HAS_MIRRORROW_SSSE3
+#define HAS_MIRRORROW_UV_SSSE3
+#define HAS_MIRRORUVROW_SSSE3
+#define HAS_NV12TOARGBROW_SSSE3
+#define HAS_NV12TORGB565ROW_SSSE3
+#define HAS_NV21TOARGBROW_SSSE3
+#define HAS_RAWTOARGBROW_SSSE3
+#define HAS_RAWTORGB24ROW_SSSE3
+#define HAS_RAWTOYROW_SSSE3
+#define HAS_RGB24TOARGBROW_SSSE3
+#define HAS_RGB24TOYROW_SSSE3
+#define HAS_RGB565TOARGBROW_SSE2
+#define HAS_RGBATOUVROW_SSSE3
+#define HAS_RGBATOYROW_SSSE3
+#define HAS_SETROW_ERMS
+#define HAS_SETROW_X86
+#define HAS_SPLITUVROW_SSE2
+#define HAS_UYVYTOARGBROW_SSSE3
+#define HAS_UYVYTOUV422ROW_SSE2
+#define HAS_UYVYTOUVROW_SSE2
+#define HAS_UYVYTOYROW_SSE2
+#define HAS_YUY2TOARGBROW_SSSE3
+#define HAS_YUY2TOUV422ROW_SSE2
+#define HAS_YUY2TOUVROW_SSE2
+#define HAS_YUY2TOYROW_SSE2
+
+// Effects:
+#define HAS_ARGBADDROW_SSE2
+#define HAS_ARGBAFFINEROW_SSE2
+#define HAS_ARGBATTENUATEROW_SSSE3
+#define HAS_ARGBBLENDROW_SSSE3
+#define HAS_ARGBCOLORMATRIXROW_SSSE3
+#define HAS_ARGBCOLORTABLEROW_X86
+#define HAS_ARGBCOPYALPHAROW_SSE2
+#define HAS_ARGBCOPYYTOALPHAROW_SSE2
+#define HAS_ARGBGRAYROW_SSSE3
+#define HAS_ARGBLUMACOLORTABLEROW_SSSE3
+#define HAS_ARGBMIRRORROW_SSE2
+#define HAS_ARGBMULTIPLYROW_SSE2
+#define HAS_ARGBPOLYNOMIALROW_SSE2
+#define HAS_ARGBQUANTIZEROW_SSE2
+#define HAS_ARGBSEPIAROW_SSSE3
+#define HAS_ARGBSHADEROW_SSE2
+#define HAS_ARGBSUBTRACTROW_SSE2
+#define HAS_ARGBUNATTENUATEROW_SSE2
+#define HAS_BLENDPLANEROW_SSSE3
+#define HAS_COMPUTECUMULATIVESUMROW_SSE2
+#define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
+#define HAS_INTERPOLATEROW_SSSE3
+#define HAS_RGBCOLORTABLEROW_X86
+#define HAS_SOBELROW_SSE2
+#define HAS_SOBELTOPLANEROW_SSE2
+#define HAS_SOBELXROW_SSE2
+#define HAS_SOBELXYROW_SSE2
+#define HAS_SOBELYROW_SSE2
+#endif
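A HAS_ define only says an accelerated row implementation exists at compile time; callers pair it with runtime CPU detection. A minimal sketch of that dispatch pattern, assuming TestCpuFlag/kCpuHasSSSE3 from cpu_id.h and IS_ALIGNED from basic_types.h:

// Sketch: runtime dispatch in the style of the convert*.cc callers. The
// HAS_ macro gates compilation; TestCpuFlag gates execution.
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
    ARGBToYRow_C;
#if defined(HAS_ARGBTOYROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBToYRow = ARGBToYRow_Any_SSSE3;  // handles any width
    if (IS_ALIGNED(width, 16)) {
      ARGBToYRow = ARGBToYRow_SSSE3;    // fast path, width % 16 == 0
    }
  }
#endif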
+
// The following are available on all x86 platforms, but
// require VS2012, clang 3.4 or gcc 4.7.
// The code supports NaCl but requires a new compiler and validator.
#if !defined(LIBYUV_DISABLE_X86) && (defined(VISUALC_HAS_AVX2) || \
defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
-// Effects:
-#define HAS_ARGBPOLYNOMIALROW_AVX2
-#define HAS_ARGBSHUFFLEROW_AVX2
#define HAS_ARGBCOPYALPHAROW_AVX2
#define HAS_ARGBCOPYYTOALPHAROW_AVX2
-#endif
-
-// The following are require VS2012.
-// TODO(fbarchard): Port to gcc.
-#if !defined(LIBYUV_DISABLE_X86) && defined(VISUALC_HAS_AVX2)
+#define HAS_ARGBMIRRORROW_AVX2
+#define HAS_ARGBPOLYNOMIALROW_AVX2
+#define HAS_ARGBSHUFFLEROW_AVX2
+#define HAS_ARGBTORGB565DITHERROW_AVX2
#define HAS_ARGBTOUVROW_AVX2
#define HAS_ARGBTOYJROW_AVX2
#define HAS_ARGBTOYROW_AVX2
-#define HAS_HALFROW_AVX2
+#define HAS_COPYROW_AVX
+#define HAS_H422TOARGBROW_AVX2
+#define HAS_I400TOARGBROW_AVX2
+#if !(defined(_DEBUG) && defined(__i386__))
+// TODO(fbarchard): fix build error on android_full_debug=1
+// https://code.google.com/p/libyuv/issues/detail?id=517
+#define HAS_I422ALPHATOARGBROW_AVX2
+#endif
#define HAS_I422TOARGBROW_AVX2
+#define HAS_I422TORGB24ROW_AVX2
+#define HAS_I422TORGBAROW_AVX2
+#define HAS_I444TOARGBROW_AVX2
#define HAS_INTERPOLATEROW_AVX2
+#define HAS_J422TOARGBROW_AVX2
#define HAS_MERGEUVROW_AVX2
#define HAS_MIRRORROW_AVX2
+#define HAS_NV12TOARGBROW_AVX2
+#define HAS_NV21TOARGBROW_AVX2
#define HAS_SPLITUVROW_AVX2
+#define HAS_UYVYTOARGBROW_AVX2
#define HAS_UYVYTOUV422ROW_AVX2
#define HAS_UYVYTOUVROW_AVX2
#define HAS_UYVYTOYROW_AVX2
+#define HAS_YUY2TOARGBROW_AVX2
#define HAS_YUY2TOUV422ROW_AVX2
#define HAS_YUY2TOUVROW_AVX2
#define HAS_YUY2TOYROW_AVX2
@@ -220,131 +230,41 @@ extern "C" {
// Effects:
#define HAS_ARGBADDROW_AVX2
#define HAS_ARGBATTENUATEROW_AVX2
-#define HAS_ARGBMIRRORROW_AVX2
#define HAS_ARGBMULTIPLYROW_AVX2
#define HAS_ARGBSUBTRACTROW_AVX2
#define HAS_ARGBUNATTENUATEROW_AVX2
-#endif // defined(VISUALC_HAS_AVX2)
-
-// The following are Yasm x86 only:
-// TODO(fbarchard): Port AVX2 to inline.
-#if !defined(LIBYUV_DISABLE_X86) && defined(HAVE_YASM)
- (defined(_M_IX86) || defined(_M_X64) || \
- defined(__x86_64__) || defined(__i386__))
-#define HAS_MERGEUVROW_AVX2
-#define HAS_MERGEUVROW_MMX
-#define HAS_SPLITUVROW_AVX2
-#define HAS_SPLITUVROW_MMX
-#define HAS_UYVYTOYROW_AVX2
-#define HAS_UYVYTOYROW_MMX
-#define HAS_YUY2TOYROW_AVX2
-#define HAS_YUY2TOYROW_MMX
+#define HAS_BLENDPLANEROW_AVX2
#endif
-// The following are disabled when SSSE3 is available:
-#if !defined(LIBYUV_DISABLE_X86) && \
- (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \
- !defined(LIBYUV_SSSE3_ONLY)
-#define HAS_ARGBBLENDROW_SSE2
-#define HAS_ARGBATTENUATEROW_SSE2
-#define HAS_MIRRORROW_SSE2
+// The following are available for 32 bit Visual C and clangcl builds with AVX2:
+// TODO(fbarchard): Port to gcc.
+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
+ (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2))
+#define HAS_ARGB1555TOARGBROW_AVX2
+#define HAS_ARGB4444TOARGBROW_AVX2
+#define HAS_ARGBTOARGB1555ROW_AVX2
+#define HAS_ARGBTOARGB4444ROW_AVX2
+#define HAS_ARGBTORGB565ROW_AVX2
+#define HAS_I411TOARGBROW_AVX2
+#define HAS_I422TOARGB1555ROW_AVX2
+#define HAS_I422TOARGB4444ROW_AVX2
+#define HAS_I422TORGB565ROW_AVX2
+#define HAS_J400TOARGBROW_AVX2
+#define HAS_NV12TORGB565ROW_AVX2
+#define HAS_RGB565TOARGBROW_AVX2
#endif
-// The following are available on arm64 platforms:
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
-// #define HAS_I444TOARGBROW_NEON
-// #define HAS_I422TOARGBROW_NEON
-// #define HAS_I411TOARGBROW_NEON
-// #define HAS_I422TOBGRAROW_NEON
-// #define HAS_I422TOABGRROW_NEON
-// #define HAS_I422TORGBAROW_NEON
-// #define HAS_I422TORGB24ROW_NEON
-// #define HAS_I422TORAWROW_NEON
-// #define HAS_I422TORGB565ROW_NEON
-// #define HAS_I422TOARGB1555ROW_NEON
-// #define HAS_I422TOARGB4444ROW_NEON
-// #define HAS_YTOARGBROW_NEON
-// #define HAS_I400TOARGBROW_NEON
-// #define HAS_NV12TOARGBROW_NEON
-// #define HAS_NV21TOARGBROW_NEON
-// #define HAS_NV12TORGB565ROW_NEON
-// #define HAS_NV21TORGB565ROW_NEON
-// #define HAS_YUY2TOARGBROW_NEON
-// #define HAS_UYVYTOARGBROW_NEON
-#define HAS_SPLITUVROW_NEON
-#define HAS_MERGEUVROW_NEON
-#define HAS_COPYROW_NEON
-#define HAS_SETROW_NEON
-#define HAS_ARGBSETROWS_NEON
-#define HAS_MIRRORROW_NEON
-#define HAS_MIRRORUVROW_NEON
-#define HAS_ARGBMIRRORROW_NEON
-#define HAS_RGB24TOARGBROW_NEON
-#define HAS_RAWTOARGBROW_NEON
-// #define HAS_RGB565TOARGBROW_NEON
-// #define HAS_ARGB1555TOARGBROW_NEON
-// #define HAS_ARGB4444TOARGBROW_NEON
-#define HAS_ARGBTORGB24ROW_NEON
-#define HAS_ARGBTORAWROW_NEON
-#define HAS_YUY2TOYROW_NEON
-#define HAS_UYVYTOYROW_NEON
-#define HAS_YUY2TOUV422ROW_NEON
-#define HAS_UYVYTOUV422ROW_NEON
-#define HAS_YUY2TOUVROW_NEON
-#define HAS_UYVYTOUVROW_NEON
-#define HAS_HALFROW_NEON
-#define HAS_ARGBTOBAYERROW_NEON
-#define HAS_ARGBTOBAYERGGROW_NEON
-#define HAS_ARGBSHUFFLEROW_NEON
-#define HAS_I422TOYUY2ROW_NEON
-#define HAS_I422TOUYVYROW_NEON
-// #define HAS_ARGBTORGB565ROW_NEON
-// #define HAS_ARGBTOARGB1555ROW_NEON
-// #define HAS_ARGBTOARGB4444ROW_NEON
-#define HAS_ARGBTOYROW_NEON
-#define HAS_ARGBTOYJROW_NEON
-#define HAS_ARGBTOUV444ROW_NEON
-#define HAS_ARGBTOUV422ROW_NEON
-#define HAS_ARGBTOUV411ROW_NEON
-// #define HAS_ARGBTOUVROW_NEON
-// #define HAS_ARGBTOUVJROW_NEON
-// #define HAS_BGRATOUVROW_NEON
-// #define HAS_ABGRTOUVROW_NEON
-// #define HAS_RGBATOUVROW_NEON
-// #define HAS_RGB24TOUVROW_NEON
-// #define HAS_RAWTOUVROW_NEON
-// #define HAS_RGB565TOUVROW_NEON
-// #define HAS_ARGB1555TOUVROW_NEON
-// #define HAS_ARGB4444TOUVROW_NEON
-// #define HAS_RGB565TOYROW_NEON
-// #define HAS_ARGB1555TOYROW_NEON
-// #define HAS_ARGB4444TOYROW_NEON
-#define HAS_BGRATOYROW_NEON
-#define HAS_ABGRTOYROW_NEON
-#define HAS_RGBATOYROW_NEON
-#define HAS_RGB24TOYROW_NEON
-#define HAS_RAWTOYROW_NEON
-#define HAS_INTERPOLATEROW_NEON
-#define HAS_ARGBBLENDROW_NEON
-#define HAS_ARGBATTENUATEROW_NEON
-#define HAS_ARGBQUANTIZEROW_NEON
-#define HAS_ARGBSHADEROW_NEON
-#define HAS_ARGBGRAYROW_NEON
-#define HAS_ARGBSEPIAROW_NEON
-#define HAS_ARGBCOLORMATRIXROW_NEON
-#define HAS_ARGBMULTIPLYROW_NEON
-#define HAS_ARGBADDROW_NEON
-#define HAS_ARGBSUBTRACTROW_NEON
-#define HAS_SOBELROW_NEON
-#define HAS_SOBELTOPLANEROW_NEON
-#define HAS_SOBELXYROW_NEON
-#define HAS_SOBELXROW_NEON
-#define HAS_SOBELYROW_NEON
+// The following are also available on x64 Visual C.
+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_X64) && \
+ (!defined(__clang__) || defined(__SSSE3__))
+#define HAS_I422ALPHATOARGBROW_SSSE3
+#define HAS_I422TOARGBROW_SSSE3
#endif
// The following are available on Neon platforms:
#if !defined(LIBYUV_DISABLE_NEON) && \
- (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
+ (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
#define HAS_ABGRTOUVROW_NEON
#define HAS_ABGRTOYROW_NEON
#define HAS_ARGB1555TOARGBROW_NEON
@@ -353,46 +273,43 @@ extern "C" {
#define HAS_ARGB4444TOARGBROW_NEON
#define HAS_ARGB4444TOUVROW_NEON
#define HAS_ARGB4444TOYROW_NEON
+#define HAS_ARGBSETROW_NEON
#define HAS_ARGBTOARGB1555ROW_NEON
#define HAS_ARGBTOARGB4444ROW_NEON
-#define HAS_ARGBTOBAYERROW_NEON
-#define HAS_ARGBTOBAYERGGROW_NEON
#define HAS_ARGBTORAWROW_NEON
#define HAS_ARGBTORGB24ROW_NEON
+#define HAS_ARGBTORGB565DITHERROW_NEON
#define HAS_ARGBTORGB565ROW_NEON
#define HAS_ARGBTOUV411ROW_NEON
#define HAS_ARGBTOUV422ROW_NEON
#define HAS_ARGBTOUV444ROW_NEON
-#define HAS_ARGBTOUVROW_NEON
#define HAS_ARGBTOUVJROW_NEON
-#define HAS_ARGBTOYROW_NEON
+#define HAS_ARGBTOUVROW_NEON
#define HAS_ARGBTOYJROW_NEON
+#define HAS_ARGBTOYROW_NEON
#define HAS_BGRATOUVROW_NEON
#define HAS_BGRATOYROW_NEON
#define HAS_COPYROW_NEON
-#define HAS_HALFROW_NEON
#define HAS_I400TOARGBROW_NEON
#define HAS_I411TOARGBROW_NEON
-#define HAS_I422TOABGRROW_NEON
+#define HAS_I422ALPHATOARGBROW_NEON
#define HAS_I422TOARGB1555ROW_NEON
#define HAS_I422TOARGB4444ROW_NEON
#define HAS_I422TOARGBROW_NEON
-#define HAS_I422TOBGRAROW_NEON
-#define HAS_I422TORAWROW_NEON
#define HAS_I422TORGB24ROW_NEON
#define HAS_I422TORGB565ROW_NEON
#define HAS_I422TORGBAROW_NEON
#define HAS_I422TOUYVYROW_NEON
#define HAS_I422TOYUY2ROW_NEON
#define HAS_I444TOARGBROW_NEON
+#define HAS_J400TOARGBROW_NEON
#define HAS_MERGEUVROW_NEON
#define HAS_MIRRORROW_NEON
#define HAS_MIRRORUVROW_NEON
#define HAS_NV12TOARGBROW_NEON
#define HAS_NV12TORGB565ROW_NEON
#define HAS_NV21TOARGBROW_NEON
-#define HAS_NV21TORGB565ROW_NEON
#define HAS_RAWTOARGBROW_NEON
+#define HAS_RAWTORGB24ROW_NEON
#define HAS_RAWTOUVROW_NEON
#define HAS_RAWTOYROW_NEON
#define HAS_RGB24TOARGBROW_NEON
@@ -409,7 +326,6 @@ extern "C" {
#define HAS_UYVYTOUV422ROW_NEON
#define HAS_UYVYTOUVROW_NEON
#define HAS_UYVYTOYROW_NEON
-#define HAS_YTOARGBROW_NEON
#define HAS_YUY2TOARGBROW_NEON
#define HAS_YUY2TOUV422ROW_NEON
#define HAS_YUY2TOUVROW_NEON
@@ -419,21 +335,21 @@ extern "C" {
#define HAS_ARGBADDROW_NEON
#define HAS_ARGBATTENUATEROW_NEON
#define HAS_ARGBBLENDROW_NEON
+#define HAS_ARGBCOLORMATRIXROW_NEON
#define HAS_ARGBGRAYROW_NEON
#define HAS_ARGBMIRRORROW_NEON
#define HAS_ARGBMULTIPLYROW_NEON
#define HAS_ARGBQUANTIZEROW_NEON
#define HAS_ARGBSEPIAROW_NEON
#define HAS_ARGBSHADEROW_NEON
+#define HAS_ARGBSHUFFLEROW_NEON
#define HAS_ARGBSUBTRACTROW_NEON
+#define HAS_INTERPOLATEROW_NEON
#define HAS_SOBELROW_NEON
#define HAS_SOBELTOPLANEROW_NEON
-#define HAS_SOBELXYROW_NEON
#define HAS_SOBELXROW_NEON
+#define HAS_SOBELXYROW_NEON
#define HAS_SOBELYROW_NEON
-#define HAS_INTERPOLATEROW_NEON
-// TODO(fbarchard): Investigate neon unittest failure.
-// #define HAS_ARGBCOLORMATRIXROW_NEON
#endif
// The following are available on Mips platforms:
@@ -441,10 +357,8 @@ extern "C" {
(_MIPS_SIM == _MIPS_SIM_ABI32) && (__mips_isa_rev < 6)
#define HAS_COPYROW_MIPS
#if defined(__mips_dsp) && (__mips_dsp_rev >= 2)
-#define HAS_I422TOABGRROW_MIPS_DSPR2
#define HAS_I422TOARGBROW_MIPS_DSPR2
-#define HAS_I422TOBGRAROW_MIPS_DSPR2
-#define HAS_INTERPOLATEROWS_MIPS_DSPR2
+#define HAS_INTERPOLATEROW_MIPS_DSPR2
#define HAS_MIRRORROW_MIPS_DSPR2
#define HAS_MIRRORUVROW_MIPS_DSPR2
#define HAS_SPLITUVROW_MIPS_DSPR2
@@ -453,6 +367,7 @@ extern "C" {
#if defined(_MSC_VER) && !defined(__CLR_VER)
#define SIMD_ALIGNED(var) __declspec(align(16)) var
+#define SIMD_ALIGNED32(var) __declspec(align(64)) var
typedef __declspec(align(16)) int16 vec16[8];
typedef __declspec(align(16)) int32 vec32[4];
typedef __declspec(align(16)) int8 vec8[16];
@@ -465,26 +380,89 @@ typedef __declspec(align(32)) int8 lvec8[32];
typedef __declspec(align(32)) uint16 ulvec16[16];
typedef __declspec(align(32)) uint32 ulvec32[8];
typedef __declspec(align(32)) uint8 ulvec8[32];
-
-#elif defined(__GNUC__)
+#elif defined(__GNUC__) && !defined(__pnacl__)
// Caveat: GCC 4.2 to 4.7 have a known issue using vectors with const.
#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
+#define SIMD_ALIGNED32(var) var __attribute__((aligned(64)))
typedef int16 __attribute__((vector_size(16))) vec16;
typedef int32 __attribute__((vector_size(16))) vec32;
typedef int8 __attribute__((vector_size(16))) vec8;
typedef uint16 __attribute__((vector_size(16))) uvec16;
typedef uint32 __attribute__((vector_size(16))) uvec32;
typedef uint8 __attribute__((vector_size(16))) uvec8;
+typedef int16 __attribute__((vector_size(32))) lvec16;
+typedef int32 __attribute__((vector_size(32))) lvec32;
+typedef int8 __attribute__((vector_size(32))) lvec8;
+typedef uint16 __attribute__((vector_size(32))) ulvec16;
+typedef uint32 __attribute__((vector_size(32))) ulvec32;
+typedef uint8 __attribute__((vector_size(32))) ulvec8;
#else
#define SIMD_ALIGNED(var) var
+#define SIMD_ALIGNED32(var) var
typedef int16 vec16[8];
typedef int32 vec32[4];
typedef int8 vec8[16];
typedef uint16 uvec16[8];
typedef uint32 uvec32[4];
typedef uint8 uvec8[16];
+typedef int16 lvec16[16];
+typedef int32 lvec32[8];
+typedef int8 lvec8[32];
+typedef uint16 ulvec16[16];
+typedef uint32 ulvec32[8];
+typedef uint8 ulvec8[32];
#endif
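For reference, SIMD_ALIGNED wraps a whole declaration; a small illustrative use, following the definitions above:

// Illustrative: a 16-byte aligned scratch row for two-pass code paths.
SIMD_ALIGNED(uint8 row[2048]);
// MSVC expands this to:      __declspec(align(16)) uint8 row[2048];
// GCC/clang expand this to:  uint8 row[2048] __attribute__((aligned(16)));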
+#if defined(__aarch64__)
+// This struct is for Arm64 color conversion.
+struct YuvConstants {
+ uvec16 kUVToRB;
+ uvec16 kUVToRB2;
+ uvec16 kUVToG;
+ uvec16 kUVToG2;
+ vec16 kUVBiasBGR;
+ vec32 kYToRgb;
+};
+#elif defined(__arm__)
+// This struct is for ARMv7 color conversion.
+struct YuvConstants {
+ uvec8 kUVToRB;
+ uvec8 kUVToG;
+ vec16 kUVBiasBGR;
+ vec32 kYToRgb;
+};
+#else
+// This struct is for Intel color conversion.
+struct YuvConstants {
+ lvec8 kUVToB;
+ lvec8 kUVToG;
+ lvec8 kUVToR;
+ lvec16 kUVBiasB;
+ lvec16 kUVBiasG;
+ lvec16 kUVBiasR;
+ lvec16 kYToRgb;
+};
+
+// Offsets into YuvConstants structure
+#define KUVTOB 0
+#define KUVTOG 32
+#define KUVTOR 64
+#define KUVBIASB 96
+#define KUVBIASG 128
+#define KUVBIASR 160
+#define KYTORGB 192
+#endif
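The KUVTOB..KYTORGB values are byte offsets into the Intel layout above: each lvec8/lvec16 member occupies 32 bytes, so the named offsets step by 32. A compile-time check in the spirit of the header (illustrative, not part of it; _Static_assert is C11):

#include <stddef.h>
// Each 32-byte member starts where the previous one ends.
_Static_assert(offsetof(struct YuvConstants, kUVToG) == KUVTOG, "kUVToG");
_Static_assert(offsetof(struct YuvConstants, kUVBiasB) == KUVBIASB, "kUVBiasB");
_Static_assert(offsetof(struct YuvConstants, kYToRgb) == KYTORGB, "kYToRgb");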
+
+// Conversion matrix for YUV to RGB
+extern const struct YuvConstants kYuvI601Constants; // BT.601
+extern const struct YuvConstants kYuvJPEGConstants; // JPEG color space
+extern const struct YuvConstants kYuvH709Constants; // BT.709
+
+// Conversion matrix for YVU to BGR
+extern const struct YuvConstants kYvuI601Constants; // BT.601
+extern const struct YuvConstants kYvuJPEGConstants; // JPEG color space
+extern const struct YuvConstants kYvuH709Constants; // BT.709
+
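The chosen kYuv*/kYvu* table is what callers pass through the new yuvconstants parameter of the row functions below. An illustrative call (sketch only; the prototype appears later in this header):

// Convert one row of I422 to ARGB using BT.601 coefficients; passing
// &kYuvH709Constants instead would select BT.709.
I422ToARGBRow_C(src_y, src_u, src_v, dst_argb, &kYuvI601Constants, width);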
#if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__)
#define OMITFP
#else
@@ -492,24 +470,16 @@ typedef uint8 uvec8[16];
#endif
// NaCl macros for GCC x86 and x64.
-
-// TODO(nfullagar): When pepper_33 toolchain is distributed, default to
-// NEW_BINUTILS and remove all BUNDLEALIGN occurances.
#if defined(__native_client__)
#define LABELALIGN ".p2align 5\n"
#else
-#define LABELALIGN ".p2align 2\n"
+#define LABELALIGN
#endif
#if defined(__native_client__) && defined(__x86_64__)
-#if defined(NEW_BINUTILS)
+// r14 is used for MEMOP macros.
+#define NACL_R14 "r14",
#define BUNDLELOCK ".bundle_lock\n"
#define BUNDLEUNLOCK ".bundle_unlock\n"
-#define BUNDLEALIGN "\n"
-#else
-#define BUNDLELOCK "\n"
-#define BUNDLEUNLOCK "\n"
-#define BUNDLEALIGN ".p2align 5\n"
-#endif
#define MEMACCESS(base) "%%nacl:(%%r15,%q" #base ")"
#define MEMACCESS2(offset, base) "%%nacl:" #offset "(%%r15,%q" #base ")"
#define MEMLEA(offset, base) #offset "(%q" #base ")"
@@ -534,8 +504,19 @@ typedef uint8 uvec8[16];
"lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
#opcode " (%%r15,%%r14),%" #arg "\n" \
BUNDLEUNLOCK
+#define VMEMOPREG(opcode, offset, base, index, scale, reg1, reg2) \
+ BUNDLELOCK \
+ "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
+ #opcode " (%%r15,%%r14),%%" #reg1 ",%%" #reg2 "\n" \
+ BUNDLEUNLOCK
+#define VEXTOPMEM(op, sel, reg, offset, base, index, scale) \
+ BUNDLELOCK \
+ "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
+ #op " $" #sel ",%%" #reg ",(%%r15,%%r14)\n" \
+ BUNDLEUNLOCK
#else // defined(__native_client__) && defined(__x86_64__)
-#define BUNDLEALIGN "\n"
+#define NACL_R14
+#define BUNDLEALIGN
#define MEMACCESS(base) "(%" #base ")"
#define MEMACCESS2(offset, base) #offset "(%" #base ")"
#define MEMLEA(offset, base) #offset "(%" #base ")"
@@ -551,14 +532,19 @@ typedef uint8 uvec8[16];
#opcode " %%" #reg ","#offset "(%" #base ",%" #index "," #scale ")\n"
#define MEMOPARG(opcode, offset, base, index, scale, arg) \
#opcode " " #offset "(%" #base ",%" #index "," #scale "),%" #arg "\n"
+#define VMEMOPREG(opcode, offset, base, index, scale, reg1, reg2) \
+ #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg1 ",%%" \
+ #reg2 "\n"
+#define VEXTOPMEM(op, sel, reg, offset, base, index, scale) \
+ #op " $" #sel ",%%" #reg ","#offset "(%" #base ",%" #index "," #scale ")\n"
#endif // defined(__native_client__) && defined(__x86_64__)
#if defined(__arm__) || defined(__aarch64__)
#undef MEMACCESS
#if defined(__native_client__)
-#define MEMACCESS(base) ".p2align 3\nbic %" #base ", #0xc0000000\n"
+#define MEMACCESS(base) ".p2align 3\nbic %" #base ", #0xc0000000\n"
#else
-#define MEMACCESS(base) "\n"
+#define MEMACCESS(base)
#endif
#endif
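These macros let the same inline-assembly source build inside and outside the Native Client sandbox. A hedged sketch of a row_gcc.cc-style fragment, assuming uint8* locals src and dst:

// Outside NaCl, MEMACCESS(0) expands to "(%0)" and this is an ordinary
// unaligned 16-byte copy; under x86-64 NaCl it becomes
// "%%nacl:(%%r15,%q0)", forcing every access through the r15 sandbox base.
asm volatile (
  "movdqu " MEMACCESS(0) ",%%xmm0 \n"
  "movdqu %%xmm0," MEMACCESS(1) " \n"
  :
  : "r"(src), "r"(dst)
  : "memory", "cc", "xmm0"
);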
@@ -566,161 +552,163 @@ void I444ToARGBRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
+void I422AlphaToARGBRow_NEON(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ const uint8* a_buf,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I411ToARGBRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
- int width);
-void I422ToBGRARow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_bgra,
- int width);
-void I422ToABGRRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_abgr,
+ const struct YuvConstants* yuvconstants,
int width);
void I422ToRGBARow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgba,
+ const struct YuvConstants* yuvconstants,
int width);
void I422ToRGB24Row_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
int width);
-void I422ToRAWRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_raw,
- int width);
void I422ToRGB565Row_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
int width);
void I422ToARGB1555Row_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb1555,
+ const struct YuvConstants* yuvconstants,
int width);
void I422ToARGB4444Row_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb4444,
+ const struct YuvConstants* yuvconstants,
int width);
void NV12ToARGBRow_NEON(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
- int width);
-void NV21ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
void NV12ToRGB565Row_NEON(const uint8* src_y,
const uint8* src_uv,
uint8* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
int width);
-void NV21ToRGB565Row_NEON(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_rgb565,
- int width);
+void NV21ToARGBRow_NEON(const uint8* src_y,
+ const uint8* src_vu,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
void UYVYToARGBRow_NEON(const uint8* src_uyvy,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
-void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYJRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
-void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix);
-void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix);
-void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix);
-void RGB24ToYRow_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix);
-void RAWToYRow_SSSE3(const uint8* src_raw, uint8* dst_y, int pix);
-void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
-void BGRAToYRow_Unaligned_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix);
-void ABGRToYRow_Unaligned_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix);
-void RGBAToYRow_Unaligned_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix);
-void RGB24ToYRow_Unaligned_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix);
-void RAWToYRow_Unaligned_SSSE3(const uint8* src_raw, uint8* dst_y, int pix);
-void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix);
+void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int width);
+void ARGBToYRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int width);
+void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width);
+void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int width);
+void ARGBToYJRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int width);
+void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width);
+void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int width);
+void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int width);
+void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int width);
+void RGB24ToYRow_SSSE3(const uint8* src_rgb24, uint8* dst_y, int width);
+void RAWToYRow_SSSE3(const uint8* src_raw, uint8* dst_y, int width);
+void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width);
+void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width);
void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix);
+ int width);
void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix);
+ int width);
void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix);
+ int width);
void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444,
- uint8* dst_u, uint8* dst_v, int pix);
-void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix);
-void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix);
-void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix);
-void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix);
-void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix);
-void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix);
-void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int pix);
-void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int pix);
-void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYJRow_C(const uint8* src_argb, uint8* dst_y, int pix);
-void BGRAToYRow_C(const uint8* src_bgra, uint8* dst_y, int pix);
-void ABGRToYRow_C(const uint8* src_abgr, uint8* dst_y, int pix);
-void RGBAToYRow_C(const uint8* src_rgba, uint8* dst_y, int pix);
-void RGB24ToYRow_C(const uint8* src_rgb24, uint8* dst_y, int pix);
-void RAWToYRow_C(const uint8* src_raw, uint8* dst_y, int pix);
-void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int pix);
-void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int pix);
-void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int pix);
-void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYJRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
-void BGRAToYRow_Any_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix);
-void ABGRToYRow_Any_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix);
-void RGBAToYRow_Any_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix);
-void RGB24ToYRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix);
-void RAWToYRow_Any_SSSE3(const uint8* src_raw, uint8* dst_y, int pix);
-void ARGBToYRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYJRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix);
-void BGRAToYRow_Any_NEON(const uint8* src_bgra, uint8* dst_y, int pix);
-void ABGRToYRow_Any_NEON(const uint8* src_abgr, uint8* dst_y, int pix);
-void RGBAToYRow_Any_NEON(const uint8* src_rgba, uint8* dst_y, int pix);
-void RGB24ToYRow_Any_NEON(const uint8* src_rgb24, uint8* dst_y, int pix);
-void RAWToYRow_Any_NEON(const uint8* src_raw, uint8* dst_y, int pix);
-void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int pix);
-void ARGB1555ToYRow_Any_NEON(const uint8* src_argb1555, uint8* dst_y, int pix);
-void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444, uint8* dst_y, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
+void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int width);
+void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int width);
+void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int width);
+void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int width);
+void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int width);
+void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int width);
+void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int width);
+void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int width);
+void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int width);
+void ARGBToYJRow_C(const uint8* src_argb, uint8* dst_y, int width);
+void BGRAToYRow_C(const uint8* src_bgra, uint8* dst_y, int width);
+void ABGRToYRow_C(const uint8* src_abgr, uint8* dst_y, int width);
+void RGBAToYRow_C(const uint8* src_rgba, uint8* dst_y, int width);
+void RGB24ToYRow_C(const uint8* src_rgb24, uint8* dst_y, int width);
+void RAWToYRow_C(const uint8* src_raw, uint8* dst_y, int width);
+void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width);
+void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width);
+void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width);
+void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int width);
+void ARGBToYJRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int width);
+void BGRAToYRow_Any_SSSE3(const uint8* src_bgra, uint8* dst_y, int width);
+void ABGRToYRow_Any_SSSE3(const uint8* src_abgr, uint8* dst_y, int width);
+void RGBAToYRow_Any_SSSE3(const uint8* src_rgba, uint8* dst_y, int width);
+void RGB24ToYRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_y, int width);
+void RAWToYRow_Any_SSSE3(const uint8* src_raw, uint8* dst_y, int width);
+void ARGBToYRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int width);
+void ARGBToYJRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int width);
+void BGRAToYRow_Any_NEON(const uint8* src_bgra, uint8* dst_y, int width);
+void ABGRToYRow_Any_NEON(const uint8* src_abgr, uint8* dst_y, int width);
+void RGBAToYRow_Any_NEON(const uint8* src_rgba, uint8* dst_y, int width);
+void RGB24ToYRow_Any_NEON(const uint8* src_rgb24, uint8* dst_y, int width);
+void RAWToYRow_Any_NEON(const uint8* src_raw, uint8* dst_y, int width);
+void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int width);
+void ARGB1555ToYRow_Any_NEON(const uint8* src_argb1555, uint8* dst_y,
+ int width);
+void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444, uint8* dst_y,
+ int width);
void ARGBToUVRow_AVX2(const uint8* src_argb, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
@@ -736,16 +724,6 @@ void ABGRToUVRow_SSSE3(const uint8* src_abgr, int src_stride_abgr,
uint8* dst_u, uint8* dst_v, int width);
void RGBAToUVRow_SSSE3(const uint8* src_rgba, int src_stride_rgba,
uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUVJRow_Unaligned_SSSE3(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width);
-void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra, int src_stride_bgra,
- uint8* dst_u, uint8* dst_v, int width);
-void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr, int src_stride_abgr,
- uint8* dst_u, uint8* dst_v, int width);
-void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_rgba, int src_stride_rgba,
- uint8* dst_u, uint8* dst_v, int width);
void ARGBToUVRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
void ARGBToUVJRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb,
@@ -757,33 +735,33 @@ void ABGRToUVRow_Any_SSSE3(const uint8* src_abgr, int src_stride_abgr,
void RGBAToUVRow_Any_SSSE3(const uint8* src_rgba, int src_stride_rgba,
uint8* dst_u, uint8* dst_v, int width);
void ARGBToUV444Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix);
+ int width);
void ARGBToUV422Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix);
+ int width);
void ARGBToUV411Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix);
+ int width);
void ARGBToUVRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void ARGBToUVJRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void BGRAToUVRow_Any_NEON(const uint8* src_bgra, int src_stride_bgra,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void ABGRToUVRow_Any_NEON(const uint8* src_abgr, int src_stride_abgr,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void RGBAToUVRow_Any_NEON(const uint8* src_rgba, int src_stride_rgba,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void RGB24ToUVRow_Any_NEON(const uint8* src_rgb24, int src_stride_rgb24,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void RAWToUVRow_Any_NEON(const uint8* src_raw, int src_stride_raw,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void RGB565ToUVRow_Any_NEON(const uint8* src_rgb565, int src_stride_rgb565,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void ARGB1555ToUVRow_Any_NEON(const uint8* src_argb1555,
int src_stride_argb1555,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void ARGB4444ToUVRow_Any_NEON(const uint8* src_argb4444,
int src_stride_argb4444,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void ARGBToUVRow_C(const uint8* src_argb, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
void ARGBToUVJRow_C(const uint8* src_argb, int src_stride_argb,
@@ -807,15 +785,11 @@ void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
void ARGBToUV444Row_SSSE3(const uint8* src_argb,
uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb,
- uint8* dst_u, uint8* dst_v, int width);
void ARGBToUV444Row_Any_SSSE3(const uint8* src_argb,
uint8* dst_u, uint8* dst_v, int width);
void ARGBToUV422Row_SSSE3(const uint8* src_argb,
uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb,
- uint8* dst_u, uint8* dst_v, int width);
void ARGBToUV422Row_Any_SSSE3(const uint8* src_argb,
uint8* dst_u, uint8* dst_v, int width);
@@ -825,13 +799,18 @@ void ARGBToUV422Row_C(const uint8* src_argb,
uint8* dst_u, uint8* dst_v, int width);
void ARGBToUV411Row_C(const uint8* src_argb,
uint8* dst_u, uint8* dst_v, int width);
+void ARGBToUVJ422Row_C(const uint8* src_argb,
+ uint8* dst_u, uint8* dst_v, int width);
void MirrorRow_AVX2(const uint8* src, uint8* dst, int width);
void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
-void MirrorRow_SSE2(const uint8* src, uint8* dst, int width);
void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width);
void MirrorRow_C(const uint8* src, uint8* dst, int width);
+void MirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width);
+void MirrorRow_Any_SSSE3(const uint8* src, uint8* dst, int width);
+void MirrorRow_Any_SSE2(const uint8* src, uint8* dst, int width);
+void MirrorRow_Any_NEON(const uint8* src, uint8* dst, int width);
void MirrorUVRow_SSSE3(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int width);
@@ -843,28 +822,30 @@ void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int width);
void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width);
-void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
+void ARGBMirrorRow_SSE2(const uint8* src, uint8* dst, int width);
void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width);
void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width);
+void ARGBMirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width);
+void ARGBMirrorRow_Any_SSE2(const uint8* src, uint8* dst, int width);
+void ARGBMirrorRow_Any_NEON(const uint8* src, uint8* dst, int width);
-void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
-void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
-void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
-void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
+void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width);
+void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+ int width);
+void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+ int width);
+void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+ int width);
void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int pix);
-void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int pix);
-void SplitUVRow_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
- uint8* dst_v, int pix);
+ int width);
void SplitUVRow_Any_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int pix);
+ int width);
void SplitUVRow_Any_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int pix);
+ int width);
void SplitUVRow_Any_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int pix);
+ int width);
void SplitUVRow_Any_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int pix);
+ int width);
void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width);
@@ -874,8 +855,6 @@ void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width);
void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width);
-void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
- uint8* dst_uv, int width);
void MergeUVRow_Any_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width);
void MergeUVRow_Any_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
@@ -884,431 +863,611 @@ void MergeUVRow_Any_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width);
void CopyRow_SSE2(const uint8* src, uint8* dst, int count);
+void CopyRow_AVX(const uint8* src, uint8* dst, int count);
void CopyRow_ERMS(const uint8* src, uint8* dst, int count);
-void CopyRow_X86(const uint8* src, uint8* dst, int count);
void CopyRow_NEON(const uint8* src, uint8* dst, int count);
void CopyRow_MIPS(const uint8* src, uint8* dst, int count);
void CopyRow_C(const uint8* src, uint8* dst, int count);
+void CopyRow_Any_SSE2(const uint8* src, uint8* dst, int count);
+void CopyRow_Any_AVX(const uint8* src, uint8* dst, int count);
+void CopyRow_Any_NEON(const uint8* src, uint8* dst, int count);
void CopyRow_16_C(const uint16* src, uint16* dst, int count);
void ARGBCopyAlphaRow_C(const uint8* src_argb, uint8* dst_argb, int width);
void ARGBCopyAlphaRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
void ARGBCopyAlphaRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
+void ARGBCopyAlphaRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
+ int width);
+void ARGBCopyAlphaRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
+ int width);
void ARGBCopyYToAlphaRow_C(const uint8* src_y, uint8* dst_argb, int width);
void ARGBCopyYToAlphaRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
void ARGBCopyYToAlphaRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
+void ARGBCopyYToAlphaRow_Any_SSE2(const uint8* src_y, uint8* dst_argb,
+ int width);
+void ARGBCopyYToAlphaRow_Any_AVX2(const uint8* src_y, uint8* dst_argb,
+ int width);
-void SetRow_X86(uint8* dst, uint32 v32, int count);
-void ARGBSetRows_X86(uint8* dst, uint32 v32, int width,
- int dst_stride, int height);
-void SetRow_NEON(uint8* dst, uint32 v32, int count);
-void ARGBSetRows_NEON(uint8* dst, uint32 v32, int width,
- int dst_stride, int height);
-void SetRow_C(uint8* dst, uint32 v32, int count);
-void ARGBSetRows_C(uint8* dst, uint32 v32, int width, int dst_stride,
- int height);
+void SetRow_C(uint8* dst, uint8 v8, int count);
+void SetRow_X86(uint8* dst, uint8 v8, int count);
+void SetRow_ERMS(uint8* dst, uint8 v8, int count);
+void SetRow_NEON(uint8* dst, uint8 v8, int count);
+void SetRow_Any_X86(uint8* dst, uint8 v8, int count);
+void SetRow_Any_NEON(uint8* dst, uint8 v8, int count);
+
+void ARGBSetRow_C(uint8* dst_argb, uint32 v32, int count);
+void ARGBSetRow_X86(uint8* dst_argb, uint32 v32, int count);
+void ARGBSetRow_NEON(uint8* dst_argb, uint32 v32, int count);
+void ARGBSetRow_Any_NEON(uint8* dst_argb, uint32 v32, int count);
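Note the changed fill API: SetRow_* now takes a single byte value (v8), while the new ARGBSetRow_* takes a packed 32-bit pixel. Illustrative calls (sketch only):

SetRow_C(dst_y, 0x80, width);                // fill a chroma row with 128
ARGBSetRow_C(dst_argb, 0xFF0000FFu, width);  // little-endian ARGB word:
                                             // A=FF R=00 G=00 B=FF (blue)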
// ARGBShufflers for BGRAToARGB etc.
void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix);
+ const uint8* shuffler, int width);
void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix);
+ const uint8* shuffler, int width);
void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix);
+ const uint8* shuffler, int width);
void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix);
+ const uint8* shuffler, int width);
void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix);
-void ARGBShuffleRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix);
+ const uint8* shuffler, int width);
void ARGBShuffleRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix);
+ const uint8* shuffler, int width);
void ARGBShuffleRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix);
+ const uint8* shuffler, int width);
void ARGBShuffleRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix);
+ const uint8* shuffler, int width);
void ARGBShuffleRow_Any_NEON(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix);
+ const uint8* shuffler, int width);
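The shuffler argument is a 16-byte, pshufb-style lane map applied to each block of four pixels. A sketch with a hypothetical mask that swaps the R and B channels (ARGB <-> ABGR):

// Byte i of each output block is taken from input byte mask[i]; the
// 2,1,0,3 pattern swaps B and R per pixel and repeats for pixels 1..3.
static const uint8 kShuffleARGBToABGR[16] = {
  2u, 1u, 0u, 3u, 6u, 5u, 4u, 7u,
  10u, 9u, 8u, 11u, 14u, 13u, 12u, 15u
};
ARGBShuffleRow_C(src_argb, dst_abgr, kShuffleARGBToABGR, width);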
-void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
-void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix);
-void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, int pix);
+void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int width);
+void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int width);
+void RAWToRGB24Row_SSSE3(const uint8* src_raw, uint8* dst_rgb24, int width);
+void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, int width);
void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb,
- int pix);
+ int width);
void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,
- int pix);
+ int width);
+void RGB565ToARGBRow_AVX2(const uint8* src_rgb565, uint8* dst_argb, int width);
+void ARGB1555ToARGBRow_AVX2(const uint8* src_argb1555, uint8* dst_argb,
+ int width);
+void ARGB4444ToARGBRow_AVX2(const uint8* src_argb4444, uint8* dst_argb,
+ int width);
-void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix);
-void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix);
-void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix);
+void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int width);
+void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int width);
+void RAWToRGB24Row_NEON(const uint8* src_raw, uint8* dst_rgb24, int width);
+void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int width);
void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb,
- int pix);
+ int width);
void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb,
- int pix);
-void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int pix);
-void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int pix);
-void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int pix);
-void ARGB1555ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix);
-void ARGB4444ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix);
-void RGB24ToARGBRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
-void RAWToARGBRow_Any_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix);
+ int width);
+void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width);
+void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width);
+void RAWToRGB24Row_C(const uint8* src_raw, uint8* dst_rgb24, int width);
+void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int width);
+void ARGB1555ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int width);
+void ARGB4444ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int width);
+void RGB24ToARGBRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_argb,
+ int width);
+void RAWToARGBRow_Any_SSSE3(const uint8* src_raw, uint8* dst_argb, int width);
+void RAWToRGB24Row_Any_SSSE3(const uint8* src_raw, uint8* dst_rgb24, int width);
+
void RGB565ToARGBRow_Any_SSE2(const uint8* src_rgb565, uint8* dst_argb,
- int pix);
+ int width);
void ARGB1555ToARGBRow_Any_SSE2(const uint8* src_argb1555, uint8* dst_argb,
- int pix);
+ int width);
void ARGB4444ToARGBRow_Any_SSE2(const uint8* src_argb4444, uint8* dst_argb,
- int pix);
-void RGB24ToARGBRow_Any_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix);
-void RAWToARGBRow_Any_NEON(const uint8* src_raw, uint8* dst_argb, int pix);
+ int width);
+void RGB565ToARGBRow_Any_AVX2(const uint8* src_rgb565, uint8* dst_argb,
+ int width);
+void ARGB1555ToARGBRow_Any_AVX2(const uint8* src_argb1555, uint8* dst_argb,
+ int width);
+void ARGB4444ToARGBRow_Any_AVX2(const uint8* src_argb4444, uint8* dst_argb,
+ int width);
+
+void RGB24ToARGBRow_Any_NEON(const uint8* src_rgb24, uint8* dst_argb,
+ int width);
+void RAWToARGBRow_Any_NEON(const uint8* src_raw, uint8* dst_argb, int width);
+void RAWToRGB24Row_Any_NEON(const uint8* src_raw, uint8* dst_rgb24, int width);
void RGB565ToARGBRow_Any_NEON(const uint8* src_rgb565, uint8* dst_argb,
- int pix);
+ int width);
void ARGB1555ToARGBRow_Any_NEON(const uint8* src_argb1555, uint8* dst_argb,
- int pix);
+ int width);
void ARGB4444ToARGBRow_Any_NEON(const uint8* src_argb4444, uint8* dst_argb,
- int pix);
+ int width);
-void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
+void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width);
+void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width);
+void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width);
+void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width);
+void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
+void ARGBToRGB565DitherRow_C(const uint8* src_argb, uint8* dst_rgb,
+ const uint32 dither4, int width);
+void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb,
+ const uint32 dither4, int width);
+void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb, uint8* dst_rgb,
+ const uint32 dither4, int width);
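dither4 packs four per-pixel dither bytes that are added before truncation to 5/6/5 bits. A hedged sketch selecting a row of a hypothetical 4x4 ordered-dither table for scanline y:

static const uint8 kDither4x4[16] = {  // hypothetical table
  0, 4, 1, 5,
  6, 2, 7, 3,
  1, 5, 0, 4,
  7, 3, 6, 2,
};
const uint32 dither4 = *(const uint32*)(kDither4x4 + ((y & 3) * 4));
ARGBToRGB565DitherRow_C(src_argb, dst_rgb565, dither4, width);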
-void ARGBToRGBARow_C(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
+void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width);
+void ARGBToARGB1555Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width);
+void ARGBToARGB4444Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width);
-void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
-void I400ToARGBRow_Unaligned_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
-void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int pix);
-void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix);
-void I400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
-void I400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int pix);
+void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
+void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
+void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
+void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
+void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
+void ARGBToRGB565DitherRow_NEON(const uint8* src_argb, uint8* dst_rgb,
+ const uint32 dither4, int width);
+
+void ARGBToRGBARow_C(const uint8* src_argb, uint8* dst_rgb, int width);
+void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width);
+void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width);
+void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width);
+void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width);
+void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width);
+
+void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
+void J400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
+void J400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width);
+void J400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width);
+void J400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int width);
+void J400ToARGBRow_Any_AVX2(const uint8* src_y, uint8* dst_argb, int width);
+void J400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int width);
void I444ToARGBRow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
+void I422AlphaToARGBRow_C(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ const uint8* a_buf,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I411ToARGBRow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
void NV12ToARGBRow_C(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
-void NV21ToRGB565Row_C(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
- int width);
void NV12ToRGB565Row_C(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
void NV21ToARGBRow_C(const uint8* src_y,
- const uint8* src_vu,
+ const uint8* src_uv,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
void YUY2ToARGBRow_C(const uint8* src_yuy2,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
void UYVYToARGBRow_C(const uint8* src_uyvy,
uint8* dst_argb,
- int width);
-void I422ToBGRARow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_bgra,
- int width);
-void I422ToABGRRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_abgr,
+ const struct YuvConstants* yuvconstants,
int width);
void I422ToRGBARow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgba,
+ const struct YuvConstants* yuvconstants,
int width);
void I422ToRGB24Row_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
int width);
-void I422ToRAWRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_raw,
- int width);
void I422ToARGB4444Row_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb4444,
+ const struct YuvConstants* yuvconstants,
int width);
void I422ToARGB1555Row_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb4444,
+ const struct YuvConstants* yuvconstants,
int width);
void I422ToRGB565Row_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
int width);
-void YToARGBRow_C(const uint8* src_y,
- uint8* dst_argb,
- int width);
void I422ToARGBRow_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGBARow_AVX2(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
void I444ToARGBRow_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
+void I444ToARGBRow_AVX2(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToARGBRow_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
+void I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ const uint8* a_buf,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422AlphaToARGBRow_AVX2(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ const uint8* a_buf,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I411ToARGBRow_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
+void I411ToARGBRow_AVX2(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void NV12ToARGBRow_SSSE3(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
-void NV21ToARGBRow_SSSE3(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
- int width);
+void NV12ToARGBRow_AVX2(const uint8* src_y,
+ const uint8* src_uv,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void NV12ToRGB565Row_SSSE3(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
-void NV21ToRGB565Row_SSSE3(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
- int width);
+void NV12ToRGB565Row_AVX2(const uint8* src_y,
+ const uint8* src_uv,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV21ToARGBRow_SSSE3(const uint8* src_y,
+ const uint8* src_uv,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV21ToARGBRow_AVX2(const uint8* src_y,
+ const uint8* src_uv,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
-void I422ToBGRARow_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_bgra,
- int width);
-void I422ToABGRRow_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_abgr,
- int width);
+void YUY2ToARGBRow_AVX2(const uint8* src_yuy2,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void UYVYToARGBRow_AVX2(const uint8* src_uyvy,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToRGBARow_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgba,
+ const struct YuvConstants* yuvconstants,
int width);
void I422ToARGB4444Row_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
+void I422ToARGB4444Row_AVX2(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToARGB1555Row_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
+void I422ToARGB1555Row_AVX2(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToRGB565Row_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
-// RGB24/RAW are unaligned.
+void I422ToRGB565Row_AVX2(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToRGB24Row_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
int width);
-void I422ToRAWRow_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_raw,
- int width);
-
-void I444ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I411ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void NV12ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width);
-void NV21ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
- int width);
-void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* src_yuy2,
- uint8* dst_argb,
- int width);
-void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy,
- uint8* dst_argb,
- int width);
-void I422ToBGRARow_Unaligned_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_bgra,
- int width);
-void I422ToABGRRow_Unaligned_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_abgr,
- int width);
-void I422ToRGBARow_Unaligned_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
- int width);
+void I422ToRGB24Row_AVX2(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToARGBRow_Any_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGBARow_Any_AVX2(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
void I444ToARGBRow_Any_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
+void I444ToARGBRow_Any_AVX2(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToARGBRow_Any_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
+void I422AlphaToARGBRow_Any_SSSE3(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ const uint8* a_buf,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422AlphaToARGBRow_Any_AVX2(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ const uint8* a_buf,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I411ToARGBRow_Any_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
+void I411ToARGBRow_Any_AVX2(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void NV12ToARGBRow_Any_SSSE3(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
+void NV12ToARGBRow_Any_AVX2(const uint8* src_y,
+ const uint8* src_uv,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void NV21ToARGBRow_Any_SSSE3(const uint8* src_y,
const uint8* src_vu,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
+void NV21ToARGBRow_Any_AVX2(const uint8* src_y,
+ const uint8* src_vu,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void NV12ToRGB565Row_Any_SSSE3(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
-void NV21ToRGB565Row_Any_SSSE3(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
- int width);
+void NV12ToRGB565Row_Any_AVX2(const uint8* src_y,
+ const uint8* src_uv,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void YUY2ToARGBRow_Any_SSSE3(const uint8* src_yuy2,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
void UYVYToARGBRow_Any_SSSE3(const uint8* src_uyvy,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
-void I422ToBGRARow_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_bgra,
- int width);
-void I422ToABGRRow_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_abgr,
- int width);
+void YUY2ToARGBRow_Any_AVX2(const uint8* src_yuy2,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void UYVYToARGBRow_Any_AVX2(const uint8* src_uyvy,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToRGBARow_Any_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgba,
+ const struct YuvConstants* yuvconstants,
int width);
void I422ToARGB4444Row_Any_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgba,
+ const struct YuvConstants* yuvconstants,
int width);
+void I422ToARGB4444Row_Any_AVX2(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_rgba,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToARGB1555Row_Any_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgba,
+ const struct YuvConstants* yuvconstants,
int width);
+void I422ToARGB1555Row_Any_AVX2(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_rgba,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToRGB565Row_Any_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgba,
+ const struct YuvConstants* yuvconstants,
int width);
-// RGB24/RAW are unaligned.
+void I422ToRGB565Row_Any_AVX2(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_rgba,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToRGB24Row_Any_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
-void I422ToRAWRow_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void YToARGBRow_SSE2(const uint8* src_y,
- uint8* dst_argb,
- int width);
-void YToARGBRow_NEON(const uint8* src_y,
- uint8* dst_argb,
- int width);
-void YToARGBRow_Any_SSE2(const uint8* src_y,
- uint8* dst_argb,
- int width);
-void YToARGBRow_Any_NEON(const uint8* src_y,
- uint8* dst_argb,
- int width);
+void I422ToRGB24Row_Any_AVX2(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+
+void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width);
+void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
+void I400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
+void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width);
+void I400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int width);
+void I400ToARGBRow_Any_AVX2(const uint8* src_y, uint8* dst_argb, int width);
+void I400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int width);
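[editor note] The I400 rows expand a single gray plane to opaque ARGB. A rough sketch of the scalar reference, for orientation only (assumptions: the exact rounding lives in row_common.cc, and the I400 naming, as opposed to J400, implies BT.601 range expansion; uint8 comes from libyuv/basic_types.h):

/* Hedged sketch: limited-range Y (16..235) -> full-range gray, alpha 255. */
static void I400ToARGBRow_Sketch(const uint8* src_y, uint8* dst_argb,
                                 int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int y = ((src_y[x] - 16) * 298) >> 8;  /* ~1.164 in 8.8 fixed point */
    if (y < 0) y = 0;
    if (y > 255) y = 255;
    dst_argb[0] = dst_argb[1] = dst_argb[2] = (uint8)y;  /* B = G = R */
    dst_argb[3] = 255;                                   /* opaque */
    dst_argb += 4;
  }
}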
// ARGB preattenuated alpha blend.
void ARGBBlendRow_SSSE3(const uint8* src_argb, const uint8* src_argb1,
uint8* dst_argb, int width);
-void ARGBBlendRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
void ARGBBlendRow_NEON(const uint8* src_argb, const uint8* src_argb1,
uint8* dst_argb, int width);
void ARGBBlendRow_C(const uint8* src_argb, const uint8* src_argb1,
uint8* dst_argb, int width);
+// Unattenuated planar alpha blend.
+void BlendPlaneRow_SSSE3(const uint8* src0, const uint8* src1,
+ const uint8* alpha, uint8* dst, int width);
+void BlendPlaneRow_Any_SSSE3(const uint8* src0, const uint8* src1,
+ const uint8* alpha, uint8* dst, int width);
+void BlendPlaneRow_AVX2(const uint8* src0, const uint8* src1,
+ const uint8* alpha, uint8* dst, int width);
+void BlendPlaneRow_Any_AVX2(const uint8* src0, const uint8* src1,
+ const uint8* alpha, uint8* dst, int width);
+void BlendPlaneRow_C(const uint8* src0, const uint8* src1,
+ const uint8* alpha, uint8* dst, int width);
+
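[editor note] BlendPlaneRow composites two planes through a separate alpha plane without premultiplication. A plausible scalar equivalent, shown only for orientation (the shipped C version may bias the rounding differently):

/* Sketch: dst = (a*src0 + (255-a)*src1) / 255, approximated with a >> 8;
 * the +255 bias keeps alpha == 255 mapping src0 through unchanged. */
static void BlendPlaneRow_Sketch(const uint8* src0, const uint8* src1,
                                 const uint8* alpha, uint8* dst, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int a = alpha[x];
    dst[x] = (uint8)((a * src0[x] + (255 - a) * src1[x] + 255) >> 8);
  }
}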
-// ARGB multiply images. Same API as Blend, but these require
-// pointer and width alignment for SSE2.
+// ARGB multiply images. Same API as Blend; the SSE2 paths now use
+// unaligned loads, so pointer and width alignment is not required.
void ARGBMultiplyRow_C(const uint8* src_argb, const uint8* src_argb1,
@@ -1359,247 +1518,202 @@ void ARGBSubtractRow_NEON(const uint8* src_argb, const uint8* src_argb1,
void ARGBSubtractRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
uint8* dst_argb, int width);
-void ARGBToRGB24Row_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRAWRow_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRGB565Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB1555Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
+void ARGBToRGB24Row_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width);
+void ARGBToRAWRow_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width);
+void ARGBToRGB565Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int width);
+void ARGBToARGB1555Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb,
+ int width);
+void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb,
+ int width);
-void ARGBToRGB24Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRAWRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRGB565Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB1555Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB4444Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
+void ARGBToRGB565DitherRow_Any_SSE2(const uint8* src_argb, uint8* dst_rgb,
+ const uint32 dither4, int width);
+void ARGBToRGB565DitherRow_Any_AVX2(const uint8* src_argb, uint8* dst_rgb,
+ const uint32 dither4, int width);
+
+void ARGBToRGB565Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int width);
+void ARGBToARGB1555Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb,
+ int width);
+void ARGBToARGB4444Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb,
+ int width);
+
+void ARGBToRGB24Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
+void ARGBToRAWRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
+void ARGBToRGB565Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
+void ARGBToARGB1555Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb,
+ int width);
+void ARGBToARGB4444Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb,
+ int width);
+void ARGBToRGB565DitherRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb,
+ const uint32 dither4, int width);
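[editor note] The dither4 argument packs four per-column offsets into one uint32, repeated every four pixels. A hedged scalar model of these rows (assuming little-endian byte extraction; the real versions clamp with SIMD saturation):

/* Sketch: add the column's dither offset, clamp, then truncate 8888 -> 565. */
static void ToRGB565DitherRow_Sketch(const uint8* src_argb, uint8* dst_rgb,
                                     uint32 dither4, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int d = (int)((dither4 >> ((x & 3) * 8)) & 0xff);
    int b = src_argb[0] + d;
    int g = src_argb[1] + d;
    int r = src_argb[2] + d;
    if (b > 255) b = 255;
    if (g > 255) g = 255;
    if (r > 255) r = 255;
    ((uint16*)dst_rgb)[x] = (uint16)((b >> 3) | ((g >> 2) << 5) |
                                     ((r >> 3) << 11));
    src_argb += 4;
  }
}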
void I444ToARGBRow_Any_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_Any_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
+void I422AlphaToARGBRow_Any_NEON(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ const uint8* src_a,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I411ToARGBRow_Any_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
- int width);
-void I422ToBGRARow_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToABGRRow_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
void I422ToRGBARow_Any_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
void I422ToRGB24Row_Any_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
-void I422ToRAWRow_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
void I422ToARGB4444Row_Any_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
void I422ToARGB1555Row_Any_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
void I422ToRGB565Row_Any_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
void NV12ToARGBRow_Any_NEON(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
void NV21ToARGBRow_Any_NEON(const uint8* src_y,
- const uint8* src_uv,
+ const uint8* src_vu,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
void NV12ToRGB565Row_Any_NEON(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
- int width);
-void NV21ToRGB565Row_Any_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
void YUY2ToARGBRow_Any_NEON(const uint8* src_yuy2,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
void UYVYToARGBRow_Any_NEON(const uint8* src_uyvy,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
- int width);
-void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
- int width);
-void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width);
-void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix);
+void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int width);
void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void YUY2ToUV422Row_AVX2(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
+void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int width);
void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2,
- uint8* dst_y, int pix);
-void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToUV422Row_Unaligned_SSE2(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
+void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int width);
void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void YUY2ToUV422Row_NEON(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
+void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width);
void YUY2ToUVRow_C(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void YUY2ToUV422Row_C(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToYRow_Any_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
+void YUY2ToYRow_Any_AVX2(const uint8* src_yuy2, uint8* dst_y, int width);
void YUY2ToUVRow_Any_AVX2(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void YUY2ToUV422Row_Any_AVX2(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToYRow_Any_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
+void YUY2ToYRow_Any_SSE2(const uint8* src_yuy2, uint8* dst_y, int width);
void YUY2ToUVRow_Any_SSE2(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void YUY2ToUV422Row_Any_SSE2(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToYRow_Any_NEON(const uint8* src_yuy2, uint8* dst_y, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
+void YUY2ToYRow_Any_NEON(const uint8* src_yuy2, uint8* dst_y, int width);
void YUY2ToUVRow_Any_NEON(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void YUY2ToUV422Row_Any_NEON(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
+void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int width);
void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
+void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int width);
void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy,
- uint8* dst_y, int pix);
-void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
+void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int width);
void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
+void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int width);
void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void UYVYToUV422Row_NEON(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
-void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int pix);
+void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width);
void UYVYToUVRow_C(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void UYVYToUV422Row_C(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToYRow_Any_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
+void UYVYToYRow_Any_AVX2(const uint8* src_uyvy, uint8* dst_y, int width);
void UYVYToUVRow_Any_AVX2(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void UYVYToUV422Row_Any_AVX2(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToYRow_Any_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
+void UYVYToYRow_Any_SSE2(const uint8* src_uyvy, uint8* dst_y, int width);
void UYVYToUVRow_Any_SSE2(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void UYVYToUV422Row_Any_SSE2(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToYRow_Any_NEON(const uint8* src_uyvy, uint8* dst_y, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
+void UYVYToYRow_Any_NEON(const uint8* src_uyvy, uint8* dst_y, int width);
void UYVYToUVRow_Any_NEON(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void UYVYToUV422Row_Any_NEON(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-
-void HalfRow_C(const uint8* src_uv, int src_uv_stride,
- uint8* dst_uv, int pix);
-void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
- uint8* dst_uv, int pix);
-void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride,
- uint8* dst_uv, int pix);
-void HalfRow_NEON(const uint8* src_uv, int src_uv_stride,
- uint8* dst_uv, int pix);
-
-void HalfRow_16_C(const uint16* src_uv, int src_uv_stride,
- uint16* dst_uv, int pix);
-
-void ARGBToBayerRow_C(const uint8* src_argb, uint8* dst_bayer,
- uint32 selector, int pix);
-void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
- uint32 selector, int pix);
-void ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer,
- uint32 selector, int pix);
-void ARGBToBayerRow_Any_SSSE3(const uint8* src_argb, uint8* dst_bayer,
- uint32 selector, int pix);
-void ARGBToBayerRow_Any_NEON(const uint8* src_argb, uint8* dst_bayer,
- uint32 selector, int pix);
-void ARGBToBayerGGRow_C(const uint8* src_argb, uint8* dst_bayer,
- uint32 /* selector */, int pix);
-void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer,
- uint32 /* selector */, int pix);
-void ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer,
- uint32 /* selector */, int pix);
-void ARGBToBayerGGRow_Any_SSE2(const uint8* src_argb, uint8* dst_bayer,
- uint32 /* selector */, int pix);
-void ARGBToBayerGGRow_Any_NEON(const uint8* src_argb, uint8* dst_bayer,
- uint32 /* selector */, int pix);
+ uint8* dst_u, uint8* dst_v, int width);
void I422ToYUY2Row_C(const uint8* src_y,
const uint8* src_u,
@@ -1644,7 +1758,6 @@ void I422ToUYVYRow_Any_NEON(const uint8* src_y,
// Effects related row functions.
void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width);
@@ -1724,9 +1837,6 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride_ptr,
int width, int source_y_fraction);
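[editor note] source_y_fraction blends a row with the row src_stride_ptr bytes below it; the SIMD variants below vectorize the same arithmetic. A sketch, assuming the 256-step fraction implied by the 8-bit shift libyuv uses elsewhere:

/* Sketch: vertical lerp with weights (256 - f) and f. */
static void InterpolateRow_Sketch(uint8* dst, const uint8* src,
                                  ptrdiff_t stride, int width, int f) {
  const uint8* src1 = src + stride;  /* the second source row */
  int x;
  for (x = 0; x < width; ++x) {
    dst[x] = (uint8)((src[x] * (256 - f) + src1[x] * f) >> 8);
  }
}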
-void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride_ptr, int width,
- int source_y_fraction);
void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride_ptr, int width,
int source_y_fraction);
@@ -1736,30 +1846,21 @@ void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
void InterpolateRow_NEON(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride_ptr, int width,
int source_y_fraction);
-void InterpolateRows_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride_ptr, int width,
- int source_y_fraction);
-void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride_ptr, int width,
- int source_y_fraction);
-void InterpolateRow_Unaligned_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride_ptr, int width,
- int source_y_fraction);
+void InterpolateRow_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
+ ptrdiff_t src_stride_ptr, int width,
+ int source_y_fraction);
void InterpolateRow_Any_NEON(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride_ptr, int width,
int source_y_fraction);
-void InterpolateRow_Any_SSE2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride_ptr, int width,
- int source_y_fraction);
void InterpolateRow_Any_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride_ptr, int width,
int source_y_fraction);
void InterpolateRow_Any_AVX2(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride_ptr, int width,
int source_y_fraction);
-void InterpolateRows_Any_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride_ptr, int width,
- int source_y_fraction);
+void InterpolateRow_Any_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
+ ptrdiff_t src_stride_ptr, int width,
+ int source_y_fraction);
void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr,
ptrdiff_t src_stride_ptr,
@@ -1796,6 +1897,18 @@ void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_argb, int width);
void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_argb, int width);
+void SobelRow_Any_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
+ uint8* dst_argb, int width);
+void SobelRow_Any_NEON(const uint8* src_sobelx, const uint8* src_sobely,
+ uint8* dst_argb, int width);
+void SobelToPlaneRow_Any_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
+ uint8* dst_y, int width);
+void SobelToPlaneRow_Any_NEON(const uint8* src_sobelx, const uint8* src_sobely,
+ uint8* dst_y, int width);
+void SobelXYRow_Any_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
+ uint8* dst_argb, int width);
+void SobelXYRow_Any_NEON(const uint8* src_sobelx, const uint8* src_sobely,
+ uint8* dst_argb, int width);
void ARGBPolynomialRow_C(const uint8* src_argb,
uint8* dst_argb, const float* poly,
diff --git a/TMessagesProj/jni/libyuv/include/libyuv/scale.h b/TMessagesProj/jni/libyuv/include/libyuv/scale.h
index a3bc07e0f..102158d1a 100644
--- a/TMessagesProj/jni/libyuv/include/libyuv/scale.h
+++ b/TMessagesProj/jni/libyuv/include/libyuv/scale.h
@@ -34,6 +34,7 @@ void ScalePlane(const uint8* src, int src_stride,
int dst_width, int dst_height,
enum FilterMode filtering);
+LIBYUV_API
void ScalePlane_16(const uint16* src, int src_stride,
int src_width, int src_height,
uint16* dst, int dst_stride,
diff --git a/TMessagesProj/jni/libyuv/include/libyuv/scale_argb.h b/TMessagesProj/jni/libyuv/include/libyuv/scale_argb.h
index 0c9b36257..b56cf5209 100644
--- a/TMessagesProj/jni/libyuv/include/libyuv/scale_argb.h
+++ b/TMessagesProj/jni/libyuv/include/libyuv/scale_argb.h
@@ -35,7 +35,6 @@ int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
int clip_x, int clip_y, int clip_width, int clip_height,
enum FilterMode filtering);
-// TODO(fbarchard): Implement this.
// Scale with YUV conversion to ARGB and clipping.
LIBYUV_API
int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y,
diff --git a/TMessagesProj/jni/libyuv/include/libyuv/scale_row.h b/TMessagesProj/jni/libyuv/include/libyuv/scale_row.h
index 70e6bc55b..45127bda3 100644
--- a/TMessagesProj/jni/libyuv/include/libyuv/scale_row.h
+++ b/TMessagesProj/jni/libyuv/include/libyuv/scale_row.h
@@ -12,53 +12,79 @@
#define INCLUDE_LIBYUV_SCALE_ROW_H_
#include "libyuv/basic_types.h"
+#include "libyuv/scale.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
-#if defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \
- defined(TARGET_IPHONE_SIMULATOR)
+#if defined(__pnacl__) || defined(__CLR_VER) || \
+ (defined(__i386__) && !defined(__SSE2__))
#define LIBYUV_DISABLE_X86
#endif
+// GCC >= 4.7.0 required for AVX2.
+#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
+#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
+#define GCC_HAS_AVX2 1
+#endif // GNUC >= 4.7
+#endif // __GNUC__
+
+// clang >= 3.4.0 required for AVX2.
+#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
+#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
+#define CLANG_HAS_AVX2 1
+#endif // clang >= 3.4
+#endif // __clang__
+
+// Visual C 2012 required for AVX2.
+#if defined(_M_IX86) && !defined(__clang__) && \
+ defined(_MSC_VER) && _MSC_VER >= 1700
+#define VISUALC_HAS_AVX2 1
+#endif // VisualStudio >= 2012
+
// The following are available on all x86 platforms:
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
-#define HAS_SCALEROWDOWN2_SSE2
-#define HAS_SCALEROWDOWN4_SSE2
-#define HAS_SCALEROWDOWN34_SSSE3
-#define HAS_SCALEROWDOWN38_SSSE3
-#define HAS_SCALEADDROWS_SSE2
-#define HAS_SCALEFILTERCOLS_SSSE3
-#define HAS_SCALECOLSUP2_SSE2
+#define HAS_FIXEDDIV1_X86
+#define HAS_FIXEDDIV_X86
+#define HAS_SCALEARGBCOLS_SSE2
+#define HAS_SCALEARGBCOLSUP2_SSE2
+#define HAS_SCALEARGBFILTERCOLS_SSSE3
#define HAS_SCALEARGBROWDOWN2_SSE2
#define HAS_SCALEARGBROWDOWNEVEN_SSE2
-#define HAS_SCALEARGBCOLS_SSE2
-#define HAS_SCALEARGBFILTERCOLS_SSSE3
-#define HAS_SCALEARGBCOLSUP2_SSE2
-#define HAS_FIXEDDIV_X86
-#define HAS_FIXEDDIV1_X86
+#define HAS_SCALECOLSUP2_SSE2
+#define HAS_SCALEFILTERCOLS_SSSE3
+#define HAS_SCALEROWDOWN2_SSSE3
+#define HAS_SCALEROWDOWN34_SSSE3
+#define HAS_SCALEROWDOWN38_SSSE3
+#define HAS_SCALEROWDOWN4_SSSE3
+#define HAS_SCALEADDROW_SSE2
+#endif
+
+// The following are available on all x86 platforms, but
+// require VS2012, clang 3.4 or gcc 4.7.
+// The code supports NaCl but requires a new compiler and validator.
+#if !defined(LIBYUV_DISABLE_X86) && (defined(VISUALC_HAS_AVX2) || \
+ defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
+#define HAS_SCALEADDROW_AVX2
+#define HAS_SCALEROWDOWN2_AVX2
+#define HAS_SCALEROWDOWN4_AVX2
#endif
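[editor note] These HAS_ macros only gate compilation; callers still select at run time via TestCpuFlag. The dispatch in scale.cc follows roughly this shape (sketch fragment, with src_width in scope; the _Any_ variant handles arbitrary widths, the plain variant only whole vectors):

void (*ScaleAddRow)(const uint8* src, uint16* dst, int width) = ScaleAddRow_C;
#if defined(HAS_SCALEADDROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
  ScaleAddRow = ScaleAddRow_Any_SSE2;      /* any width */
  if (IS_ALIGNED(src_width, 16)) {
    ScaleAddRow = ScaleAddRow_SSE2;        /* full 16-byte vectors only */
  }
}
#endif
#if defined(HAS_SCALEADDROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
  ScaleAddRow = ScaleAddRow_Any_AVX2;
  if (IS_ALIGNED(src_width, 32)) {
    ScaleAddRow = ScaleAddRow_AVX2;
  }
}
#endif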
// The following are available on Neon platforms:
#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
- (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
-#define HAS_SCALEROWDOWN2_NEON
-#define HAS_SCALEROWDOWN4_NEON
-#define HAS_SCALEROWDOWN34_NEON
-#define HAS_SCALEROWDOWN38_NEON
-#define HAS_SCALEARGBROWDOWNEVEN_NEON
-#define HAS_SCALEARGBROWDOWN2_NEON
-#elif !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
- (defined(__aarch64__) || defined(LIBYUV_NEON))
-#define HAS_SCALEROWDOWN2_NEON
-#define HAS_SCALEROWDOWN4_NEON
-#define HAS_SCALEROWDOWN34_NEON
-#define HAS_SCALEROWDOWN38_NEON
+ (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
+#define HAS_SCALEARGBCOLS_NEON
#define HAS_SCALEARGBROWDOWN2_NEON
#define HAS_SCALEARGBROWDOWNEVEN_NEON
+#define HAS_SCALEFILTERCOLS_NEON
+#define HAS_SCALEROWDOWN2_NEON
+#define HAS_SCALEROWDOWN34_NEON
+#define HAS_SCALEROWDOWN38_NEON
+#define HAS_SCALEROWDOWN4_NEON
+#define HAS_SCALEARGBFILTERCOLS_NEON
#endif
// The following are available on Mips platforms:
@@ -172,10 +198,8 @@ void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int dst_width);
-void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int src_width, int src_height);
-void ScaleAddRows_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
- uint32* dst_ptr, int src_width, int src_height);
+void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width);
+void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width);
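[editor note] ScaleAddRow replaces the old multi-row ScaleAddRows: it accumulates a single source row into a wider working row, and the caller loops over the box height before dividing. The C form is essentially:

/* Accumulate one 8-bit row into 16-bit column sums. */
static void ScaleAddRow_Sketch(const uint8* src_ptr, uint16* dst_ptr,
                               int src_width) {
  int x;
  for (x = 0; x < src_width; ++x) {
    dst_ptr[x] += src_ptr[x];
  }
}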
void ScaleARGBRowDown2_C(const uint8* src_argb,
ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
@@ -202,25 +226,28 @@ void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb,
void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
-void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+// Specialized scalers for x86.
+void ScaleRowDown2_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width);
+void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width);
+void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width);
+void ScaleRowDown2_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
-void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
-void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
-void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown2Linear_Unaligned_SSE2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown2Box_Unaligned_SSE2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+void ScaleRowDown4_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width);
+void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width);
+void ScaleRowDown4_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
-void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+void ScaleRowDown4Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
+
void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
@@ -237,46 +264,124 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
-void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int src_width,
- int src_height);
+void ScaleRowDown2_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width);
+void ScaleRowDown2Linear_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width);
+void ScaleRowDown2Box_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width);
+void ScaleRowDown2_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width);
+void ScaleRowDown2Linear_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width);
+void ScaleRowDown2Box_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width);
+void ScaleRowDown4_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width);
+void ScaleRowDown4Box_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width);
+void ScaleRowDown4_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width);
+void ScaleRowDown4Box_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width);
+
+void ScaleRowDown34_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width);
+void ScaleRowDown34_1_Box_Any_SSSE3(const uint8* src_ptr,
+ ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width);
+void ScaleRowDown34_0_Box_Any_SSSE3(const uint8* src_ptr,
+ ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width);
+void ScaleRowDown38_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width);
+void ScaleRowDown38_3_Box_Any_SSSE3(const uint8* src_ptr,
+ ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width);
+void ScaleRowDown38_2_Box_Any_SSSE3(const uint8* src_ptr,
+ ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width);
+
+void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
+void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
+void ScaleAddRow_Any_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
+void ScaleAddRow_Any_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
+
void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx);
void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx);
-void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
- ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width);
-void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
- ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width);
-void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
- ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width);
-void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
- int src_stepx,
- uint8* dst_argb, int dst_width);
-void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
- ptrdiff_t src_stride,
- int src_stepx,
- uint8* dst_argb, int dst_width);
+
+
void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
-// Row functions.
+void ScaleARGBFilterCols_NEON(uint8* dst_argb, const uint8* src_argb,
+ int dst_width, int x, int dx);
+void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb,
+ int dst_width, int x, int dx);
+void ScaleARGBFilterCols_Any_NEON(uint8* dst_argb, const uint8* src_argb,
+ int dst_width, int x, int dx);
+void ScaleARGBCols_Any_NEON(uint8* dst_argb, const uint8* src_argb,
+ int dst_width, int x, int dx);
+
+// ARGB Row functions
+void ScaleARGBRowDown2_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
+ uint8* dst_argb, int dst_width);
+void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
+ uint8* dst_argb, int dst_width);
+void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
+ uint8* dst_argb, int dst_width);
+void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst, int dst_width);
+void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ptrdiff_t src_stride,
+ uint8* dst_argb, int dst_width);
+void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst, int dst_width);
+void ScaleARGBRowDown2_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
+ uint8* dst_argb, int dst_width);
+void ScaleARGBRowDown2Linear_Any_SSE2(const uint8* src_argb,
+ ptrdiff_t src_stride,
+ uint8* dst_argb, int dst_width);
+void ScaleARGBRowDown2Box_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
+ uint8* dst_argb, int dst_width);
+void ScaleARGBRowDown2_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst, int dst_width);
+void ScaleARGBRowDown2Linear_Any_NEON(const uint8* src_argb,
+ ptrdiff_t src_stride,
+ uint8* dst_argb, int dst_width);
+void ScaleARGBRowDown2Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst, int dst_width);
+
+void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
+ int src_stepx, uint8* dst_argb, int dst_width);
+void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
+ int src_stepx,
+ uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
-void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
+void ScaleARGBRowDownEven_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
+ int src_stepx,
+ uint8* dst_argb, int dst_width);
+void ScaleARGBRowDownEvenBox_Any_SSE2(const uint8* src_argb,
+ ptrdiff_t src_stride,
+ int src_stepx,
+ uint8* dst_argb, int dst_width);
+void ScaleARGBRowDownEven_Any_NEON(const uint8* src_argb, ptrdiff_t src_stride,
+ int src_stepx,
+ uint8* dst_argb, int dst_width);
+void ScaleARGBRowDownEvenBox_Any_NEON(const uint8* src_argb,
+ ptrdiff_t src_stride,
+ int src_stepx,
+ uint8* dst_argb, int dst_width);
// ScaleRowDown2Box also used by planar functions
// NEON downscalers with interpolation.
@@ -284,7 +389,8 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
// Note - not static due to reuse in convert for 444 to 420.
void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
-
+void ScaleRowDown2Linear_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst, int dst_width);
void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
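[editor note] For reference, the 2x box filter these rows vectorize averages each 2x2 source block with round-to-nearest; a scalar sketch:

static void ScaleRowDown2Box_Sketch(const uint8* s, ptrdiff_t src_stride,
                                    uint8* dst, int dst_width) {
  const uint8* t = s + src_stride;  /* the row below */
  int x;
  for (x = 0; x < dst_width; ++x) {
    dst[x] = (uint8)((s[0] + s[1] + t[0] + t[1] + 2) >> 2);  /* +2 rounds */
    s += 2;
    t += 2;
  }
}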
@@ -319,6 +425,42 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
+void ScaleRowDown2_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst, int dst_width);
+void ScaleRowDown2Linear_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst, int dst_width);
+void ScaleRowDown2Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst, int dst_width);
+void ScaleRowDown4_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width);
+void ScaleRowDown4Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width);
+void ScaleRowDown34_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width);
+void ScaleRowDown34_0_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width);
+void ScaleRowDown34_1_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width);
+// 32 -> 12
+void ScaleRowDown38_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width);
+// 32x3 -> 12x1
+void ScaleRowDown38_3_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width);
+// 32x2 -> 12x1
+void ScaleRowDown38_2_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width);
+
+void ScaleAddRow_NEON(const uint8* src_ptr, uint16* dst_ptr, int src_width);
+void ScaleAddRow_Any_NEON(const uint8* src_ptr, uint16* dst_ptr, int src_width);
+
+void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
+ int dst_width, int x, int dx);
+
+void ScaleFilterCols_Any_NEON(uint8* dst_ptr, const uint8* src_ptr,
+ int dst_width, int x, int dx);
+
void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
diff --git a/TMessagesProj/jni/libyuv/include/libyuv/version.h b/TMessagesProj/jni/libyuv/include/libyuv/version.h
index c6952040b..ab0f88d9e 100644
--- a/TMessagesProj/jni/libyuv/include/libyuv/version.h
+++ b/TMessagesProj/jni/libyuv/include/libyuv/version.h
@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 1074
+#define LIBYUV_VERSION 1561
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
diff --git a/TMessagesProj/jni/libyuv/include/libyuv/video_common.h b/TMessagesProj/jni/libyuv/include/libyuv/video_common.h
index 91acc2ffc..ad934e424 100644
--- a/TMessagesProj/jni/libyuv/include/libyuv/video_common.h
+++ b/TMessagesProj/jni/libyuv/include/libyuv/video_common.h
@@ -62,7 +62,7 @@ enum FourCC {
// 2 Secondary YUV formats: row biplanar.
FOURCC_M420 = FOURCC('M', '4', '2', '0'),
- FOURCC_Q420 = FOURCC('Q', '4', '2', '0'),
+ FOURCC_Q420 = FOURCC('Q', '4', '2', '0'), // deprecated.
// 9 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp.
FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
@@ -75,7 +75,7 @@ enum FourCC {
FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'), // argb1555 LE.
FOURCC_R444 = FOURCC('R', '4', '4', '4'), // argb4444 LE.
- // 4 Secondary RGB formats: 4 Bayer Patterns.
+ // 4 Secondary RGB formats: 4 Bayer Patterns. deprecated.
FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'),
FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'),
FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'),
@@ -90,7 +90,8 @@ enum FourCC {
FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'),
FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'), // Linux version of I420.
FOURCC_J420 = FOURCC('J', '4', '2', '0'),
- FOURCC_J400 = FOURCC('J', '4', '0', '0'),
+ FOURCC_J400 = FOURCC('J', '4', '0', '0'), // unofficial fourcc
+ FOURCC_H420 = FOURCC('H', '4', '2', '0'), // unofficial fourcc
// 14 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc.
FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'), // Alias for I420.
@@ -150,6 +151,7 @@ enum FourCCBpp {
FOURCC_BPP_YU12 = 12,
FOURCC_BPP_J420 = 12,
FOURCC_BPP_J400 = 8,
+ FOURCC_BPP_H420 = 12,
FOURCC_BPP_MJPG = 0, // 0 means unknown.
FOURCC_BPP_H264 = 0,
FOURCC_BPP_IYUV = 12,
diff --git a/TMessagesProj/jni/libyuv/source/compare.cc b/TMessagesProj/jni/libyuv/source/compare.cc
index dc715e019..e3846bdfd 100644
--- a/TMessagesProj/jni/libyuv/source/compare.cc
+++ b/TMessagesProj/jni/libyuv/source/compare.cc
@@ -17,38 +17,23 @@
#endif
#include "libyuv/basic_types.h"
+#include "libyuv/compare_row.h"
#include "libyuv/cpu_id.h"
#include "libyuv/row.h"
+#include "libyuv/video_common.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
-// hash seed of 5381 recommended.
-// Internal C version of HashDjb2 with int sized count for efficiency.
-uint32 HashDjb2_C(const uint8* src, int count, uint32 seed);
-
-// This module is for Visual C x86
-#if !defined(LIBYUV_DISABLE_X86) && \
- (defined(_M_IX86) || \
- (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))))
-#define HAS_HASHDJB2_SSE41
-uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed);
-
-#if _MSC_VER >= 1700
-#define HAS_HASHDJB2_AVX2
-uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed);
-#endif
-
-#endif // HAS_HASHDJB2_SSE41
-
// hash seed of 5381 recommended.
LIBYUV_API
uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
const int kBlockSize = 1 << 15; // 32768;
int remainder;
- uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = HashDjb2_C;
+ uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) =
+ HashDjb2_C;
#if defined(HAS_HASHDJB2_SSE41)
if (TestCpuFlag(kCpuHasSSE41)) {
HashDjb2_SSE = HashDjb2_SSE41;
@@ -78,22 +63,53 @@ uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
return seed;
}
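[editor note] For orientation: djb2 is the recurrence seed = seed * 33 + byte, which is why the kHash16x33 and kHashMul constants in the SIMD versions further down are descending powers of 33; multiplying the running seed by 33^16 per 16-byte block keeps the vector result bit-identical to the scalar form:

/* Scalar djb2; matches the recurrence HashDjb2_C implements. */
static uint32 HashDjb2_Scalar(const uint8* src, int count, uint32 seed) {
  int i;
  for (i = 0; i < count; ++i) {
    seed = seed * 33u + src[i];
  }
  return seed;
}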
-uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count);
-#if !defined(LIBYUV_DISABLE_NEON) && \
- (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
-#define HAS_SUMSQUAREERROR_NEON
-uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count);
-#endif
-#if !defined(LIBYUV_DISABLE_X86) && \
- (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
-#define HAS_SUMSQUAREERROR_SSE2
-uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count);
-#endif
-// Visual C 2012 required for AVX2.
-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && _MSC_VER >= 1700
-#define HAS_SUMSQUAREERROR_AVX2
-uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count);
-#endif
+static uint32 ARGBDetectRow_C(const uint8* argb, int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ if (argb[0] != 255) { // First byte is not Alpha of 255, so not ARGB.
+ return FOURCC_BGRA;
+ }
+ if (argb[3] != 255) { // 4th byte is not Alpha of 255, so not BGRA.
+ return FOURCC_ARGB;
+ }
+ if (argb[4] != 255) { // Second pixel first byte is not Alpha of 255.
+ return FOURCC_BGRA;
+ }
+ if (argb[7] != 255) { // Second pixel 4th byte is not Alpha of 255.
+ return FOURCC_ARGB;
+ }
+ argb += 8;
+ }
+ if (width & 1) {
+ if (argb[0] != 255) { // First byte is not Alpha of 255, so not ARGB.
+ return FOURCC_BGRA;
+ }
+ if (argb[3] != 255) { // 4th byte is not Alpha of 255, so not BGRA.
+ return FOURCC_ARGB;
+ }
+ }
+ return 0;
+}
+
+// Scan an opaque argb image and return fourcc based on alpha offset.
+// Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown.
+LIBYUV_API
+uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height) {
+ uint32 fourcc = 0;
+ int h;
+
+ // Coalesce rows.
+ if (stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ stride_argb = 0;
+ }
+ for (h = 0; h < height && fourcc == 0; ++h) {
+ fourcc = ARGBDetectRow_C(argb, width);
+ argb += stride_argb;
+ }
+ return fourcc;
+}
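[editor note] A hypothetical caller (frame, stride, width, height are placeholders) uses the return value to pick a conversion path; 0 means every sampled alpha byte was 255 in both candidate positions, so the order cannot be inferred:

uint32 fourcc = ARGBDetect(frame, stride, width, height);
if (fourcc == FOURCC_ARGB) {
  /* alpha sits where ARGB keeps it */
} else if (fourcc == FOURCC_BGRA) {
  /* alpha sits where BGRA keeps it */
} else {
  /* ambiguous: fall back to the caller's default byte order */
}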
// TODO(fbarchard): Refactor into row function.
LIBYUV_API
@@ -114,8 +130,7 @@ uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
}
#endif
#if defined(HAS_SUMSQUAREERROR_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) &&
- IS_ALIGNED(src_a, 16) && IS_ALIGNED(src_b, 16)) {
+ if (TestCpuFlag(kCpuHasSSE2)) {
// Note only used for multiples of 16 so count is not checked.
SumSquareError = SumSquareError_SSE2;
}
diff --git a/TMessagesProj/jni/libyuv/source/compare_common.cc b/TMessagesProj/jni/libyuv/source/compare_common.cc
index c546b5182..42fc58935 100644
--- a/TMessagesProj/jni/libyuv/source/compare_common.cc
+++ b/TMessagesProj/jni/libyuv/source/compare_common.cc
@@ -10,6 +10,8 @@
#include "libyuv/basic_types.h"
+#include "libyuv/compare_row.h"
+
#ifdef __cplusplus
namespace libyuv {
extern "C" {
diff --git a/TMessagesProj/jni/libyuv/source/compare_posix.cc b/TMessagesProj/jni/libyuv/source/compare_gcc.cc
similarity index 90%
rename from TMessagesProj/jni/libyuv/source/compare_posix.cc
rename to TMessagesProj/jni/libyuv/source/compare_gcc.cc
index ac361190e..1b83edb16 100644
--- a/TMessagesProj/jni/libyuv/source/compare_posix.cc
+++ b/TMessagesProj/jni/libyuv/source/compare_gcc.cc
@@ -9,6 +9,8 @@
*/
#include "libyuv/basic_types.h"
+
+#include "libyuv/compare_row.h"
#include "libyuv/row.h"
#ifdef __cplusplus
@@ -16,20 +18,21 @@ namespace libyuv {
extern "C" {
#endif
-#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
+// This module is for GCC x86 and x64.
+#if !defined(LIBYUV_DISABLE_X86) && \
+ (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
uint32 sse;
- asm volatile ( // NOLINT
+ asm volatile (
"pxor %%xmm0,%%xmm0 \n"
"pxor %%xmm5,%%xmm5 \n"
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm1 \n"
+ "movdqu " MEMACCESS(0) ",%%xmm1 \n"
"lea " MEMLEA(0x10, 0) ",%0 \n"
- "movdqa " MEMACCESS(1) ",%%xmm2 \n"
+ "movdqu " MEMACCESS(1) ",%%xmm2 \n"
"lea " MEMLEA(0x10, 1) ",%1 \n"
- "sub $0x10,%2 \n"
"movdqa %%xmm1,%%xmm3 \n"
"psubusb %%xmm2,%%xmm1 \n"
"psubusb %%xmm3,%%xmm2 \n"
@@ -41,6 +44,7 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
"pmaddwd %%xmm2,%%xmm2 \n"
"paddd %%xmm1,%%xmm0 \n"
"paddd %%xmm2,%%xmm0 \n"
+ "sub $0x10,%2 \n"
"jg 1b \n"
"pshufd $0xee,%%xmm0,%%xmm1 \n"
@@ -53,20 +57,11 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
"+r"(src_b), // %1
"+r"(count), // %2
"=g"(sse) // %3
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
- ); // NOLINT
+ :: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
+ );
return sse;
}
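[editor note] The psubusb/psubusb/por sequence above is the standard unsigned absolute-difference idiom: each saturating subtraction zeroes the lanes where the operand order is wrong, so OR-ing the two partial results yields |a - b| per byte. In scalar terms:

/* What the three-instruction idiom computes for each byte lane. */
static uint8 AbsDiffU8(uint8 a, uint8 b) {
  uint8 d0 = (uint8)(a > b ? a - b : 0);  /* psubusb %%xmm2,%%xmm1 */
  uint8 d1 = (uint8)(b > a ? b - a : 0);  /* psubusb %%xmm3,%%xmm2 */
  return (uint8)(d0 | d1);                /* por */
}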
-#endif // defined(__x86_64__) || defined(__i386__)
-
-#if !defined(LIBYUV_DISABLE_X86) && \
- (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
-#define HAS_HASHDJB2_SSE41
static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16
static uvec32 kHashMul0 = {
0x0c3525e1, // 33 ^ 15
@@ -95,7 +90,7 @@ static uvec32 kHashMul3 = {
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
uint32 hash;
- asm volatile ( // NOLINT
+ asm volatile (
"movd %2,%%xmm0 \n"
"pxor %%xmm7,%%xmm7 \n"
"movdqa %4,%%xmm6 \n"
@@ -124,13 +119,13 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
"pmulld %%xmm5,%%xmm1 \n"
"paddd %%xmm4,%%xmm3 \n"
"paddd %%xmm2,%%xmm1 \n"
- "sub $0x10,%1 \n"
"paddd %%xmm3,%%xmm1 \n"
"pshufd $0xe,%%xmm1,%%xmm2 \n"
"paddd %%xmm2,%%xmm1 \n"
"pshufd $0x1,%%xmm1,%%xmm2 \n"
"paddd %%xmm2,%%xmm1 \n"
"paddd %%xmm1,%%xmm0 \n"
+ "sub $0x10,%1 \n"
"jg 1b \n"
"movd %%xmm0,%3 \n"
: "+r"(src), // %0
@@ -143,10 +138,8 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
"m"(kHashMul2), // %7
"m"(kHashMul3) // %8
: "memory", "cc"
-#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
- ); // NOLINT
+ );
return hash;
}
-#endif // defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
+#endif // defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))
diff --git a/TMessagesProj/jni/libyuv/source/compare_neon.cc b/TMessagesProj/jni/libyuv/source/compare_neon.cc
index 55052c0ee..49aa3b4ee 100644
--- a/TMessagesProj/jni/libyuv/source/compare_neon.cc
+++ b/TMessagesProj/jni/libyuv/source/compare_neon.cc
@@ -9,6 +9,8 @@
*/
#include "libyuv/basic_types.h"
+
+#include "libyuv/compare_row.h"
#include "libyuv/row.h"
#ifdef __cplusplus
@@ -16,7 +18,8 @@ namespace libyuv {
extern "C" {
#endif
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
+ !defined(__aarch64__)
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
volatile uint32 sse;
@@ -26,7 +29,6 @@ uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
"vmov.u8 q9, #0 \n"
"vmov.u8 q11, #0 \n"
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {q0}, [%0]! \n"
@@ -56,46 +58,7 @@ uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
return sse;
}
-#elif !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
-
-uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
- volatile uint32 sse;
- asm volatile (
- "eor v16.16b, v16.16b, v16.16b \n"
- "eor v18.16b, v18.16b, v18.16b \n"
- "eor v17.16b, v17.16b, v17.16b \n"
- "eor v19.16b, v19.16b, v19.16b \n"
-
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "ld1 {v0.16b}, [%0], #16 \n"
- MEMACCESS(1)
- "ld1 {v1.16b}, [%1], #16 \n"
- "subs %2, %2, #16 \n"
- "usubl v2.8h, v0.8b, v1.8b \n"
- "usubl2 v3.8h, v0.16b, v1.16b \n"
- "smlal v16.4s, v2.4h, v2.4h \n"
- "smlal v17.4s, v3.4h, v3.4h \n"
- "smlal2 v18.4s, v2.8h, v2.8h \n"
- "smlal2 v19.4s, v3.8h, v3.8h \n"
- "bgt 1b \n"
-
- "add v16.4s, v16.4s, v17.4s \n"
- "add v18.4s, v18.4s, v19.4s \n"
- "add v19.4s, v16.4s, v18.4s \n"
- "addv s0, v19.4s \n"
- "fmov %w3, s0 \n"
- : "+r"(src_a),
- "+r"(src_b),
- "+r"(count),
- "=r"(sse)
- :
- : "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19");
- return sse;
-}
-
-#endif // __ARM_NEON__
+#endif // defined(__ARM_NEON__) && !defined(__aarch64__)
#ifdef __cplusplus
} // extern "C"
diff --git a/TMessagesProj/jni/libyuv/source/compare_neon64.cc b/TMessagesProj/jni/libyuv/source/compare_neon64.cc
new file mode 100644
index 000000000..f9c7df98c
--- /dev/null
+++ b/TMessagesProj/jni/libyuv/source/compare_neon64.cc
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/basic_types.h"
+
+#include "libyuv/compare_row.h"
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+
+uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
+ volatile uint32 sse;
+ asm volatile (
+ "eor v16.16b, v16.16b, v16.16b \n"
+ "eor v18.16b, v18.16b, v18.16b \n"
+ "eor v17.16b, v17.16b, v17.16b \n"
+ "eor v19.16b, v19.16b, v19.16b \n"
+
+ "1: \n"
+ MEMACCESS(0)
+ "ld1 {v0.16b}, [%0], #16 \n"
+ MEMACCESS(1)
+ "ld1 {v1.16b}, [%1], #16 \n"
+ "subs %w2, %w2, #16 \n"
+ "usubl v2.8h, v0.8b, v1.8b \n"
+ "usubl2 v3.8h, v0.16b, v1.16b \n"
+ "smlal v16.4s, v2.4h, v2.4h \n"
+ "smlal v17.4s, v3.4h, v3.4h \n"
+ "smlal2 v18.4s, v2.8h, v2.8h \n"
+ "smlal2 v19.4s, v3.8h, v3.8h \n"
+ "b.gt 1b \n"
+
+ "add v16.4s, v16.4s, v17.4s \n"
+ "add v18.4s, v18.4s, v19.4s \n"
+ "add v19.4s, v16.4s, v18.4s \n"
+ "addv s0, v19.4s \n"
+ "fmov %w3, s0 \n"
+ : "+r"(src_a),
+ "+r"(src_b),
+ "+r"(count),
+ "=r"(sse)
+ :
+ : "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19");
+ return sse;
+}
+
+#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
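
For reference, the NEON kernels in both files compute the same quantity as this scalar sketch (a reference written for this note, not libyuv's actual SumSquareError_C): a sum of squared byte differences, which the assembly accumulates 16 pixels at a time across four 32-bit lanes before the final horizontal add. Assumes count is a positive multiple of 16, which the callers guarantee.

    static uint32 SumSquareError_ref(const uint8* src_a, const uint8* src_b,
                                     int count) {
      uint32 sse = 0u;
      int i;
      for (i = 0; i < count; ++i) {
        int diff = src_a[i] - src_b[i];  // usubl/usubl2: widen to 16 bits
        sse += (uint32)(diff * diff);    // smlal/smlal2: accumulate squares
      }
      return sse;
    }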
diff --git a/TMessagesProj/jni/libyuv/source/compare_win.cc b/TMessagesProj/jni/libyuv/source/compare_win.cc
index 99831651f..dc86fe25b 100644
--- a/TMessagesProj/jni/libyuv/source/compare_win.cc
+++ b/TMessagesProj/jni/libyuv/source/compare_win.cc
@@ -9,6 +9,8 @@
*/
#include "libyuv/basic_types.h"
+
+#include "libyuv/compare_row.h"
#include "libyuv/row.h"
#ifdef __cplusplus
@@ -16,9 +18,10 @@ namespace libyuv {
extern "C" {
#endif
-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
+// This module is for 32-bit Visual C x86 and clang-cl.
+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
__asm {
mov eax, [esp + 4] // src_a
@@ -27,13 +30,11 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
pxor xmm0, xmm0
pxor xmm5, xmm5
- align 4
wloop:
- movdqa xmm1, [eax]
+ movdqu xmm1, [eax]
lea eax, [eax + 16]
- movdqa xmm2, [edx]
+ movdqu xmm2, [edx]
lea edx, [edx + 16]
- sub ecx, 16
movdqa xmm3, xmm1 // abs trick
psubusb xmm1, xmm2
psubusb xmm2, xmm3
@@ -45,6 +46,7 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
pmaddwd xmm2, xmm2
paddd xmm0, xmm1
paddd xmm0, xmm2
+ sub ecx, 16
jg wloop
pshufd xmm1, xmm0, 0xee
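
The "abs trick" in the loop above sidesteps the lack of a packed absolute-difference-and-keep instruction: psubusb saturates negative byte results to zero, so OR-ing the two one-sided differences yields |a - b|. A scalar sketch with a hypothetical helper name:

    static uint8 AbsDiff8_sketch(uint8 a, uint8 b) {
      uint8 d1 = (a > b) ? (uint8)(a - b) : 0;  // psubusb xmm1, xmm2
      uint8 d2 = (b > a) ? (uint8)(b - a) : 0;  // psubusb xmm2, xmm3
      return d1 | d2;                           // por: at most one is nonzero
    }

The kernel then widens the bytes to 16 bits against a zero register and uses pmaddwd, which squares and pairwise-adds in a single instruction.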
@@ -60,7 +62,7 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
#if _MSC_VER >= 1700
// C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX.
#pragma warning(disable: 4752)
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
__asm {
mov eax, [esp + 4] // src_a
@@ -70,12 +72,10 @@ uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
vpxor ymm5, ymm5, ymm5 // constant 0 for unpck
sub edx, eax
- align 4
wloop:
vmovdqu ymm1, [eax]
vmovdqu ymm2, [eax + edx]
lea eax, [eax + 32]
- sub ecx, 32
vpsubusb ymm3, ymm1, ymm2 // abs difference trick
vpsubusb ymm2, ymm2, ymm1
vpor ymm1, ymm2, ymm3
@@ -85,6 +85,7 @@ uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
vpmaddwd ymm1, ymm1, ymm1
vpaddd ymm0, ymm0, ymm1
vpaddd ymm0, ymm0, ymm2
+ sub ecx, 32
jg wloop
vpshufd ymm1, ymm0, 0xee // 3, 2 + 1, 0 both lanes.
@@ -100,42 +101,33 @@ uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
}
#endif // _MSC_VER >= 1700
-#define HAS_HASHDJB2_SSE41
-static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16
-static uvec32 kHashMul0 = {
+uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16
+uvec32 kHashMul0 = {
0x0c3525e1, // 33 ^ 15
0xa3476dc1, // 33 ^ 14
0x3b4039a1, // 33 ^ 13
0x4f5f0981, // 33 ^ 12
};
-static uvec32 kHashMul1 = {
+uvec32 kHashMul1 = {
0x30f35d61, // 33 ^ 11
0x855cb541, // 33 ^ 10
0x040a9121, // 33 ^ 9
0x747c7101, // 33 ^ 8
};
-static uvec32 kHashMul2 = {
+uvec32 kHashMul2 = {
0xec41d4e1, // 33 ^ 7
0x4cfa3cc1, // 33 ^ 6
0x025528a1, // 33 ^ 5
0x00121881, // 33 ^ 4
};
-static uvec32 kHashMul3 = {
+uvec32 kHashMul3 = {
0x00008c61, // 33 ^ 3
0x00000441, // 33 ^ 2
0x00000021, // 33 ^ 1
0x00000001, // 33 ^ 0
};
-// 27: 66 0F 38 40 C6 pmulld xmm0,xmm6
-// 44: 66 0F 38 40 DD pmulld xmm3,xmm5
-// 59: 66 0F 38 40 E5 pmulld xmm4,xmm5
-// 72: 66 0F 38 40 D5 pmulld xmm2,xmm5
-// 83: 66 0F 38 40 CD pmulld xmm1,xmm5
-#define pmulld(reg) _asm _emit 0x66 _asm _emit 0x0F _asm _emit 0x38 \
- _asm _emit 0x40 _asm _emit reg
-
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
__asm {
mov eax, [esp + 4] // src
@@ -143,34 +135,32 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
movd xmm0, [esp + 12] // seed
pxor xmm7, xmm7 // constant 0 for unpck
- movdqa xmm6, kHash16x33
+ movdqa xmm6, xmmword ptr kHash16x33
- align 4
wloop:
movdqu xmm1, [eax] // src[0-15]
lea eax, [eax + 16]
- pmulld(0xc6) // pmulld xmm0,xmm6 hash *= 33 ^ 16
- movdqa xmm5, kHashMul0
+ pmulld xmm0, xmm6 // hash *= 33 ^ 16
+ movdqa xmm5, xmmword ptr kHashMul0
movdqa xmm2, xmm1
punpcklbw xmm2, xmm7 // src[0-7]
movdqa xmm3, xmm2
punpcklwd xmm3, xmm7 // src[0-3]
- pmulld(0xdd) // pmulld xmm3, xmm5
- movdqa xmm5, kHashMul1
+ pmulld xmm3, xmm5
+ movdqa xmm5, xmmword ptr kHashMul1
movdqa xmm4, xmm2
punpckhwd xmm4, xmm7 // src[4-7]
- pmulld(0xe5) // pmulld xmm4, xmm5
- movdqa xmm5, kHashMul2
+ pmulld xmm4, xmm5
+ movdqa xmm5, xmmword ptr kHashMul2
punpckhbw xmm1, xmm7 // src[8-15]
movdqa xmm2, xmm1
punpcklwd xmm2, xmm7 // src[8-11]
- pmulld(0xd5) // pmulld xmm2, xmm5
- movdqa xmm5, kHashMul3
+ pmulld xmm2, xmm5
+ movdqa xmm5, xmmword ptr kHashMul3
punpckhwd xmm1, xmm7 // src[12-15]
- pmulld(0xcd) // pmulld xmm1, xmm5
+ pmulld xmm1, xmm5
paddd xmm3, xmm4 // add 16 results
paddd xmm1, xmm2
- sub ecx, 16
paddd xmm1, xmm3
pshufd xmm2, xmm1, 0x0e // upper 2 dwords
@@ -178,6 +168,7 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
pshufd xmm2, xmm1, 0x01
paddd xmm1, xmm2
paddd xmm0, xmm1
+ sub ecx, 16
jg wloop
movd eax, xmm0 // return hash
@@ -187,44 +178,43 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
// Visual C 2012 required for AVX2.
#if _MSC_VER >= 1700
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
__asm {
mov eax, [esp + 4] // src
mov ecx, [esp + 8] // count
- movd xmm0, [esp + 12] // seed
- movdqa xmm6, kHash16x33
+ vmovd xmm0, [esp + 12] // seed
- align 4
wloop:
- vpmovzxbd xmm3, dword ptr [eax] // src[0-3]
- pmulld xmm0, xmm6 // hash *= 33 ^ 16
- vpmovzxbd xmm4, dword ptr [eax + 4] // src[4-7]
- pmulld xmm3, kHashMul0
- vpmovzxbd xmm2, dword ptr [eax + 8] // src[8-11]
- pmulld xmm4, kHashMul1
- vpmovzxbd xmm1, dword ptr [eax + 12] // src[12-15]
- pmulld xmm2, kHashMul2
+ vpmovzxbd xmm3, [eax] // src[0-3]
+ vpmulld xmm0, xmm0, xmmword ptr kHash16x33 // hash *= 33 ^ 16
+ vpmovzxbd xmm4, [eax + 4] // src[4-7]
+ vpmulld xmm3, xmm3, xmmword ptr kHashMul0
+ vpmovzxbd xmm2, [eax + 8] // src[8-11]
+ vpmulld xmm4, xmm4, xmmword ptr kHashMul1
+ vpmovzxbd xmm1, [eax + 12] // src[12-15]
+ vpmulld xmm2, xmm2, xmmword ptr kHashMul2
lea eax, [eax + 16]
- pmulld xmm1, kHashMul3
- paddd xmm3, xmm4 // add 16 results
- paddd xmm1, xmm2
+ vpmulld xmm1, xmm1, xmmword ptr kHashMul3
+ vpaddd xmm3, xmm3, xmm4 // add 16 results
+ vpaddd xmm1, xmm1, xmm2
+ vpaddd xmm1, xmm1, xmm3
+ vpshufd xmm2, xmm1, 0x0e // upper 2 dwords
+ vpaddd xmm1, xmm1, xmm2
+ vpshufd xmm2, xmm1, 0x01
+ vpaddd xmm1, xmm1, xmm2
+ vpaddd xmm0, xmm0, xmm1
sub ecx, 16
- paddd xmm1, xmm3
- pshufd xmm2, xmm1, 0x0e // upper 2 dwords
- paddd xmm1, xmm2
- pshufd xmm2, xmm1, 0x01
- paddd xmm1, xmm2
- paddd xmm0, xmm1
jg wloop
- movd eax, xmm0 // return hash
+ vmovd eax, xmm0 // return hash
+ vzeroupper
ret
}
}
#endif // _MSC_VER >= 1700
-#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
+#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
#ifdef __cplusplus
} // extern "C"
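
For context on the constants above: kHash16x33 and kHashMul0..3 hold the powers 33^16 down to 33^0 modulo 2^32, which is what lets the SIMD versions fold 16 input bytes per iteration of the classic DJB2 recurrence. A scalar sketch of the hash being computed (reference only; assumes count is a multiple of 16, as the callers ensure):

    static uint32 HashDjb2_ref(const uint8* src, int count, uint32 seed) {
      uint32 hash = seed;
      int i;
      for (i = 0; i < count; ++i) {
        hash = hash * 33u + src[i];
      }
      // One SIMD iteration performs 16 of these steps at once:
      //   hash = hash * 33^16 + sum(src[i] * 33^(15 - i)), all mod 2^32.
      return hash;
    }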
diff --git a/TMessagesProj/jni/libyuv/source/convert.cc b/TMessagesProj/jni/libyuv/source/convert.cc
index c31ecf263..5dc279f8a 100644
--- a/TMessagesProj/jni/libyuv/source/convert.cc
+++ b/TMessagesProj/jni/libyuv/source/convert.cc
@@ -188,17 +188,14 @@ static void CopyPlane2(const uint8* src, int src_stride_0, int src_stride_1,
int width, int height) {
int y;
void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
-#if defined(HAS_COPYROW_X86)
- if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
- CopyRow = CopyRow_X86;
+#if defined(HAS_COPYROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
}
#endif
-#if defined(HAS_COPYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
- IS_ALIGNED(src, 16) &&
- IS_ALIGNED(src_stride_0, 16) && IS_ALIGNED(src_stride_1, 16) &&
- IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
- CopyRow = CopyRow_SSE2;
+#if defined(HAS_COPYROW_AVX)
+ if (TestCpuFlag(kCpuHasAVX)) {
+ CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
}
#endif
#if defined(HAS_COPYROW_ERMS)
@@ -207,8 +204,8 @@ static void CopyPlane2(const uint8* src, int src_stride_0, int src_stride_1,
}
#endif
#if defined(HAS_COPYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
- CopyRow = CopyRow_NEON;
+ if (TestCpuFlag(kCpuHasNEON)) {
+ CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
}
#endif
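
This dispatch pattern recurs throughout the patch: the old width >= N guards disappear because every row function now has an _Any_ variant that accepts arbitrary widths. A minimal sketch of what such a wrapper does (hypothetical name; the real ones are macro-generated in libyuv's row headers):

    static void CopyRow_Any_SSE2_sketch(const uint8* src, uint8* dst, int width) {
      int n = width & ~31;                     // bulk: largest multiple of 32
      if (n > 0) {
        CopyRow_SSE2(src, dst, n);             // SIMD kernel, aligned count
      }
      CopyRow_C(src + n, dst + n, width - n);  // scalar tail, 0..31 bytes
    }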
#if defined(HAS_COPYROW_MIPS)
@@ -248,8 +245,8 @@ static int X420ToI420(const uint8* src_y,
int y;
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
- void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) =
- SplitUVRow_C;
+ void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+ int width) = SplitUVRow_C;
if (!src_y || !src_uv ||
!dst_y || !dst_u || !dst_v ||
width <= 0 || height == 0) {
@@ -283,20 +280,15 @@ static int X420ToI420(const uint8* src_y,
src_stride_uv = dst_stride_u = dst_stride_v = 0;
}
#if defined(HAS_SPLITUVROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
+ if (TestCpuFlag(kCpuHasSSE2)) {
SplitUVRow = SplitUVRow_Any_SSE2;
if (IS_ALIGNED(halfwidth, 16)) {
- SplitUVRow = SplitUVRow_Unaligned_SSE2;
- if (IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16) &&
- IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
- IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
- SplitUVRow = SplitUVRow_SSE2;
- }
+ SplitUVRow = SplitUVRow_SSE2;
}
}
#endif
#if defined(HAS_SPLITUVROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
+ if (TestCpuFlag(kCpuHasAVX2)) {
SplitUVRow = SplitUVRow_Any_AVX2;
if (IS_ALIGNED(halfwidth, 32)) {
SplitUVRow = SplitUVRow_AVX2;
@@ -304,7 +296,7 @@ static int X420ToI420(const uint8* src_y,
}
#endif
#if defined(HAS_SPLITUVROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
+ if (TestCpuFlag(kCpuHasNEON)) {
SplitUVRow = SplitUVRow_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) {
SplitUVRow = SplitUVRow_NEON;
@@ -312,15 +304,13 @@ static int X420ToI420(const uint8* src_y,
}
#endif
#if defined(HAS_SPLITUVROW_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && halfwidth >= 16) {
+ if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
+ IS_ALIGNED(src_uv, 4) && IS_ALIGNED(src_stride_uv, 4) &&
+ IS_ALIGNED(dst_u, 4) && IS_ALIGNED(dst_stride_u, 4) &&
+ IS_ALIGNED(dst_v, 4) && IS_ALIGNED(dst_stride_v, 4)) {
SplitUVRow = SplitUVRow_Any_MIPS_DSPR2;
if (IS_ALIGNED(halfwidth, 16)) {
- SplitUVRow = SplitUVRow_Unaligned_MIPS_DSPR2;
- if (IS_ALIGNED(src_uv, 4) && IS_ALIGNED(src_stride_uv, 4) &&
- IS_ALIGNED(dst_u, 4) && IS_ALIGNED(dst_stride_u, 4) &&
- IS_ALIGNED(dst_v, 4) && IS_ALIGNED(dst_stride_v, 4)) {
- SplitUVRow = SplitUVRow_MIPS_DSPR2;
- }
+ SplitUVRow = SplitUVRow_MIPS_DSPR2;
}
}
#endif
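
SplitUVRow deinterleaves the packed UV plane of NV12/NV21 into separate U and V planes; note that halfwidth is computed as (width + 1) >> 1 above so odd widths round up. A scalar reference sketch:

    static void SplitUVRow_ref(const uint8* src_uv,
                               uint8* dst_u, uint8* dst_v, int width) {
      int x;
      for (x = 0; x < width; ++x) {
        dst_u[x] = src_uv[2 * x];      // even bytes -> U plane
        dst_v[x] = src_uv[2 * x + 1];  // odd bytes  -> V plane
      }
    }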
@@ -391,125 +381,6 @@ int M420ToI420(const uint8* src_m420, int src_stride_m420,
width, height);
}
-// Convert Q420 to I420.
-// Format is rows of YY/YUYV
-LIBYUV_API
-int Q420ToI420(const uint8* src_y, int src_stride_y,
- const uint8* src_yuy2, int src_stride_yuy2,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- int y;
- int halfheight;
- void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
- void (*YUY2ToUV422Row)(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,
- int pix) = YUY2ToUV422Row_C;
- void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) =
- YUY2ToYRow_C;
- if (!src_y || !src_yuy2 ||
- !dst_y || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- halfheight = (height + 1) >> 1;
- dst_y = dst_y + (height - 1) * dst_stride_y;
- dst_u = dst_u + (halfheight - 1) * dst_stride_u;
- dst_v = dst_v + (halfheight - 1) * dst_stride_v;
- dst_stride_y = -dst_stride_y;
- dst_stride_u = -dst_stride_u;
- dst_stride_v = -dst_stride_v;
- }
- // CopyRow for rows of just Y in Q420 copied to Y plane of I420.
-#if defined(HAS_COPYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
- CopyRow = CopyRow_NEON;
- }
-#endif
-#if defined(HAS_COPYROW_X86)
- if (IS_ALIGNED(width, 4)) {
- CopyRow = CopyRow_X86;
- }
-#endif
-#if defined(HAS_COPYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
- IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
- IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- CopyRow = CopyRow_SSE2;
- }
-#endif
-#if defined(HAS_COPYROW_ERMS)
- if (TestCpuFlag(kCpuHasERMS)) {
- CopyRow = CopyRow_ERMS;
- }
-#endif
-#if defined(HAS_COPYROW_MIPS)
- if (TestCpuFlag(kCpuHasMIPS)) {
- CopyRow = CopyRow_MIPS;
- }
-#endif
-
-#if defined(HAS_YUY2TOYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
- YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
- YUY2ToYRow = YUY2ToYRow_Any_SSE2;
- if (IS_ALIGNED(width, 16)) {
- YUY2ToUV422Row = YUY2ToUV422Row_Unaligned_SSE2;
- YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2;
- if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) {
- YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- YUY2ToYRow = YUY2ToYRow_SSE2;
- }
- }
- }
- }
-#endif
-#if defined(HAS_YUY2TOYROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
- YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2;
- YUY2ToYRow = YUY2ToYRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- YUY2ToUV422Row = YUY2ToUV422Row_AVX2;
- YUY2ToYRow = YUY2ToYRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_YUY2TOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- YUY2ToYRow = YUY2ToYRow_Any_NEON;
- if (width >= 16) {
- YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
- }
- if (IS_ALIGNED(width, 16)) {
- YUY2ToYRow = YUY2ToYRow_NEON;
- YUY2ToUV422Row = YUY2ToUV422Row_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height - 1; y += 2) {
- CopyRow(src_y, dst_y, width);
- src_y += src_stride_y;
- dst_y += dst_stride_y;
-
- YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
- YUY2ToYRow(src_yuy2, dst_y, width);
- src_yuy2 += src_stride_yuy2;
- dst_y += dst_stride_y;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
- CopyRow(src_y, dst_y, width);
- YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
- }
- return 0;
-}
-
// Convert YUY2 to I420.
LIBYUV_API
int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
@@ -519,9 +390,9 @@ int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
int width, int height) {
int y;
void (*YUY2ToUVRow)(const uint8* src_yuy2, int src_stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) = YUY2ToUVRow_C;
+ uint8* dst_u, uint8* dst_v, int width) = YUY2ToUVRow_C;
void (*YUY2ToYRow)(const uint8* src_yuy2,
- uint8* dst_y, int pix) = YUY2ToYRow_C;
+ uint8* dst_y, int width) = YUY2ToYRow_C;
// Negative height means invert the image.
if (height < 0) {
height = -height;
@@ -529,23 +400,17 @@ int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
src_stride_yuy2 = -src_stride_yuy2;
}
#if defined(HAS_YUY2TOYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
+ if (TestCpuFlag(kCpuHasSSE2)) {
YUY2ToUVRow = YUY2ToUVRow_Any_SSE2;
YUY2ToYRow = YUY2ToYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
- YUY2ToUVRow = YUY2ToUVRow_Unaligned_SSE2;
- YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2;
- if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) {
- YUY2ToUVRow = YUY2ToUVRow_SSE2;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- YUY2ToYRow = YUY2ToYRow_SSE2;
- }
- }
+ YUY2ToUVRow = YUY2ToUVRow_SSE2;
+ YUY2ToYRow = YUY2ToYRow_SSE2;
}
}
#endif
#if defined(HAS_YUY2TOYROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
+ if (TestCpuFlag(kCpuHasAVX2)) {
YUY2ToUVRow = YUY2ToUVRow_Any_AVX2;
YUY2ToYRow = YUY2ToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
@@ -555,11 +420,9 @@ int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
}
#endif
#if defined(HAS_YUY2TOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+ if (TestCpuFlag(kCpuHasNEON)) {
YUY2ToYRow = YUY2ToYRow_Any_NEON;
- if (width >= 16) {
- YUY2ToUVRow = YUY2ToUVRow_Any_NEON;
- }
+ YUY2ToUVRow = YUY2ToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
YUY2ToYRow = YUY2ToYRow_NEON;
YUY2ToUVRow = YUY2ToUVRow_NEON;
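
YUY2 packs two pixels into four bytes as Y0 U0 Y1 V0, with U and V shared by the pixel pair (UYVY, converted next, is the U0 Y0 V0 Y1 ordering). A scalar sketch of the chroma extraction, for even widths and a single row:

    static void YUY2ToUV422Row_ref(const uint8* src_yuy2,
                                   uint8* dst_u, uint8* dst_v, int width) {
      int x;
      for (x = 0; x + 1 < width; x += 2) {
        dst_u[x / 2] = src_yuy2[1];  // chroma shared by pixels x and x + 1
        dst_v[x / 2] = src_yuy2[3];
        src_yuy2 += 4;
      }
    }

The two-row YUY2ToUVRow variant dispatched here additionally averages vertically adjacent chroma samples to produce the 4:2:0 output.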
@@ -592,9 +455,9 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
int width, int height) {
int y;
void (*UYVYToUVRow)(const uint8* src_uyvy, int src_stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) = UYVYToUVRow_C;
+ uint8* dst_u, uint8* dst_v, int width) = UYVYToUVRow_C;
void (*UYVYToYRow)(const uint8* src_uyvy,
- uint8* dst_y, int pix) = UYVYToYRow_C;
+ uint8* dst_y, int width) = UYVYToYRow_C;
// Negative height means invert the image.
if (height < 0) {
height = -height;
@@ -602,23 +465,17 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
src_stride_uyvy = -src_stride_uyvy;
}
#if defined(HAS_UYVYTOYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
+ if (TestCpuFlag(kCpuHasSSE2)) {
UYVYToUVRow = UYVYToUVRow_Any_SSE2;
UYVYToYRow = UYVYToYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
- UYVYToUVRow = UYVYToUVRow_Unaligned_SSE2;
- UYVYToYRow = UYVYToYRow_Unaligned_SSE2;
- if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16)) {
- UYVYToUVRow = UYVYToUVRow_SSE2;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- UYVYToYRow = UYVYToYRow_SSE2;
- }
- }
+ UYVYToUVRow = UYVYToUVRow_SSE2;
+ UYVYToYRow = UYVYToYRow_SSE2;
}
}
#endif
#if defined(HAS_UYVYTOYROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
+ if (TestCpuFlag(kCpuHasAVX2)) {
UYVYToUVRow = UYVYToUVRow_Any_AVX2;
UYVYToYRow = UYVYToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
@@ -628,11 +485,9 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
}
#endif
#if defined(HAS_UYVYTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+ if (TestCpuFlag(kCpuHasNEON)) {
UYVYToYRow = UYVYToYRow_Any_NEON;
- if (width >= 16) {
- UYVYToUVRow = UYVYToUVRow_Any_NEON;
- }
+ UYVYToUVRow = UYVYToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
UYVYToYRow = UYVYToYRow_NEON;
UYVYToUVRow = UYVYToUVRow_NEON;
@@ -666,7 +521,7 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb,
int y;
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+ void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
if (!src_argb ||
!dst_y || !dst_u || !dst_v ||
@@ -680,23 +535,17 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb,
src_stride_argb = -src_stride_argb;
}
#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3;
- ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
- ARGBToUVRow = ARGBToUVRow_SSSE3;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
- }
+ ARGBToUVRow = ARGBToUVRow_SSSE3;
+ ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
+ if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
@@ -706,7 +555,7 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_ARGBTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+ if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
@@ -714,7 +563,7 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_ARGBTOUVROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+ if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVRow = ARGBToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
@@ -748,7 +597,7 @@ int BGRAToI420(const uint8* src_bgra, int src_stride_bgra,
int y;
void (*BGRAToUVRow)(const uint8* src_bgra0, int src_stride_bgra,
uint8* dst_u, uint8* dst_v, int width) = BGRAToUVRow_C;
- void (*BGRAToYRow)(const uint8* src_bgra, uint8* dst_y, int pix) =
+ void (*BGRAToYRow)(const uint8* src_bgra, uint8* dst_y, int width) =
BGRAToYRow_C;
if (!src_bgra ||
!dst_y || !dst_u || !dst_v ||
@@ -761,23 +610,18 @@ int BGRAToI420(const uint8* src_bgra, int src_stride_bgra,
src_bgra = src_bgra + (height - 1) * src_stride_bgra;
src_stride_bgra = -src_stride_bgra;
}
-#if defined(HAS_BGRATOYROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+#if defined(HAS_BGRATOYROW_SSSE3) && defined(HAS_BGRATOUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
BGRAToUVRow = BGRAToUVRow_Any_SSSE3;
BGRAToYRow = BGRAToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- BGRAToUVRow = BGRAToUVRow_Unaligned_SSSE3;
- BGRAToYRow = BGRAToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_bgra, 16) && IS_ALIGNED(src_stride_bgra, 16)) {
- BGRAToUVRow = BGRAToUVRow_SSSE3;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- BGRAToYRow = BGRAToYRow_SSSE3;
- }
- }
+ BGRAToUVRow = BGRAToUVRow_SSSE3;
+ BGRAToYRow = BGRAToYRow_SSSE3;
}
}
-#elif defined(HAS_BGRATOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+#endif
+#if defined(HAS_BGRATOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
BGRAToYRow = BGRAToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
BGRAToYRow = BGRAToYRow_NEON;
@@ -785,7 +629,7 @@ int BGRAToI420(const uint8* src_bgra, int src_stride_bgra,
}
#endif
#if defined(HAS_BGRATOUVROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+ if (TestCpuFlag(kCpuHasNEON)) {
BGRAToUVRow = BGRAToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
BGRAToUVRow = BGRAToUVRow_NEON;
@@ -819,7 +663,7 @@ int ABGRToI420(const uint8* src_abgr, int src_stride_abgr,
int y;
void (*ABGRToUVRow)(const uint8* src_abgr0, int src_stride_abgr,
uint8* dst_u, uint8* dst_v, int width) = ABGRToUVRow_C;
- void (*ABGRToYRow)(const uint8* src_abgr, uint8* dst_y, int pix) =
+ void (*ABGRToYRow)(const uint8* src_abgr, uint8* dst_y, int width) =
ABGRToYRow_C;
if (!src_abgr ||
!dst_y || !dst_u || !dst_v ||
@@ -832,23 +676,18 @@ int ABGRToI420(const uint8* src_abgr, int src_stride_abgr,
src_abgr = src_abgr + (height - 1) * src_stride_abgr;
src_stride_abgr = -src_stride_abgr;
}
-#if defined(HAS_ABGRTOYROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+#if defined(HAS_ABGRTOYROW_SSSE3) && defined(HAS_ABGRTOUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
ABGRToUVRow = ABGRToUVRow_Any_SSSE3;
ABGRToYRow = ABGRToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- ABGRToUVRow = ABGRToUVRow_Unaligned_SSSE3;
- ABGRToYRow = ABGRToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_abgr, 16) && IS_ALIGNED(src_stride_abgr, 16)) {
- ABGRToUVRow = ABGRToUVRow_SSSE3;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- ABGRToYRow = ABGRToYRow_SSSE3;
- }
- }
+ ABGRToUVRow = ABGRToUVRow_SSSE3;
+ ABGRToYRow = ABGRToYRow_SSSE3;
}
}
-#elif defined(HAS_ABGRTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+#endif
+#if defined(HAS_ABGRTOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
ABGRToYRow = ABGRToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ABGRToYRow = ABGRToYRow_NEON;
@@ -856,7 +695,7 @@ int ABGRToI420(const uint8* src_abgr, int src_stride_abgr,
}
#endif
#if defined(HAS_ABGRTOUVROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+ if (TestCpuFlag(kCpuHasNEON)) {
ABGRToUVRow = ABGRToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ABGRToUVRow = ABGRToUVRow_NEON;
@@ -890,7 +729,7 @@ int RGBAToI420(const uint8* src_rgba, int src_stride_rgba,
int y;
void (*RGBAToUVRow)(const uint8* src_rgba0, int src_stride_rgba,
uint8* dst_u, uint8* dst_v, int width) = RGBAToUVRow_C;
- void (*RGBAToYRow)(const uint8* src_rgba, uint8* dst_y, int pix) =
+ void (*RGBAToYRow)(const uint8* src_rgba, uint8* dst_y, int width) =
RGBAToYRow_C;
if (!src_rgba ||
!dst_y || !dst_u || !dst_v ||
@@ -903,23 +742,18 @@ int RGBAToI420(const uint8* src_rgba, int src_stride_rgba,
src_rgba = src_rgba + (height - 1) * src_stride_rgba;
src_stride_rgba = -src_stride_rgba;
}
-#if defined(HAS_RGBATOYROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+#if defined(HAS_RGBATOYROW_SSSE3) && defined(HAS_RGBATOUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
RGBAToUVRow = RGBAToUVRow_Any_SSSE3;
RGBAToYRow = RGBAToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- RGBAToUVRow = RGBAToUVRow_Unaligned_SSSE3;
- RGBAToYRow = RGBAToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_rgba, 16) && IS_ALIGNED(src_stride_rgba, 16)) {
- RGBAToUVRow = RGBAToUVRow_SSSE3;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- RGBAToYRow = RGBAToYRow_SSSE3;
- }
- }
+ RGBAToUVRow = RGBAToUVRow_SSSE3;
+ RGBAToYRow = RGBAToYRow_SSSE3;
}
}
-#elif defined(HAS_RGBATOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+#endif
+#if defined(HAS_RGBATOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
RGBAToYRow = RGBAToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RGBAToYRow = RGBAToYRow_NEON;
@@ -927,7 +761,7 @@ int RGBAToI420(const uint8* src_rgba, int src_stride_rgba,
}
#endif
#if defined(HAS_RGBATOUVROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+ if (TestCpuFlag(kCpuHasNEON)) {
RGBAToUVRow = RGBAToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
RGBAToUVRow = RGBAToUVRow_NEON;
@@ -962,14 +796,14 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
#if defined(HAS_RGB24TOYROW_NEON)
void (*RGB24ToUVRow)(const uint8* src_rgb24, int src_stride_rgb24,
uint8* dst_u, uint8* dst_v, int width) = RGB24ToUVRow_C;
- void (*RGB24ToYRow)(const uint8* src_rgb24, uint8* dst_y, int pix) =
+ void (*RGB24ToYRow)(const uint8* src_rgb24, uint8* dst_y, int width) =
RGB24ToYRow_C;
#else
- void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
+ void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
RGB24ToARGBRow_C;
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+ void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
#endif
if (!src_rgb24 || !dst_y || !dst_u || !dst_v ||
@@ -983,54 +817,51 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
src_stride_rgb24 = -src_stride_rgb24;
}
+// Neon version does direct RGB24 to YUV.
#if defined(HAS_RGB24TOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+ if (TestCpuFlag(kCpuHasNEON)) {
+ RGB24ToUVRow = RGB24ToUVRow_Any_NEON;
RGB24ToYRow = RGB24ToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RGB24ToYRow = RGB24ToYRow_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ RGB24ToUVRow = RGB24ToUVRow_NEON;
+ }
}
}
-#endif
-#if defined(HAS_RGB24TOUVROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
- RGB24ToUVRow = RGB24ToUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- RGB24ToUVRow = RGB24ToUVRow_NEON;
- }
- }
-#endif
+// Other platforms do intermediate conversion from RGB24 to ARGB.
+#else
#if defined(HAS_RGB24TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
}
}
#endif
-#if defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+ ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
+ ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#endif
-#if defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToYRow = ARGBToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+ ARGBToYRow = ARGBToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_AVX2;
+ ARGBToYRow = ARGBToYRow_AVX2;
}
}
-#endif // HAS_ARGBTOUVROW_SSSE3
-
+#endif
{
-#if !defined(HAS_RGB24TOYROW_NEON)
// Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 15) & ~15;
+ const int kRowSize = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
#endif
@@ -1063,8 +894,8 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
}
#if !defined(HAS_RGB24TOYROW_NEON)
free_aligned_buffer_64(row);
-#endif
}
+#endif
return 0;
}
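
The scratch-row size changes from 16- to 32-byte rounding throughout these conversions so the two temporary ARGB rows stay wide enough for the 32-byte AVX2 kernels enabled above; (x + 31) & ~31 is the usual round-up-to-a-multiple idiom for powers of two. Worked examples:

    // width = 7:  7 * 4 = 28 -> (28 + 31) & ~31 = 32
    // width = 9:  9 * 4 = 36 -> (36 + 31) & ~31 = 64
    const int kRowSize = (width * 4 + 31) & ~31;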
@@ -1079,14 +910,14 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw,
#if defined(HAS_RAWTOYROW_NEON)
void (*RAWToUVRow)(const uint8* src_raw, int src_stride_raw,
uint8* dst_u, uint8* dst_v, int width) = RAWToUVRow_C;
- void (*RAWToYRow)(const uint8* src_raw, uint8* dst_y, int pix) =
+ void (*RAWToYRow)(const uint8* src_raw, uint8* dst_y, int width) =
RAWToYRow_C;
#else
- void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
+ void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
RAWToARGBRow_C;
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+ void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
#endif
if (!src_raw || !dst_y || !dst_u || !dst_v ||
@@ -1100,86 +931,85 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw,
src_stride_raw = -src_stride_raw;
}
+// Neon version does direct RAW to YUV.
#if defined(HAS_RAWTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+ if (TestCpuFlag(kCpuHasNEON)) {
+ RAWToUVRow = RAWToUVRow_Any_NEON;
RAWToYRow = RAWToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RAWToYRow = RAWToYRow_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ RAWToUVRow = RAWToUVRow_NEON;
+ }
}
}
-#endif
-#if defined(HAS_RAWTOUVROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
- RAWToUVRow = RAWToUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- RAWToUVRow = RAWToUVRow_NEON;
- }
- }
-#endif
+// Other platforms do intermediate conversion from RAW to ARGB.
+#else
#if defined(HAS_RAWTOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
RAWToARGBRow = RAWToARGBRow_SSSE3;
}
}
#endif
-#if defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+ ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
+ ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#endif
-#if defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToYRow = ARGBToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+ ARGBToYRow = ARGBToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_AVX2;
+ ARGBToYRow = ARGBToYRow_AVX2;
}
}
-#endif // HAS_ARGBTOUVROW_SSSE3
-
+#endif
{
// Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 15) & ~15;
+ const int kRowSize = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
+#endif
for (y = 0; y < height - 1; y += 2) {
- #if defined(HAS_RAWTOYROW_NEON)
+#if defined(HAS_RAWTOYROW_NEON)
RAWToUVRow(src_raw, src_stride_raw, dst_u, dst_v, width);
RAWToYRow(src_raw, dst_y, width);
RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
- #else
+#else
RAWToARGBRow(src_raw, row, width);
RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width);
ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
- #endif
+#endif
src_raw += src_stride_raw * 2;
dst_y += dst_stride_y * 2;
dst_u += dst_stride_u;
dst_v += dst_stride_v;
}
if (height & 1) {
- #if defined(HAS_RAWTOYROW_NEON)
+#if defined(HAS_RAWTOYROW_NEON)
RAWToUVRow(src_raw, 0, dst_u, dst_v, width);
RAWToYRow(src_raw, dst_y, width);
- #else
+#else
RAWToARGBRow(src_raw, row, width);
ARGBToUVRow(row, 0, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
- #endif
+#endif
}
- #if !defined(HAS_RAWTOYROW_NEON)
+#if !defined(HAS_RAWTOYROW_NEON)
free_aligned_buffer_64(row);
- #endif
}
+#endif
return 0;
}
@@ -1194,14 +1024,14 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
#if defined(HAS_RGB565TOYROW_NEON)
void (*RGB565ToUVRow)(const uint8* src_rgb565, int src_stride_rgb565,
uint8* dst_u, uint8* dst_v, int width) = RGB565ToUVRow_C;
- void (*RGB565ToYRow)(const uint8* src_rgb565, uint8* dst_y, int pix) =
+ void (*RGB565ToYRow)(const uint8* src_rgb565, uint8* dst_y, int width) =
RGB565ToYRow_C;
#else
- void (*RGB565ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
+ void (*RGB565ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
RGB565ToARGBRow_C;
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+ void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
#endif
if (!src_rgb565 || !dst_y || !dst_u || !dst_v ||
@@ -1215,54 +1045,59 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
src_stride_rgb565 = -src_stride_rgb565;
}
+// Neon version does direct RGB565 to YUV.
#if defined(HAS_RGB565TOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+ if (TestCpuFlag(kCpuHasNEON)) {
+ RGB565ToUVRow = RGB565ToUVRow_Any_NEON;
RGB565ToYRow = RGB565ToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RGB565ToYRow = RGB565ToYRow_NEON;
- }
- if (width >= 16) {
- RGB565ToUVRow = RGB565ToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
RGB565ToUVRow = RGB565ToUVRow_NEON;
}
}
}
-#else // HAS_RGB565TOYROW_NEON
-
+// Other platforms do intermediate conversion from RGB565 to ARGB.
+#else
#if defined(HAS_RGB565TOARGBROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 8) {
+ if (TestCpuFlag(kCpuHasSSE2)) {
RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) {
RGB565ToARGBRow = RGB565ToARGBRow_SSE2;
}
}
#endif
-#if defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+#if defined(HAS_RGB565TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ RGB565ToARGBRow = RGB565ToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_SSSE3;
+ RGB565ToARGBRow = RGB565ToARGBRow_AVX2;
}
}
#endif
-#if defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
+ ARGBToUVRow = ARGBToUVRow_SSSE3;
+ ARGBToYRow = ARGBToYRow_SSSE3;
}
}
-#endif // HAS_ARGBTOUVROW_SSSE3
-#endif // HAS_RGB565TOYROW_NEON
-
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+ ARGBToYRow = ARGBToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_AVX2;
+ ARGBToYRow = ARGBToYRow_AVX2;
+ }
+ }
+#endif
{
-#if !defined(HAS_RGB565TOYROW_NEON)
// Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 15) & ~15;
+ const int kRowSize = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
#endif
@@ -1295,8 +1130,8 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
}
#if !defined(HAS_RGB565TOYROW_NEON)
free_aligned_buffer_64(row);
-#endif
}
+#endif
return 0;
}
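
RGB565 stores blue in bits 0..4, green in bits 5..10 and red in bits 11..15 of a little-endian 16-bit word. The ...ToARGBRow kernels expand each field to 8 bits by replicating the top bits into the low bits, so full-scale 0x1f/0x3f map to 0xff. A per-pixel scalar sketch (src/dst names are illustrative):

    uint16 p = (uint16)(src[0] | (src[1] << 8));
    uint8 b = (uint8)(p & 0x1f);
    uint8 g = (uint8)((p >> 5) & 0x3f);
    uint8 r = (uint8)((p >> 11) & 0x1f);
    dst[0] = (uint8)((b << 3) | (b >> 2));  // 5 -> 8 bits
    dst[1] = (uint8)((g << 2) | (g >> 4));  // 6 -> 8 bits
    dst[2] = (uint8)((r << 3) | (r >> 2));  // 5 -> 8 bits
    dst[3] = 255u;                          // opaque alpha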
@@ -1311,14 +1146,14 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
#if defined(HAS_ARGB1555TOYROW_NEON)
void (*ARGB1555ToUVRow)(const uint8* src_argb1555, int src_stride_argb1555,
uint8* dst_u, uint8* dst_v, int width) = ARGB1555ToUVRow_C;
- void (*ARGB1555ToYRow)(const uint8* src_argb1555, uint8* dst_y, int pix) =
+ void (*ARGB1555ToYRow)(const uint8* src_argb1555, uint8* dst_y, int width) =
ARGB1555ToYRow_C;
#else
- void (*ARGB1555ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
+ void (*ARGB1555ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
ARGB1555ToARGBRow_C;
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+ void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
#endif
if (!src_argb1555 || !dst_y || !dst_u || !dst_v ||
@@ -1332,56 +1167,62 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
src_stride_argb1555 = -src_stride_argb1555;
}
+// Neon version does direct ARGB1555 to YUV.
#if defined(HAS_ARGB1555TOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGB1555ToUVRow = ARGB1555ToUVRow_Any_NEON;
ARGB1555ToYRow = ARGB1555ToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGB1555ToYRow = ARGB1555ToYRow_NEON;
- }
- if (width >= 16) {
- ARGB1555ToUVRow = ARGB1555ToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGB1555ToUVRow = ARGB1555ToUVRow_NEON;
}
}
}
-#else // HAS_ARGB1555TOYROW_NEON
-
+// Other platforms do intermediate conversion from ARGB1555 to ARGB.
+#else
#if defined(HAS_ARGB1555TOARGBROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 8) {
+ if (TestCpuFlag(kCpuHasSSE2)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2;
}
}
#endif
-#if defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+#if defined(HAS_ARGB1555TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_SSSE3;
+ ARGB1555ToARGBRow = ARGB1555ToARGBRow_AVX2;
}
}
#endif
-#if defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
+ ARGBToUVRow = ARGBToUVRow_SSSE3;
+ ARGBToYRow = ARGBToYRow_SSSE3;
}
}
-#endif // HAS_ARGBTOUVROW_SSSE3
-#endif // HAS_ARGB1555TOYROW_NEON
-
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+ ARGBToYRow = ARGBToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_AVX2;
+ ARGBToYRow = ARGBToYRow_AVX2;
+ }
+ }
+#endif
{
-#if !defined(HAS_ARGB1555TOYROW_NEON)
// Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 15) & ~15;
+ const int kRowSize = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
#endif
+
for (y = 0; y < height - 1; y += 2) {
#if defined(HAS_ARGB1555TOYROW_NEON)
ARGB1555ToUVRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, width);
@@ -1412,9 +1253,9 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
#endif
}
#if !defined(HAS_ARGB1555TOYROW_NEON)
- free_aligned_buffer_64(row);
-#endif
+ free_aligned_buffer_64(row);
}
+#endif
return 0;
}
@@ -1429,14 +1270,14 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
#if defined(HAS_ARGB4444TOYROW_NEON)
void (*ARGB4444ToUVRow)(const uint8* src_argb4444, int src_stride_argb4444,
uint8* dst_u, uint8* dst_v, int width) = ARGB4444ToUVRow_C;
- void (*ARGB4444ToYRow)(const uint8* src_argb4444, uint8* dst_y, int pix) =
+ void (*ARGB4444ToYRow)(const uint8* src_argb4444, uint8* dst_y, int width) =
ARGB4444ToYRow_C;
#else
- void (*ARGB4444ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
+ void (*ARGB4444ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
ARGB4444ToARGBRow_C;
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+ void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
#endif
if (!src_argb4444 || !dst_y || !dst_u || !dst_v ||
@@ -1450,54 +1291,59 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
src_stride_argb4444 = -src_stride_argb4444;
}
+// Neon version does direct ARGB4444 to YUV.
#if defined(HAS_ARGB4444TOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGB4444ToUVRow = ARGB4444ToUVRow_Any_NEON;
ARGB4444ToYRow = ARGB4444ToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGB4444ToYRow = ARGB4444ToYRow_NEON;
- }
- if (width >= 16) {
- ARGB4444ToUVRow = ARGB4444ToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGB4444ToUVRow = ARGB4444ToUVRow_NEON;
}
}
}
-#else // HAS_ARGB4444TOYROW_NEON
-
+// Other platforms do intermediate conversion from ARGB4444 to ARGB.
+#else
#if defined(HAS_ARGB4444TOARGBROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 8) {
+ if (TestCpuFlag(kCpuHasSSE2)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2;
}
}
#endif
-#if defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+#if defined(HAS_ARGB4444TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_SSSE3;
+ ARGB4444ToARGBRow = ARGB4444ToARGBRow_AVX2;
}
}
#endif
-#if defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
+ ARGBToUVRow = ARGBToUVRow_SSSE3;
+ ARGBToYRow = ARGBToYRow_SSSE3;
}
}
-#endif // HAS_ARGBTOUVROW_SSSE3
-#endif // HAS_ARGB4444TOYROW_NEON
-
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+ ARGBToYRow = ARGBToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_AVX2;
+ ARGBToYRow = ARGBToYRow_AVX2;
+ }
+ }
+#endif
{
-#if !defined(HAS_ARGB4444TOYROW_NEON)
// Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 15) & ~15;
+ const int kRowSize = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
#endif
@@ -1532,8 +1378,8 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
}
#if !defined(HAS_ARGB4444TOYROW_NEON)
free_aligned_buffer_64(row);
-#endif
}
+#endif
return 0;
}
diff --git a/TMessagesProj/jni/libyuv/source/convert_argb.cc b/TMessagesProj/jni/libyuv/source/convert_argb.cc
index ac0bc3d15..cf3d72289 100644
--- a/TMessagesProj/jni/libyuv/source/convert_argb.cc
+++ b/TMessagesProj/jni/libyuv/source/convert_argb.cc
@@ -11,7 +11,6 @@
#include "libyuv/convert_argb.h"
#include "libyuv/cpu_id.h"
-#include "libyuv/format_conversion.h"
#ifdef HAVE_JPEG
#include "libyuv/mjpeg_decoder.h"
#endif
@@ -45,21 +44,21 @@ int ARGBCopy(const uint8* src_argb, int src_stride_argb,
return 0;
}
-// Convert I444 to ARGB.
-LIBYUV_API
-int I444ToARGB(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
+// Convert I420 to ARGB with matrix
+static int I420ToARGBMatrix(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_argb, int dst_stride_argb,
+ const struct YuvConstants* yuvconstants,
+ int width, int height) {
int y;
- void (*I444ToARGBRow)(const uint8* y_buf,
+ void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
- int width) = I444ToARGBRow_C;
- if (!src_y || !src_u || !src_v ||
- !dst_argb ||
+ const struct YuvConstants* yuvconstants,
+ int width) = I422ToARGBRow_C;
+ if (!src_y || !src_u || !src_v || !dst_argb ||
width <= 0 || height == 0) {
return -1;
}
@@ -69,56 +68,155 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
- // Coalesce rows.
- if (src_stride_y == width &&
- src_stride_u == width &&
- src_stride_v == width &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
- }
-#if defined(HAS_I444TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- I444ToARGBRow = I444ToARGBRow_Any_SSSE3;
+#if defined(HAS_I422TOARGBROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
- I444ToARGBRow = I444ToARGBRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- I444ToARGBRow = I444ToARGBRow_SSSE3;
- }
+ I422ToARGBRow = I422ToARGBRow_SSSE3;
}
}
-#elif defined(HAS_I444TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- I444ToARGBRow = I444ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- I444ToARGBRow = I444ToARGBRow_NEON;
+#endif
+#if defined(HAS_I422TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToARGBRow = I422ToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToARGBRow = I422ToARGBRow_AVX2;
}
}
#endif
+#if defined(HAS_I422TOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I422ToARGBRow = I422ToARGBRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToARGBRow = I422ToARGBRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
+ if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
+ IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
+ IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
+ IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
+ IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
+ I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
+ }
+#endif
for (y = 0; y < height; ++y) {
- I444ToARGBRow(src_y, src_u, src_v, dst_argb, width);
+ I422ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
- src_u += src_stride_u;
- src_v += src_stride_v;
+ if (y & 1) {
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ }
}
return 0;
}
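
Note the chroma stepping in the loop above: I420 subsamples U and V 2x vertically, so one chroma row feeds two luma rows and src_u/src_v advance only on odd y. That is what allows a single I422 row kernel to serve the I420 path:

    // Luma row -> chroma row consumed (4-row image):
    //   y = 0 -> U/V row 0      y = 2 -> U/V row 1
    //   y = 1 -> U/V row 0      y = 3 -> U/V row 1
    // i.e. chroma_row = y >> 1, implemented by the (y & 1) advance.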
-// Convert I422 to ARGB.
+// Convert I420 to ARGB.
LIBYUV_API
-int I422ToARGB(const uint8* src_y, int src_stride_y,
+int I420ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
+ return I420ToARGBMatrix(src_y, src_stride_y,
+ src_u, src_stride_u,
+ src_v, src_stride_v,
+ dst_argb, dst_stride_argb,
+ &kYuvI601Constants,
+ width, height);
+}
+
+// Convert I420 to ABGR.
+LIBYUV_API
+int I420ToABGR(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_abgr, int dst_stride_abgr,
+ int width, int height) {
+ return I420ToARGBMatrix(src_y, src_stride_y,
+ src_v, src_stride_v, // Swap U and V
+ src_u, src_stride_u,
+ dst_abgr, dst_stride_abgr,
+ &kYvuI601Constants, // Use Yvu matrix
+ width, height);
+}
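
The ABGR wrappers reuse the ARGB machinery instead of adding mirrored kernels: passing V where the kernel expects U (and vice versa), together with the kYvu* constant tables, makes the kernel compute the red value into the byte slot it normally writes blue to, so its B,G,R,A output becomes R,G,B,A, which is little-endian ABGR. Callers see no difference in the interface, e.g.:

    // Same plane/stride arguments as I420ToARGB; only the destination
    // byte order differs (R,G,B,A instead of B,G,R,A).
    I420ToABGR(src_y, y_stride, src_u, u_stride, src_v, v_stride,
               dst_abgr, dst_stride, width, height);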
+
+// Convert J420 to ARGB.
+LIBYUV_API
+int J420ToARGB(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_argb, int dst_stride_argb,
+ int width, int height) {
+ return I420ToARGBMatrix(src_y, src_stride_y,
+ src_u, src_stride_u,
+ src_v, src_stride_v,
+ dst_argb, dst_stride_argb,
+ &kYuvJPEGConstants,
+ width, height);
+}
+
+// Convert J420 to ABGR.
+LIBYUV_API
+int J420ToABGR(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_abgr, int dst_stride_abgr,
+ int width, int height) {
+ return I420ToARGBMatrix(src_y, src_stride_y,
+ src_v, src_stride_v, // Swap U and V
+ src_u, src_stride_u,
+ dst_abgr, dst_stride_abgr,
+ &kYvuJPEGConstants, // Use Yvu matrix
+ width, height);
+}
+
+// Convert H420 to ARGB.
+LIBYUV_API
+int H420ToARGB(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_argb, int dst_stride_argb,
+ int width, int height) {
+ return I420ToARGBMatrix(src_y, src_stride_y,
+ src_u, src_stride_u,
+ src_v, src_stride_v,
+ dst_argb, dst_stride_argb,
+ &kYuvH709Constants,
+ width, height);
+}
+
+// Convert H420 to ABGR.
+LIBYUV_API
+int H420ToABGR(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_abgr, int dst_stride_abgr,
+ int width, int height) {
+ return I420ToARGBMatrix(src_y, src_stride_y,
+ src_v, src_stride_v, // Swap U and V
+ src_u, src_stride_u,
+ dst_abgr, dst_stride_abgr,
+ &kYvuH709Constants, // Use Yvu matrix
+ width, height);
+}
+
+// Convert I422 to ARGB with matrix
+static int I422ToARGBMatrix(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_argb, int dst_stride_argb,
+ const struct YuvConstants* yuvconstants,
+ int width, int height) {
int y;
void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
+ const struct YuvConstants* yuvconstants,
int width) = I422ToARGBRow_C;
if (!src_y || !src_u || !src_v ||
!dst_argb ||
@@ -141,18 +239,15 @@ int I422ToARGB(const uint8* src_y, int src_stride_y,
src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
}
#if defined(HAS_I422TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
- I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- I422ToARGBRow = I422ToARGBRow_SSSE3;
- }
+ I422ToARGBRow = I422ToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_I422TOARGBROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
+ if (TestCpuFlag(kCpuHasAVX2)) {
I422ToARGBRow = I422ToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
I422ToARGBRow = I422ToARGBRow_AVX2;
@@ -160,7 +255,7 @@ int I422ToARGB(const uint8* src_y, int src_stride_y,
}
#endif
#if defined(HAS_I422TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+ if (TestCpuFlag(kCpuHasNEON)) {
I422ToARGBRow = I422ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_NEON;
@@ -178,7 +273,7 @@ int I422ToARGB(const uint8* src_y, int src_stride_y,
#endif
for (y = 0; y < height; ++y) {
- I422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
+ I422ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
src_u += src_stride_u;
@@ -187,6 +282,210 @@ int I422ToARGB(const uint8* src_y, int src_stride_y,
return 0;
}
+// Convert I422 to ARGB.
+LIBYUV_API
+int I422ToARGB(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_argb, int dst_stride_argb,
+ int width, int height) {
+ return I422ToARGBMatrix(src_y, src_stride_y,
+ src_u, src_stride_u,
+ src_v, src_stride_v,
+ dst_argb, dst_stride_argb,
+ &kYuvI601Constants,
+ width, height);
+}
+
+// Convert I422 to ABGR.
+LIBYUV_API
+int I422ToABGR(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_abgr, int dst_stride_abgr,
+ int width, int height) {
+ return I422ToARGBMatrix(src_y, src_stride_y,
+ src_v, src_stride_v, // Swap U and V
+ src_u, src_stride_u,
+ dst_abgr, dst_stride_abgr,
+ &kYvuI601Constants, // Use Yvu matrix
+ width, height);
+}
+
+// Convert J422 to ARGB.
+LIBYUV_API
+int J422ToARGB(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_argb, int dst_stride_argb,
+ int width, int height) {
+ return I422ToARGBMatrix(src_y, src_stride_y,
+ src_u, src_stride_u,
+ src_v, src_stride_v,
+ dst_argb, dst_stride_argb,
+ &kYuvJPEGConstants,
+ width, height);
+}
+
+// Convert J422 to ABGR.
+LIBYUV_API
+int J422ToABGR(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_abgr, int dst_stride_abgr,
+ int width, int height) {
+ return I422ToARGBMatrix(src_y, src_stride_y,
+ src_v, src_stride_v, // Swap U and V
+ src_u, src_stride_u,
+ dst_abgr, dst_stride_abgr,
+ &kYvuJPEGConstants, // Use Yvu matrix
+ width, height);
+}
+
+// Convert H422 to ARGB.
+LIBYUV_API
+int H422ToARGB(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_argb, int dst_stride_argb,
+ int width, int height) {
+ return I422ToARGBMatrix(src_y, src_stride_y,
+ src_u, src_stride_u,
+ src_v, src_stride_v,
+ dst_argb, dst_stride_argb,
+ &kYuvH709Constants,
+ width, height);
+}
+
+// Convert H422 to ABGR.
+LIBYUV_API
+int H422ToABGR(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_abgr, int dst_stride_abgr,
+ int width, int height) {
+ return I422ToARGBMatrix(src_y, src_stride_y,
+ src_v, src_stride_v, // Swap U and V
+ src_u, src_stride_u,
+ dst_abgr, dst_stride_abgr,
+ &kYvuH709Constants, // Use Yvu matrix
+ width, height);
+}
+
+// Convert I444 to ARGB with matrix
+static int I444ToARGBMatrix(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_argb, int dst_stride_argb,
+ const struct YuvConstants* yuvconstants,
+ int width, int height) {
+ int y;
+ void (*I444ToARGBRow)(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width) = I444ToARGBRow_C;
+ if (!src_y || !src_u || !src_v ||
+ !dst_argb ||
+ width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+ dst_stride_argb = -dst_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_y == width &&
+ src_stride_u == width &&
+ src_stride_v == width &&
+ dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
+ }
+#if defined(HAS_I444TOARGBROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ I444ToARGBRow = I444ToARGBRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 8)) {
+ I444ToARGBRow = I444ToARGBRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_I444TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I444ToARGBRow = I444ToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ I444ToARGBRow = I444ToARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I444TOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I444ToARGBRow = I444ToARGBRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ I444ToARGBRow = I444ToARGBRow_NEON;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ I444ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
+ dst_argb += dst_stride_argb;
+ src_y += src_stride_y;
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ }
+ return 0;
+}
+
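
The "Coalesce rows" step kept in I444ToARGBMatrix above deserves a note: when every plane is stored contiguously (stride equals the row width in samples), the image can be processed as one long row, paying the per-row dispatch cost once. It is valid here because 4:4:4 has one chroma row per luma row; the I420 path cannot coalesce, since each chroma row is reused by two luma rows. Sketch of the transform:

    // Before: height calls on rows of width pixels.
    // After:  one call on width * height pixels; strides no longer matter.
    if (src_stride_y == width && src_stride_u == width &&
        src_stride_v == width && dst_stride_argb == width * 4) {
      width *= height;
      height = 1;
    }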
+// Convert I444 to ARGB.
+LIBYUV_API
+int I444ToARGB(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_argb, int dst_stride_argb,
+ int width, int height) {
+ return I444ToARGBMatrix(src_y, src_stride_y,
+ src_u, src_stride_u,
+ src_v, src_stride_v,
+ dst_argb, dst_stride_argb,
+ &kYuvI601Constants,
+ width, height);
+}
+
+// Convert I444 to ABGR.
+LIBYUV_API
+int I444ToABGR(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_abgr, int dst_stride_abgr,
+ int width, int height) {
+ return I444ToARGBMatrix(src_y, src_stride_y,
+ src_v, src_stride_v, // Swap U and V
+ src_u, src_stride_u,
+ dst_abgr, dst_stride_abgr,
+ &kYvuI601Constants, // Use Yvu matrix
+ width, height);
+}
+
+// Convert J444 to ARGB.
+LIBYUV_API
+int J444ToARGB(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_argb, int dst_stride_argb,
+ int width, int height) {
+ return I444ToARGBMatrix(src_y, src_stride_y,
+ src_u, src_stride_u,
+ src_v, src_stride_v,
+ dst_argb, dst_stride_argb,
+ &kYuvJPEGConstants,
+ width, height);
+}
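+
+// Matrix naming used by these wrappers: kYuvI601Constants is limited-range
+// BT.601, kYuvJPEGConstants is full-range BT.601 (JPEG/JFIF), and
+// kYuvH709Constants is limited-range BT.709; each kYvu* twin is the same
+// matrix with the chroma channels exchanged for the byte-swapped outputs.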
+
// Convert I411 to ARGB.
LIBYUV_API
int I411ToARGB(const uint8* src_y, int src_stride_y,
@@ -199,6 +498,7 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
+ const struct YuvConstants* yuvconstants,
int width) = I411ToARGBRow_C;
if (!src_y || !src_u || !src_v ||
!dst_argb ||
@@ -221,17 +521,23 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
}
#if defined(HAS_I411TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
I411ToARGBRow = I411ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
- I411ToARGBRow = I411ToARGBRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- I411ToARGBRow = I411ToARGBRow_SSSE3;
- }
+ I411ToARGBRow = I411ToARGBRow_SSSE3;
}
}
-#elif defined(HAS_I411TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+#endif
+#if defined(HAS_I411TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I411ToARGBRow = I411ToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ I411ToARGBRow = I411ToARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I411TOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
I411ToARGBRow = I411ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I411ToARGBRow = I411ToARGBRow_NEON;
@@ -240,7 +546,7 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
#endif
for (y = 0; y < height; ++y) {
- I411ToARGBRow(src_y, src_u, src_v, dst_argb, width);
+ I411ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvI601Constants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
src_u += src_stride_u;
@@ -249,15 +555,152 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
return 0;
}
+// Convert I420 with Alpha to ARGB, optionally premultiplying (attenuating)
+// the color channels by alpha.
+static int I420AlphaToARGBMatrix(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ const uint8* src_a, int src_stride_a,
+ uint8* dst_argb, int dst_stride_argb,
+ const struct YuvConstants* yuvconstants,
+ int width, int height, int attenuate) {
+ int y;
+ void (*I422AlphaToARGBRow)(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ const uint8* a_buf,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) = I422AlphaToARGBRow_C;
+ void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb,
+ int width) = ARGBAttenuateRow_C;
+ if (!src_y || !src_u || !src_v || !dst_argb ||
+ width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+ dst_stride_argb = -dst_stride_argb;
+ }
+#if defined(HAS_I422ALPHATOARGBROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ I422AlphaToARGBRow = I422AlphaToARGBRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 8)) {
+ I422AlphaToARGBRow = I422AlphaToARGBRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_I422ALPHATOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422AlphaToARGBRow = I422AlphaToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ I422AlphaToARGBRow = I422AlphaToARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I422ALPHATOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I422AlphaToARGBRow = I422AlphaToARGBRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ I422AlphaToARGBRow = I422AlphaToARGBRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_I422ALPHATOARGBROW_MIPS_DSPR2)
+ if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
+ IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
+ IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
+ IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
+ IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
+ I422AlphaToARGBRow = I422AlphaToARGBRow_MIPS_DSPR2;
+ }
+#endif
+#if defined(HAS_ARGBATTENUATEROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBATTENUATEROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBATTENUATEROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_NEON;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ I422AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants,
+ width);
+ if (attenuate) {
+ ARGBAttenuateRow(dst_argb, dst_argb, width);
+ }
+ dst_argb += dst_stride_argb;
+ src_a += src_stride_a;
+ src_y += src_stride_y;
+ if (y & 1) {
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ }
+ }
+ return 0;
+}
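+
+// "Attenuate" premultiplies each color channel by alpha after conversion.
+// A scalar sketch of the intent (illustrative only; the row functions use
+// their own fixed-point rounding):
+//
+//   uint8 Premultiply(uint8 color, uint8 alpha) {
+//     return (uint8)(((uint32)color * alpha) / 255);  // 200, 128 -> 100
+//   }
+//
+// Note the (y & 1) test in the loop above: I420 chroma is subsampled 2x
+// vertically, so U and V advance only every other output row.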
+
+// Convert I420 with Alpha to ARGB.
+LIBYUV_API
+int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ const uint8* src_a, int src_stride_a,
+ uint8* dst_argb, int dst_stride_argb,
+ int width, int height, int attenuate) {
+ return I420AlphaToARGBMatrix(src_y, src_stride_y,
+ src_u, src_stride_u,
+ src_v, src_stride_v,
+ src_a, src_stride_a,
+ dst_argb, dst_stride_argb,
+ &kYuvI601Constants,
+ width, height, attenuate);
+}
+
+// Convert I420 with Alpha to ABGR.
+LIBYUV_API
+int I420AlphaToABGR(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ const uint8* src_a, int src_stride_a,
+ uint8* dst_abgr, int dst_stride_abgr,
+ int width, int height, int attenuate) {
+ return I420AlphaToARGBMatrix(src_y, src_stride_y,
+ src_v, src_stride_v, // Swap U and V
+ src_u, src_stride_u,
+ src_a, src_stride_a,
+ dst_abgr, dst_stride_abgr,
+ &kYvuI601Constants, // Use Yvu matrix
+ width, height, attenuate);
+}
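+
+// Usage sketch (hypothetical buffers): convert an I420 frame plus a
+// full-resolution alpha plane straight to premultiplied ARGB:
+//
+//   I420AlphaToARGB(y, w, u, w / 2, v, w / 2, a, w,
+//                   argb, w * 4, w, h, 1 /* attenuate */);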
+
// Convert I400 to ARGB.
LIBYUV_API
-int I400ToARGB_Reference(const uint8* src_y, int src_stride_y,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
+int I400ToARGB(const uint8* src_y, int src_stride_y,
+ uint8* dst_argb, int dst_stride_argb,
+ int width, int height) {
int y;
- void (*YToARGBRow)(const uint8* y_buf,
+ void (*I400ToARGBRow)(const uint8* y_buf,
uint8* rgb_buf,
- int width) = YToARGBRow_C;
+ int width) = I400ToARGBRow_C;
if (!src_y || !dst_argb ||
width <= 0 || height == 0) {
return -1;
@@ -275,39 +718,47 @@ int I400ToARGB_Reference(const uint8* src_y, int src_stride_y,
height = 1;
src_stride_y = dst_stride_argb = 0;
}
-#if defined(HAS_YTOARGBROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- YToARGBRow = YToARGBRow_Any_SSE2;
+#if defined(HAS_I400TOARGBROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ I400ToARGBRow = I400ToARGBRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) {
- YToARGBRow = YToARGBRow_SSE2;
+ I400ToARGBRow = I400ToARGBRow_SSE2;
}
}
-#elif defined(HAS_YTOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- YToARGBRow = YToARGBRow_Any_NEON;
+#endif
+#if defined(HAS_I400TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I400ToARGBRow = I400ToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ I400ToARGBRow = I400ToARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I400TOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I400ToARGBRow = I400ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
- YToARGBRow = YToARGBRow_NEON;
+ I400ToARGBRow = I400ToARGBRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
- YToARGBRow(src_y, dst_argb, width);
+ I400ToARGBRow(src_y, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
}
return 0;
}
-// Convert I400 to ARGB.
+// Convert J400 to ARGB.
LIBYUV_API
-int I400ToARGB(const uint8* src_y, int src_stride_y,
+int J400ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
- void (*I400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int pix) =
- I400ToARGBRow_C;
+ void (*J400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int width) =
+ J400ToARGBRow_C;
if (!src_y || !dst_argb ||
width <= 0 || height == 0) {
return -1;
@@ -325,26 +776,32 @@ int I400ToARGB(const uint8* src_y, int src_stride_y,
height = 1;
src_stride_y = dst_stride_argb = 0;
}
-#if defined(HAS_I400TOARGBROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 8) {
- I400ToARGBRow = I400ToARGBRow_Any_SSE2;
+#if defined(HAS_J400TOARGBROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ J400ToARGBRow = J400ToARGBRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) {
- I400ToARGBRow = I400ToARGBRow_Unaligned_SSE2;
- if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- I400ToARGBRow = I400ToARGBRow_SSE2;
- }
+ J400ToARGBRow = J400ToARGBRow_SSE2;
}
}
-#elif defined(HAS_I400TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- I400ToARGBRow = I400ToARGBRow_Any_NEON;
+#endif
+#if defined(HAS_J400TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ J400ToARGBRow = J400ToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ J400ToARGBRow = J400ToARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_J400TOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ J400ToARGBRow = J400ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
- I400ToARGBRow = I400ToARGBRow_NEON;
+ J400ToARGBRow = J400ToARGBRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
- I400ToARGBRow(src_y, dst_argb, width);
+ J400ToARGBRow(src_y, dst_argb, width);
src_y += src_stride_y;
dst_argb += dst_stride_argb;
}
@@ -427,7 +884,7 @@ int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
- void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
+ void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
RGB24ToARGBRow_C;
if (!src_rgb24 || !dst_argb ||
width <= 0 || height == 0) {
@@ -447,15 +904,15 @@ int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24,
src_stride_rgb24 = dst_stride_argb = 0;
}
#if defined(HAS_RGB24TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16 &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
}
}
-#elif defined(HAS_RGB24TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+#endif
+#if defined(HAS_RGB24TOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RGB24ToARGBRow = RGB24ToARGBRow_NEON;
@@ -477,7 +934,7 @@ int RAWToARGB(const uint8* src_raw, int src_stride_raw,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
- void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
+ void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
RAWToARGBRow_C;
if (!src_raw || !dst_argb ||
width <= 0 || height == 0) {
@@ -497,15 +954,15 @@ int RAWToARGB(const uint8* src_raw, int src_stride_raw,
src_stride_raw = dst_stride_argb = 0;
}
#if defined(HAS_RAWTOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16 &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
RAWToARGBRow = RAWToARGBRow_SSSE3;
}
}
-#elif defined(HAS_RAWTOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+#endif
+#if defined(HAS_RAWTOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
RAWToARGBRow = RAWToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RAWToARGBRow = RAWToARGBRow_NEON;
@@ -527,7 +984,7 @@ int RGB565ToARGB(const uint8* src_rgb565, int src_stride_rgb565,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
- void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int pix) =
+ void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int width) =
RGB565ToARGBRow_C;
if (!src_rgb565 || !dst_argb ||
width <= 0 || height == 0) {
@@ -547,15 +1004,23 @@ int RGB565ToARGB(const uint8* src_rgb565, int src_stride_rgb565,
src_stride_rgb565 = dst_stride_argb = 0;
}
#if defined(HAS_RGB565TOARGBROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+ if (TestCpuFlag(kCpuHasSSE2)) {
RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) {
RGB565ToARGBRow = RGB565ToARGBRow_SSE2;
}
}
-#elif defined(HAS_RGB565TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+#endif
+#if defined(HAS_RGB565TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ RGB565ToARGBRow = RGB565ToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ RGB565ToARGBRow = RGB565ToARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_RGB565TOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
RGB565ToARGBRow = RGB565ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RGB565ToARGBRow = RGB565ToARGBRow_NEON;
@@ -578,7 +1043,7 @@ int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555,
int width, int height) {
int y;
void (*ARGB1555ToARGBRow)(const uint8* src_argb1555, uint8* dst_argb,
- int pix) = ARGB1555ToARGBRow_C;
+ int width) = ARGB1555ToARGBRow_C;
if (!src_argb1555 || !dst_argb ||
width <= 0 || height == 0) {
return -1;
@@ -597,15 +1062,23 @@ int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555,
src_stride_argb1555 = dst_stride_argb = 0;
}
#if defined(HAS_ARGB1555TOARGBROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+ if (TestCpuFlag(kCpuHasSSE2)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2;
}
}
-#elif defined(HAS_ARGB1555TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+#endif
+#if defined(HAS_ARGB1555TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ ARGB1555ToARGBRow = ARGB1555ToARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGB1555TOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_NEON;
@@ -628,7 +1101,7 @@ int ARGB4444ToARGB(const uint8* src_argb4444, int src_stride_argb4444,
int width, int height) {
int y;
void (*ARGB4444ToARGBRow)(const uint8* src_argb4444, uint8* dst_argb,
- int pix) = ARGB4444ToARGBRow_C;
+ int width) = ARGB4444ToARGBRow_C;
if (!src_argb4444 || !dst_argb ||
width <= 0 || height == 0) {
return -1;
@@ -647,15 +1120,23 @@ int ARGB4444ToARGB(const uint8* src_argb4444, int src_stride_argb4444,
src_stride_argb4444 = dst_stride_argb = 0;
}
#if defined(HAS_ARGB4444TOARGBROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+ if (TestCpuFlag(kCpuHasSSE2)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2;
}
}
-#elif defined(HAS_ARGB4444TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+#endif
+#if defined(HAS_ARGB4444TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ ARGB4444ToARGBRow = ARGB4444ToARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGB4444TOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_NEON;
@@ -681,6 +1162,7 @@ int NV12ToARGB(const uint8* src_y, int src_stride_y,
void (*NV12ToARGBRow)(const uint8* y_buf,
const uint8* uv_buf,
uint8* rgb_buf,
+ const struct YuvConstants* yuvconstants,
int width) = NV12ToARGBRow_C;
if (!src_y || !src_uv || !dst_argb ||
width <= 0 || height == 0) {
@@ -693,17 +1175,23 @@ int NV12ToARGB(const uint8* src_y, int src_stride_y,
dst_stride_argb = -dst_stride_argb;
}
#if defined(HAS_NV12TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
- NV12ToARGBRow = NV12ToARGBRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- NV12ToARGBRow = NV12ToARGBRow_SSSE3;
- }
+ NV12ToARGBRow = NV12ToARGBRow_SSSE3;
}
}
-#elif defined(HAS_NV12TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+#endif
+#if defined(HAS_NV12TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ NV12ToARGBRow = NV12ToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ NV12ToARGBRow = NV12ToARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_NV12TOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
NV12ToARGBRow = NV12ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
NV12ToARGBRow = NV12ToARGBRow_NEON;
@@ -712,7 +1200,7 @@ int NV12ToARGB(const uint8* src_y, int src_stride_y,
#endif
for (y = 0; y < height; ++y) {
- NV12ToARGBRow(src_y, src_uv, dst_argb, width);
+ NV12ToARGBRow(src_y, src_uv, dst_argb, &kYuvI601Constants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
@@ -732,6 +1220,7 @@ int NV21ToARGB(const uint8* src_y, int src_stride_y,
void (*NV21ToARGBRow)(const uint8* y_buf,
const uint8* uv_buf,
uint8* rgb_buf,
+ const struct YuvConstants* yuvconstants,
int width) = NV21ToARGBRow_C;
if (!src_y || !src_uv || !dst_argb ||
width <= 0 || height == 0) {
@@ -744,18 +1233,23 @@ int NV21ToARGB(const uint8* src_y, int src_stride_y,
dst_stride_argb = -dst_stride_argb;
}
#if defined(HAS_NV21TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
NV21ToARGBRow = NV21ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
- NV21ToARGBRow = NV21ToARGBRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- NV21ToARGBRow = NV21ToARGBRow_SSSE3;
- }
+ NV21ToARGBRow = NV21ToARGBRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_NV21TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ NV21ToARGBRow = NV21ToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ NV21ToARGBRow = NV21ToARGBRow_AVX2;
}
}
#endif
#if defined(HAS_NV21TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+ if (TestCpuFlag(kCpuHasNEON)) {
NV21ToARGBRow = NV21ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
NV21ToARGBRow = NV21ToARGBRow_NEON;
@@ -764,7 +1258,7 @@ int NV21ToARGB(const uint8* src_y, int src_stride_y,
#endif
for (y = 0; y < height; ++y) {
- NV21ToARGBRow(src_y, src_uv, dst_argb, width);
+ NV21ToARGBRow(src_y, src_uv, dst_argb, &kYuvI601Constants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
@@ -783,6 +1277,7 @@ int M420ToARGB(const uint8* src_m420, int src_stride_m420,
void (*NV12ToARGBRow)(const uint8* y_buf,
const uint8* uv_buf,
uint8* rgb_buf,
+ const struct YuvConstants* yuvconstants,
int width) = NV12ToARGBRow_C;
if (!src_m420 || !dst_argb ||
width <= 0 || height == 0) {
@@ -795,17 +1290,23 @@ int M420ToARGB(const uint8* src_m420, int src_stride_m420,
dst_stride_argb = -dst_stride_argb;
}
#if defined(HAS_NV12TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
- NV12ToARGBRow = NV12ToARGBRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- NV12ToARGBRow = NV12ToARGBRow_SSSE3;
- }
+ NV12ToARGBRow = NV12ToARGBRow_SSSE3;
}
}
-#elif defined(HAS_NV12TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+#endif
+#if defined(HAS_NV12TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ NV12ToARGBRow = NV12ToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ NV12ToARGBRow = NV12ToARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_NV12TOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
NV12ToARGBRow = NV12ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
NV12ToARGBRow = NV12ToARGBRow_NEON;
@@ -814,14 +1315,16 @@ int M420ToARGB(const uint8* src_m420, int src_stride_m420,
#endif
for (y = 0; y < height - 1; y += 2) {
- NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width);
+ NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb,
+ &kYuvI601Constants, width);
NV12ToARGBRow(src_m420 + src_stride_m420, src_m420 + src_stride_m420 * 2,
- dst_argb + dst_stride_argb, width);
+ dst_argb + dst_stride_argb, &kYuvI601Constants, width);
dst_argb += dst_stride_argb * 2;
src_m420 += src_stride_m420 * 3;
}
if (height & 1) {
- NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width);
+ NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb,
+ &kYuvI601Constants, width);
}
return 0;
}
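+// M420 packs each pair of rows as two rows of Y followed by one row of
+// interleaved UV, so the loop above reads Y at src_m420 and at
+// src_m420 + stride, reads UV at src_m420 + stride * 2, and advances the
+// source by three strides for every two output rows.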
@@ -832,7 +1335,10 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
- void (*YUY2ToARGBRow)(const uint8* src_yuy2, uint8* dst_argb, int pix) =
+ void (*YUY2ToARGBRow)(const uint8* src_yuy2,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) =
YUY2ToARGBRow_C;
if (!src_yuy2 || !dst_argb ||
width <= 0 || height == 0) {
@@ -852,19 +1358,23 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
src_stride_yuy2 = dst_stride_argb = 0;
}
#if defined(HAS_YUY2TOARGBROW_SSSE3)
- // Posix is 16, Windows is 8.
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
YUY2ToARGBRow = YUY2ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- YUY2ToARGBRow = YUY2ToARGBRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- YUY2ToARGBRow = YUY2ToARGBRow_SSSE3;
- }
+ YUY2ToARGBRow = YUY2ToARGBRow_SSSE3;
}
}
-#elif defined(HAS_YUY2TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+#endif
+#if defined(HAS_YUY2TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ YUY2ToARGBRow = YUY2ToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ YUY2ToARGBRow = YUY2ToARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_YUY2TOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
YUY2ToARGBRow = YUY2ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
YUY2ToARGBRow = YUY2ToARGBRow_NEON;
@@ -872,7 +1382,7 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
}
#endif
for (y = 0; y < height; ++y) {
- YUY2ToARGBRow(src_yuy2, dst_argb, width);
+ YUY2ToARGBRow(src_yuy2, dst_argb, &kYuvI601Constants, width);
src_yuy2 += src_stride_yuy2;
dst_argb += dst_stride_argb;
}
@@ -885,7 +1395,10 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
- void (*UYVYToARGBRow)(const uint8* src_uyvy, uint8* dst_argb, int pix) =
+ void (*UYVYToARGBRow)(const uint8* src_uyvy,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) =
UYVYToARGBRow_C;
if (!src_uyvy || !dst_argb ||
width <= 0 || height == 0) {
@@ -905,19 +1418,23 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
src_stride_uyvy = dst_stride_argb = 0;
}
#if defined(HAS_UYVYTOARGBROW_SSSE3)
- // Posix is 16, Windows is 8.
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
UYVYToARGBRow = UYVYToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- UYVYToARGBRow = UYVYToARGBRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- UYVYToARGBRow = UYVYToARGBRow_SSSE3;
- }
+ UYVYToARGBRow = UYVYToARGBRow_SSSE3;
}
}
-#elif defined(HAS_UYVYTOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+#endif
+#if defined(HAS_UYVYTOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ UYVYToARGBRow = UYVYToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ UYVYToARGBRow = UYVYToARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_UYVYTOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
UYVYToARGBRow = UYVYToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
UYVYToARGBRow = UYVYToARGBRow_NEON;
@@ -925,7 +1442,7 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
}
#endif
for (y = 0; y < height; ++y) {
- UYVYToARGBRow(src_uyvy, dst_argb, width);
+ UYVYToARGBRow(src_uyvy, dst_argb, &kYuvI601Constants, width);
src_uyvy += src_stride_uyvy;
dst_argb += dst_stride_argb;
}
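+// YUY2 and UYVY are packed 4:2:2 formats in which four bytes cover two
+// pixels: YUY2 stores Y0 U Y1 V and UYVY stores U Y0 V Y1. A single row
+// function therefore converts straight from the packed row, with no
+// separate chroma planes to step.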
diff --git a/TMessagesProj/jni/libyuv/source/convert_from.cc b/TMessagesProj/jni/libyuv/source/convert_from.cc
index c1a2f62f0..9c138d936 100644
--- a/TMessagesProj/jni/libyuv/source/convert_from.cc
+++ b/TMessagesProj/jni/libyuv/source/convert_from.cc
@@ -13,7 +13,6 @@
#include "libyuv/basic_types.h"
#include "libyuv/convert.h" // For I420Copy
#include "libyuv/cpu_id.h"
-#include "libyuv/format_conversion.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
#include "libyuv/scale.h" // For ScalePlane()
@@ -174,14 +173,15 @@ int I422ToYUY2(const uint8* src_y, int src_stride_y,
src_stride_y = src_stride_u = src_stride_v = dst_stride_yuy2 = 0;
}
#if defined(HAS_I422TOYUY2ROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
+ if (TestCpuFlag(kCpuHasSSE2)) {
I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
I422ToYUY2Row = I422ToYUY2Row_SSE2;
}
}
-#elif defined(HAS_I422TOYUY2ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+#endif
+#if defined(HAS_I422TOYUY2ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
if (IS_ALIGNED(width, 16)) {
I422ToYUY2Row = I422ToYUY2Row_NEON;
@@ -220,14 +220,15 @@ int I420ToYUY2(const uint8* src_y, int src_stride_y,
dst_stride_yuy2 = -dst_stride_yuy2;
}
#if defined(HAS_I422TOYUY2ROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
+ if (TestCpuFlag(kCpuHasSSE2)) {
I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
I422ToYUY2Row = I422ToYUY2Row_SSE2;
}
}
-#elif defined(HAS_I422TOYUY2ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+#endif
+#if defined(HAS_I422TOYUY2ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
if (IS_ALIGNED(width, 16)) {
I422ToYUY2Row = I422ToYUY2Row_NEON;
@@ -280,14 +281,15 @@ int I422ToUYVY(const uint8* src_y, int src_stride_y,
src_stride_y = src_stride_u = src_stride_v = dst_stride_uyvy = 0;
}
#if defined(HAS_I422TOUYVYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
+ if (TestCpuFlag(kCpuHasSSE2)) {
I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
I422ToUYVYRow = I422ToUYVYRow_SSE2;
}
}
-#elif defined(HAS_I422TOUYVYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+#endif
+#if defined(HAS_I422TOUYVYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
I422ToUYVYRow = I422ToUYVYRow_NEON;
@@ -326,14 +328,15 @@ int I420ToUYVY(const uint8* src_y, int src_stride_y,
dst_stride_uyvy = -dst_stride_uyvy;
}
#if defined(HAS_I422TOUYVYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
+ if (TestCpuFlag(kCpuHasSSE2)) {
I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
I422ToUYVYRow = I422ToUYVYRow_SSE2;
}
}
-#elif defined(HAS_I422TOUYVYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+#endif
+#if defined(HAS_I422TOUYVYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
I422ToUYVYRow = I422ToUYVYRow_NEON;
@@ -397,20 +400,15 @@ int I420ToNV12(const uint8* src_y, int src_stride_y,
src_stride_u = src_stride_v = dst_stride_uv = 0;
}
#if defined(HAS_MERGEUVROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
+ if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow_ = MergeUVRow_Any_SSE2;
if (IS_ALIGNED(halfwidth, 16)) {
- MergeUVRow_ = MergeUVRow_Unaligned_SSE2;
- if (IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
- IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
- IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
- MergeUVRow_ = MergeUVRow_SSE2;
- }
+ MergeUVRow_ = MergeUVRow_SSE2;
}
}
#endif
#if defined(HAS_MERGEUVROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
+ if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow_ = MergeUVRow_Any_AVX2;
if (IS_ALIGNED(halfwidth, 32)) {
MergeUVRow_ = MergeUVRow_AVX2;
@@ -418,7 +416,7 @@ int I420ToNV12(const uint8* src_y, int src_stride_y,
}
#endif
#if defined(HAS_MERGEUVROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
+ if (TestCpuFlag(kCpuHasNEON)) {
MergeUVRow_ = MergeUVRow_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_NEON;
@@ -452,185 +450,67 @@ int I420ToNV21(const uint8* src_y, int src_stride_y,
width, height);
}
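+// NV12 and NV21 are biplanar 4:2:0: a full Y plane followed by a
+// half-height plane of interleaved chroma, ordered UVUV... for NV12 and
+// VUVU... for NV21, so I420ToNV21 above reuses the NV12 path with the U
+// and V source planes exchanged.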
-// Convert I420 to ARGB.
-LIBYUV_API
-int I420ToARGB(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
+// Convert I420 to RGBA with matrix.
+static int I420ToRGBAMatrix(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_rgba, int dst_stride_rgba,
+ const struct YuvConstants* yuvconstants,
+ int width, int height) {
int y;
- void (*I422ToARGBRow)(const uint8* y_buf,
+ void (*I422ToRGBARow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
- int width) = I422ToARGBRow_C;
- if (!src_y || !src_u || !src_v || !dst_argb ||
+ const struct YuvConstants* yuvconstants,
+ int width) = I422ToRGBARow_C;
+ if (!src_y || !src_u || !src_v || !dst_rgba ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
- dst_argb = dst_argb + (height - 1) * dst_stride_argb;
- dst_stride_argb = -dst_stride_argb;
+ dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
+ dst_stride_rgba = -dst_stride_rgba;
}
-#if defined(HAS_I422TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
+#if defined(HAS_I422TORGBAROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
- I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- I422ToARGBRow = I422ToARGBRow_SSSE3;
- }
+ I422ToRGBARow = I422ToRGBARow_SSSE3;
}
}
#endif
-#if defined(HAS_I422TOARGBROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
- I422ToARGBRow = I422ToARGBRow_Any_AVX2;
+#if defined(HAS_I422TORGBAROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToRGBARow = I422ToRGBARow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
- I422ToARGBRow = I422ToARGBRow_AVX2;
+ I422ToRGBARow = I422ToRGBARow_AVX2;
}
}
#endif
-#if defined(HAS_I422TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- I422ToARGBRow = I422ToARGBRow_Any_NEON;
+#if defined(HAS_I422TORGBAROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I422ToRGBARow = I422ToRGBARow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
- I422ToARGBRow = I422ToARGBRow_NEON;
+ I422ToRGBARow = I422ToRGBARow_NEON;
}
}
#endif
-#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
+#if defined(HAS_I422TORGBAROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
- IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
- I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
+ IS_ALIGNED(dst_rgba, 4) && IS_ALIGNED(dst_stride_rgba, 4)) {
+ I422ToRGBARow = I422ToRGBARow_MIPS_DSPR2;
}
#endif
for (y = 0; y < height; ++y) {
- I422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
- dst_argb += dst_stride_argb;
- src_y += src_stride_y;
- if (y & 1) {
- src_u += src_stride_u;
- src_v += src_stride_v;
- }
- }
- return 0;
-}
-
-// Convert I420 to BGRA.
-LIBYUV_API
-int I420ToBGRA(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_bgra, int dst_stride_bgra,
- int width, int height) {
- int y;
- void (*I422ToBGRARow)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = I422ToBGRARow_C;
- if (!src_y || !src_u || !src_v || !dst_bgra ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra;
- dst_stride_bgra = -dst_stride_bgra;
- }
-#if defined(HAS_I422TOBGRAROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- I422ToBGRARow = I422ToBGRARow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- I422ToBGRARow = I422ToBGRARow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) {
- I422ToBGRARow = I422ToBGRARow_SSSE3;
- }
- }
- }
-#elif defined(HAS_I422TOBGRAROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- I422ToBGRARow = I422ToBGRARow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- I422ToBGRARow = I422ToBGRARow_NEON;
- }
- }
-#elif defined(HAS_I422TOBGRAROW_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
- IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
- IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
- IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
- IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) {
- I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2;
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
- dst_bgra += dst_stride_bgra;
- src_y += src_stride_y;
- if (y & 1) {
- src_u += src_stride_u;
- src_v += src_stride_v;
- }
- }
- return 0;
-}
-
-// Convert I420 to ABGR.
-LIBYUV_API
-int I420ToABGR(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_abgr, int dst_stride_abgr,
- int width, int height) {
- int y;
- void (*I422ToABGRRow)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = I422ToABGRRow_C;
- if (!src_y || !src_u || !src_v || !dst_abgr ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
- dst_stride_abgr = -dst_stride_abgr;
- }
-#if defined(HAS_I422TOABGRROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- I422ToABGRRow = I422ToABGRRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) {
- I422ToABGRRow = I422ToABGRRow_SSSE3;
- }
- }
- }
-#elif defined(HAS_I422TOABGRROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- I422ToABGRRow = I422ToABGRRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- I422ToABGRRow = I422ToABGRRow_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
- dst_abgr += dst_stride_abgr;
+ I422ToRGBARow(src_y, src_u, src_v, dst_rgba, yuvconstants, width);
+ dst_rgba += dst_stride_rgba;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
@@ -647,44 +527,81 @@ int I420ToRGBA(const uint8* src_y, int src_stride_y,
const uint8* src_v, int src_stride_v,
uint8* dst_rgba, int dst_stride_rgba,
int width, int height) {
+ return I420ToRGBAMatrix(src_y, src_stride_y,
+ src_u, src_stride_u,
+ src_v, src_stride_v,
+ dst_rgba, dst_stride_rgba,
+ &kYuvI601Constants,
+ width, height);
+}
+
+// Convert I420 to BGRA.
+LIBYUV_API
+int I420ToBGRA(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_bgra, int dst_stride_bgra,
+ int width, int height) {
+ return I420ToRGBAMatrix(src_y, src_stride_y,
+ src_v, src_stride_v, // Swap U and V
+ src_u, src_stride_u,
+ dst_bgra, dst_stride_bgra,
+ &kYvuI601Constants, // Use Yvu matrix
+ width, height);
+}
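+
+// These wrappers are deliberately thin: each picks a conversion matrix
+// (and, for byte-swapped outputs, exchanges the chroma planes) and
+// delegates to a shared *Matrix helper, so the CPU dispatch logic lives in
+// exactly one place per sampling layout.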
+
+// Convert I420 to RGB24 with matrix.
+static int I420ToRGB24Matrix(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_rgb24, int dst_stride_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width, int height) {
int y;
- void (*I422ToRGBARow)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = I422ToRGBARow_C;
- if (!src_y || !src_u || !src_v || !dst_rgba ||
+ void (*I422ToRGB24Row)(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width) = I422ToRGB24Row_C;
+ if (!src_y || !src_u || !src_v || !dst_rgb24 ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
- dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
- dst_stride_rgba = -dst_stride_rgba;
+ dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24;
+ dst_stride_rgb24 = -dst_stride_rgb24;
}
-#if defined(HAS_I422TORGBAROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
+#if defined(HAS_I422TORGB24ROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ I422ToRGB24Row = I422ToRGB24Row_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
- I422ToRGBARow = I422ToRGBARow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_rgba, 16) && IS_ALIGNED(dst_stride_rgba, 16)) {
- I422ToRGBARow = I422ToRGBARow_SSSE3;
- }
+ I422ToRGB24Row = I422ToRGB24Row_SSSE3;
}
}
-#elif defined(HAS_I422TORGBAROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- I422ToRGBARow = I422ToRGBARow_Any_NEON;
+#endif
+#if defined(HAS_I422TORGB24ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToRGB24Row = I422ToRGB24Row_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToRGB24Row = I422ToRGB24Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I422TORGB24ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I422ToRGB24Row = I422ToRGB24Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
- I422ToRGBARow = I422ToRGBARow_NEON;
+ I422ToRGB24Row = I422ToRGB24Row_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
- I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width);
- dst_rgba += dst_stride_rgba;
+ I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, yuvconstants, width);
+ dst_rgb24 += dst_stride_rgb24;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
@@ -701,99 +618,27 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y,
const uint8* src_v, int src_stride_v,
uint8* dst_rgb24, int dst_stride_rgb24,
int width, int height) {
- int y;
- void (*I422ToRGB24Row)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = I422ToRGB24Row_C;
- if (!src_y || !src_u || !src_v || !dst_rgb24 ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24;
- dst_stride_rgb24 = -dst_stride_rgb24;
- }
-#if defined(HAS_I422TORGB24ROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- I422ToRGB24Row = I422ToRGB24Row_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- I422ToRGB24Row = I422ToRGB24Row_SSSE3;
- }
- }
-#elif defined(HAS_I422TORGB24ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- I422ToRGB24Row = I422ToRGB24Row_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- I422ToRGB24Row = I422ToRGB24Row_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, width);
- dst_rgb24 += dst_stride_rgb24;
- src_y += src_stride_y;
- if (y & 1) {
- src_u += src_stride_u;
- src_v += src_stride_v;
- }
- }
- return 0;
+ return I420ToRGB24Matrix(src_y, src_stride_y,
+ src_u, src_stride_u,
+ src_v, src_stride_v,
+ dst_rgb24, dst_stride_rgb24,
+ &kYuvI601Constants,
+ width, height);
}
// Convert I420 to RAW.
LIBYUV_API
int I420ToRAW(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_raw, int dst_stride_raw,
- int width, int height) {
- int y;
- void (*I422ToRAWRow)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = I422ToRAWRow_C;
- if (!src_y || !src_u || !src_v || !dst_raw ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_raw = dst_raw + (height - 1) * dst_stride_raw;
- dst_stride_raw = -dst_stride_raw;
- }
-#if defined(HAS_I422TORAWROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- I422ToRAWRow = I422ToRAWRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- I422ToRAWRow = I422ToRAWRow_SSSE3;
- }
- }
-#elif defined(HAS_I422TORAWROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- I422ToRAWRow = I422ToRAWRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- I422ToRAWRow = I422ToRAWRow_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- I422ToRAWRow(src_y, src_u, src_v, dst_raw, width);
- dst_raw += dst_stride_raw;
- src_y += src_stride_y;
- if (y & 1) {
- src_u += src_stride_u;
- src_v += src_stride_v;
- }
- }
- return 0;
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_raw, int dst_stride_raw,
+ int width, int height) {
+ return I420ToRGB24Matrix(src_y, src_stride_y,
+ src_v, src_stride_v, // Swap U and V
+ src_u, src_stride_u,
+ dst_raw, dst_stride_raw,
+ &kYvuI601Constants, // Use Yvu matrix
+ width, height);
}
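+// In libyuv byte order, RGB24 stores B,G,R per pixel and RAW stores R,G,B,
+// so RAW falls out of the RGB24 writer once the chroma planes and matrix
+// are mirrored, exactly as with the ARGB/ABGR pairs.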
// Convert I420 to ARGB1555.
@@ -808,6 +653,7 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
+ const struct YuvConstants* yuvconstants,
int width) = I422ToARGB1555Row_C;
if (!src_y || !src_u || !src_v || !dst_argb1555 ||
width <= 0 || height == 0) {
@@ -820,14 +666,23 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y,
dst_stride_argb1555 = -dst_stride_argb1555;
}
#if defined(HAS_I422TOARGB1555ROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
I422ToARGB1555Row = I422ToARGB1555Row_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422ToARGB1555Row = I422ToARGB1555Row_SSSE3;
}
}
-#elif defined(HAS_I422TOARGB1555ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+#endif
+#if defined(HAS_I422TOARGB1555ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToARGB1555Row = I422ToARGB1555Row_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToARGB1555Row = I422ToARGB1555Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I422TOARGB1555ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
I422ToARGB1555Row = I422ToARGB1555Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I422ToARGB1555Row = I422ToARGB1555Row_NEON;
@@ -836,7 +691,8 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y,
#endif
for (y = 0; y < height; ++y) {
- I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, width);
+ I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, &kYuvI601Constants,
+ width);
dst_argb1555 += dst_stride_argb1555;
src_y += src_stride_y;
if (y & 1) {
@@ -860,6 +716,7 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
+ const struct YuvConstants* yuvconstants,
int width) = I422ToARGB4444Row_C;
if (!src_y || !src_u || !src_v || !dst_argb4444 ||
width <= 0 || height == 0) {
@@ -872,14 +729,23 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
dst_stride_argb4444 = -dst_stride_argb4444;
}
#if defined(HAS_I422TOARGB4444ROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
I422ToARGB4444Row = I422ToARGB4444Row_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422ToARGB4444Row = I422ToARGB4444Row_SSSE3;
}
}
-#elif defined(HAS_I422TOARGB4444ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+#endif
+#if defined(HAS_I422TOARGB4444ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToARGB4444Row = I422ToARGB4444Row_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToARGB4444Row = I422ToARGB4444Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I422TOARGB4444ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
I422ToARGB4444Row = I422ToARGB4444Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I422ToARGB4444Row = I422ToARGB4444Row_NEON;
@@ -888,7 +754,8 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
#endif
for (y = 0; y < height; ++y) {
- I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, width);
+ I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, &kYuvI601Constants,
+ width);
dst_argb4444 += dst_stride_argb4444;
src_y += src_stride_y;
if (y & 1) {
@@ -911,6 +778,7 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
+ const struct YuvConstants* yuvconstants,
int width) = I422ToRGB565Row_C;
if (!src_y || !src_u || !src_v || !dst_rgb565 ||
width <= 0 || height == 0) {
@@ -923,14 +791,23 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
dst_stride_rgb565 = -dst_stride_rgb565;
}
#if defined(HAS_I422TORGB565ROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
I422ToRGB565Row = I422ToRGB565Row_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422ToRGB565Row = I422ToRGB565Row_SSSE3;
}
}
-#elif defined(HAS_I422TORGB565ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+#endif
+#if defined(HAS_I422TORGB565ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToRGB565Row = I422ToRGB565Row_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToRGB565Row = I422ToRGB565Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I422TORGB565ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
I422ToRGB565Row = I422ToRGB565Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I422ToRGB565Row = I422ToRGB565Row_NEON;
@@ -939,7 +816,7 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
#endif
for (y = 0; y < height; ++y) {
- I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, width);
+ I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, &kYuvI601Constants, width);
dst_rgb565 += dst_stride_rgb565;
src_y += src_stride_y;
if (y & 1) {
@@ -950,6 +827,118 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
return 0;
}
+// Ordered 4x4 dither for 888 to 565. Values from 0 to 7.
+static const uint8 kDither565_4x4[16] = {
+ 0, 4, 1, 5,
+ 6, 2, 7, 3,
+ 1, 5, 0, 4,
+ 7, 3, 6, 2,
+};
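+
+// Ordered dithering adds a small position-dependent bias to each 8-bit
+// channel before it is truncated to 5 or 6 bits, trading banding for
+// structured noise. A per-pixel sketch of the idea (clamp255 stands in for
+// the library's 0..255 clamp; the SIMD rows pack the same bit layout):
+//
+//   uint8 d = dither4x4[(y & 3) * 4 + (x & 3)];           // bias 0..7
+//   int b = clamp255(b8 + d) >> 3;                        // 8 -> 5 bits
+//   int g = clamp255(g8 + d) >> 2;                        // 8 -> 6 bits
+//   int r = clamp255(r8 + d) >> 3;                        // 8 -> 5 bits
+//   uint16 rgb565 = (uint16)(b | (g << 5) | (r << 11));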
+
+// Convert I420 to RGB565 with dithering.
+LIBYUV_API
+int I420ToRGB565Dither(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_rgb565, int dst_stride_rgb565,
+ const uint8* dither4x4, int width, int height) {
+ int y;
+ void (*I422ToARGBRow)(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width) = I422ToARGBRow_C;
+ void (*ARGBToRGB565DitherRow)(const uint8* src_argb, uint8* dst_rgb,
+ const uint32 dither4, int width) = ARGBToRGB565DitherRow_C;
+ if (!src_y || !src_u || !src_v || !dst_rgb565 ||
+ width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
+ dst_stride_rgb565 = -dst_stride_rgb565;
+ }
+ if (!dither4x4) {
+ dither4x4 = kDither565_4x4;
+ }
+#if defined(HAS_I422TOARGBROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToARGBRow = I422ToARGBRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_I422TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToARGBRow = I422ToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToARGBRow = I422ToARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I422TOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I422ToARGBRow = I422ToARGBRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToARGBRow = I422ToARGBRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
+ if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
+ IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
+ IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
+ IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2)) {
+ I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
+ }
+#endif
+#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_SSE2;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTORGB565DITHERROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_AVX2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTORGB565DITHERROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_NEON;
+ }
+ }
+#endif
+ {
+ // Allocate a temporary ARGB row: convert to ARGB first, then dither it
+ // down to RGB565.
+ align_buffer_64(row_argb, width * 4);
+ for (y = 0; y < height; ++y) {
+ I422ToARGBRow(src_y, src_u, src_v, row_argb, &kYuvI601Constants, width);
+ ARGBToRGB565DitherRow(row_argb, dst_rgb565,
+ *(uint32*)(dither4x4 + ((y & 3) << 2)), width);
+ dst_rgb565 += dst_stride_rgb565;
+ src_y += src_stride_y;
+ if (y & 1) {
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ }
+ }
+ free_aligned_buffer_64(row_argb);
+ }
+ return 0;
+}
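+
+// Passing NULL for dither4x4 selects the built-in kDither565_4x4 table.
+// Usage sketch (hypothetical buffers):
+//
+//   I420ToRGB565Dither(y, w, u, w / 2, v, w / 2,
+//                      rgb565, w * 2, NULL, w, h);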
+
// Convert I420 to specified format
LIBYUV_API
int ConvertFromI420(const uint8* y, int y_stride,
@@ -1054,38 +1043,6 @@ int ConvertFromI420(const uint8* y, int y_stride,
dst_sample_stride ? dst_sample_stride : width * 4,
width, height);
break;
- case FOURCC_BGGR:
- r = I420ToBayerBGGR(y, y_stride,
- u, u_stride,
- v, v_stride,
- dst_sample,
- dst_sample_stride ? dst_sample_stride : width,
- width, height);
- break;
- case FOURCC_GBRG:
- r = I420ToBayerGBRG(y, y_stride,
- u, u_stride,
- v, v_stride,
- dst_sample,
- dst_sample_stride ? dst_sample_stride : width,
- width, height);
- break;
- case FOURCC_GRBG:
- r = I420ToBayerGRBG(y, y_stride,
- u, u_stride,
- v, v_stride,
- dst_sample,
- dst_sample_stride ? dst_sample_stride : width,
- width, height);
- break;
- case FOURCC_RGGB:
- r = I420ToBayerRGGB(y, y_stride,
- u, u_stride,
- v, v_stride,
- dst_sample,
- dst_sample_stride ? dst_sample_stride : width,
- width, height);
- break;
case FOURCC_I400:
r = I400Copy(y, y_stride,
dst_sample,
@@ -1116,7 +1073,7 @@ int ConvertFromI420(const uint8* y, int y_stride,
width, height);
break;
}
- // TODO(fbarchard): Add M420 and Q420.
+ // TODO(fbarchard): Add M420.
// Triplanar formats
// TODO(fbarchard): halfstride instead of halfwidth
case FOURCC_I420:
diff --git a/TMessagesProj/jni/libyuv/source/convert_from_argb.cc b/TMessagesProj/jni/libyuv/source/convert_from_argb.cc
index de461ddb0..6796343c0 100644
--- a/TMessagesProj/jni/libyuv/source/convert_from_argb.cc
+++ b/TMessagesProj/jni/libyuv/source/convert_from_argb.cc
@@ -12,7 +12,6 @@
#include "libyuv/basic_types.h"
#include "libyuv/cpu_id.h"
-#include "libyuv/format_conversion.h"
#include "libyuv/planar_functions.h"
#include "libyuv/row.h"
@@ -29,10 +28,10 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb,
uint8* dst_v, int dst_stride_v,
int width, int height) {
int y;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+ void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
void (*ARGBToUV444Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix) = ARGBToUV444Row_C;
+ int width) = ARGBToUV444Row_C;
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
}
@@ -51,17 +50,15 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
}
#if defined(HAS_ARGBTOUV444ROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUV444Row = ARGBToUV444Row_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- ARGBToUV444Row = ARGBToUV444Row_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
- ARGBToUV444Row = ARGBToUV444Row_SSSE3;
- }
+ ARGBToUV444Row = ARGBToUV444Row_SSSE3;
}
}
-#elif defined(HAS_ARGBTOUV444ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+#endif
+#if defined(HAS_ARGBTOUV444ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUV444Row = ARGBToUV444Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToUV444Row = ARGBToUV444Row_NEON;
@@ -69,19 +66,23 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
- IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
+ ARGBToYRow = ARGBToYRow_SSSE3;
}
}
-
-#elif defined(HAS_ARGBTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToYRow = ARGBToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToYRow = ARGBToYRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
@@ -109,8 +110,8 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb,
int width, int height) {
int y;
void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix) = ARGBToUV422Row_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+ int width) = ARGBToUV422Row_C;
+ void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
@@ -130,37 +131,39 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
}
#if defined(HAS_ARGBTOUV422ROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- ARGBToUV422Row = ARGBToUV422Row_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
- ARGBToUV422Row = ARGBToUV422Row_SSSE3;
- }
+ ARGBToUV422Row = ARGBToUV422Row_SSSE3;
}
}
-#elif defined(HAS_ARGBTOUV422ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+#endif
+#if defined(HAS_ARGBTOUV422ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUV422Row = ARGBToUV422Row_NEON;
}
}
#endif
-
#if defined(HAS_ARGBTOYROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
- IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
+ ARGBToYRow = ARGBToYRow_SSSE3;
}
}
-#elif defined(HAS_ARGBTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToYRow = ARGBToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToYRow = ARGBToYRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
@@ -188,8 +191,8 @@ int ARGBToI411(const uint8* src_argb, int src_stride_argb,
int width, int height) {
int y;
void (*ARGBToUV411Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix) = ARGBToUV411Row_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+ int width) = ARGBToUV411Row_C;
+ void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
@@ -209,19 +212,15 @@ int ARGBToI411(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
}
#if defined(HAS_ARGBTOYROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
- IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
+ ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
+ if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYRow = ARGBToYRow_AVX2;
@@ -229,7 +228,7 @@ int ARGBToI411(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_ARGBTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+ if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
@@ -237,7 +236,7 @@ int ARGBToI411(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_ARGBTOUV411ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 32) {
+ if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUV411Row = ARGBToUV411Row_Any_NEON;
if (IS_ALIGNED(width, 32)) {
ARGBToUV411Row = ARGBToUV411Row_NEON;
@@ -265,7 +264,7 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
int halfwidth = (width + 1) >> 1;
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+ void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) = MergeUVRow_C;
@@ -281,22 +280,27 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
src_stride_argb = -src_stride_argb;
}
#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3;
- ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
- ARGBToUVRow = ARGBToUVRow_SSSE3;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
- }
+ ARGBToUVRow = ARGBToUVRow_SSSE3;
+ ARGBToYRow = ARGBToYRow_SSSE3;
}
}
-#elif defined(HAS_ARGBTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+ ARGBToYRow = ARGBToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_AVX2;
+ ARGBToYRow = ARGBToYRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
@@ -304,7 +308,7 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_ARGBTOUVROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+ if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVRow = ARGBToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
@@ -312,18 +316,15 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_MERGEUVROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
+ if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow_ = MergeUVRow_Any_SSE2;
if (IS_ALIGNED(halfwidth, 16)) {
- MergeUVRow_ = MergeUVRow_Unaligned_SSE2;
- if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
- MergeUVRow_ = MergeUVRow_SSE2;
- }
+ MergeUVRow_ = MergeUVRow_SSE2;
}
}
#endif
#if defined(HAS_MERGEUVROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
+ if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow_ = MergeUVRow_Any_AVX2;
if (IS_ALIGNED(halfwidth, 32)) {
MergeUVRow_ = MergeUVRow_AVX2;
@@ -331,7 +332,7 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_MERGEUVROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
+ if (TestCpuFlag(kCpuHasNEON)) {
MergeUVRow_ = MergeUVRow_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_NEON;
@@ -340,8 +341,8 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
#endif
{
     // Allocate a row of u and a row of v.
- align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2);
- uint8* row_v = row_u + ((halfwidth + 15) & ~15);
+ align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
+ uint8* row_v = row_u + ((halfwidth + 31) & ~31);
for (y = 0; y < height - 1; y += 2) {
ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
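
The temporary row size above grows from a 16-element to a 32-element round-up, presumably so the buffer stays large enough for the AVX2 kernels enabled in this patch, which store 32 elements per iteration. The idiom `(x + n - 1) & ~(n - 1)` rounds up to the next multiple of a power-of-two n; a quick check, as a sketch:

    #include <assert.h>

    /* Round x up to the next multiple of 32 (valid for powers of two only). */
    static int RoundUp32(int x) { return (x + 31) & ~31; }

    int main(void) {
      assert(RoundUp32(1) == 32);
      assert(RoundUp32(100) == 128);  /* e.g. halfwidth = 100 */
      assert(RoundUp32(128) == 128);  /* exact multiples are unchanged */
      return 0;
    }
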
@@ -372,7 +373,7 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
int halfwidth = (width + 1) >> 1;
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+ void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) = MergeUVRow_C;
@@ -388,22 +389,27 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
src_stride_argb = -src_stride_argb;
}
#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3;
- ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
- ARGBToUVRow = ARGBToUVRow_SSSE3;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
- }
+ ARGBToUVRow = ARGBToUVRow_SSSE3;
+ ARGBToYRow = ARGBToYRow_SSSE3;
}
}
-#elif defined(HAS_ARGBTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+ ARGBToYRow = ARGBToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_AVX2;
+ ARGBToYRow = ARGBToYRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
@@ -411,7 +417,7 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_ARGBTOUVROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+ if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVRow = ARGBToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
@@ -419,18 +425,15 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_MERGEUVROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
+ if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow_ = MergeUVRow_Any_SSE2;
if (IS_ALIGNED(halfwidth, 16)) {
- MergeUVRow_ = MergeUVRow_Unaligned_SSE2;
- if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
- MergeUVRow_ = MergeUVRow_SSE2;
- }
+ MergeUVRow_ = MergeUVRow_SSE2;
}
}
#endif
#if defined(HAS_MERGEUVROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
+ if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow_ = MergeUVRow_Any_AVX2;
if (IS_ALIGNED(halfwidth, 32)) {
MergeUVRow_ = MergeUVRow_AVX2;
@@ -438,7 +441,7 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_MERGEUVROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
+ if (TestCpuFlag(kCpuHasNEON)) {
MergeUVRow_ = MergeUVRow_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_NEON;
@@ -447,8 +450,8 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
#endif
{
     // Allocate a row of u and a row of v.
- align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2);
- uint8* row_v = row_u + ((halfwidth + 15) & ~15);
+ align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
+ uint8* row_v = row_u + ((halfwidth + 31) & ~31);
for (y = 0; y < height - 1; y += 2) {
ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
@@ -476,8 +479,8 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
int width, int height) {
int y;
void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix) = ARGBToUV422Row_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+ int width) = ARGBToUV422Row_C;
+ void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_yuy2, int width) = I422ToYUY2Row_C;
@@ -500,17 +503,15 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_yuy2 = 0;
}
#if defined(HAS_ARGBTOUV422ROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- ARGBToUV422Row = ARGBToUV422Row_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
- ARGBToUV422Row = ARGBToUV422Row_SSSE3;
- }
+ ARGBToUV422Row = ARGBToUV422Row_SSSE3;
}
}
-#elif defined(HAS_ARGBTOUV422ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+#endif
+#if defined(HAS_ARGBTOUV422ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUV422Row = ARGBToUV422Row_NEON;
@@ -518,17 +519,23 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
+ ARGBToYRow = ARGBToYRow_SSSE3;
}
}
-#elif defined(HAS_ARGBTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToYRow = ARGBToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToYRow = ARGBToYRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
@@ -537,14 +544,15 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
#endif
#if defined(HAS_I422TOYUY2ROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
+ if (TestCpuFlag(kCpuHasSSE2)) {
I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
I422ToYUY2Row = I422ToYUY2Row_SSE2;
}
}
-#elif defined(HAS_I422TOYUY2ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+#endif
+#if defined(HAS_I422TOYUY2ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
if (IS_ALIGNED(width, 16)) {
I422ToYUY2Row = I422ToYUY2Row_NEON;
@@ -578,8 +586,8 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
int width, int height) {
int y;
void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix) = ARGBToUV422Row_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+ int width) = ARGBToUV422Row_C;
+ void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_uyvy, int width) = I422ToUYVYRow_C;
@@ -602,17 +610,15 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_uyvy = 0;
}
#if defined(HAS_ARGBTOUV422ROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- ARGBToUV422Row = ARGBToUV422Row_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
- ARGBToUV422Row = ARGBToUV422Row_SSSE3;
- }
+ ARGBToUV422Row = ARGBToUV422Row_SSSE3;
}
}
-#elif defined(HAS_ARGBTOUV422ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+#endif
+#if defined(HAS_ARGBTOUV422ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUV422Row = ARGBToUV422Row_NEON;
@@ -620,17 +626,23 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
+ ARGBToYRow = ARGBToYRow_SSSE3;
}
}
-#elif defined(HAS_ARGBTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToYRow = ARGBToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToYRow = ARGBToYRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
@@ -639,14 +651,15 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
#endif
#if defined(HAS_I422TOUYVYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
+ if (TestCpuFlag(kCpuHasSSE2)) {
I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
I422ToUYVYRow = I422ToUYVYRow_SSE2;
}
}
-#elif defined(HAS_I422TOUYVYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+#endif
+#if defined(HAS_I422TOUYVYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
I422ToUYVYRow = I422ToUYVYRow_NEON;
@@ -679,7 +692,7 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
int width, int height) {
int y;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+ void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
if (!src_argb || !dst_y || width <= 0 || height == 0) {
return -1;
@@ -697,19 +710,15 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_y = 0;
}
#if defined(HAS_ARGBTOYROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
- IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
+ ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
+ if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYRow = ARGBToYRow_AVX2;
@@ -717,7 +726,7 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_ARGBTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+ if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
@@ -755,7 +764,7 @@ int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb24, int dst_stride_rgb24,
int width, int height) {
int y;
- void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
+ void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
ARGBToRGB24Row_C;
if (!src_argb || !dst_rgb24 || width <= 0 || height == 0) {
return -1;
@@ -773,14 +782,15 @@ int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_rgb24 = 0;
}
#if defined(HAS_ARGBTORGB24ROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToRGB24Row = ARGBToRGB24Row_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToRGB24Row = ARGBToRGB24Row_SSSE3;
}
}
-#elif defined(HAS_ARGBTORGB24ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+#endif
+#if defined(HAS_ARGBTORGB24ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
ARGBToRGB24Row = ARGBToRGB24Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToRGB24Row = ARGBToRGB24Row_NEON;
@@ -802,7 +812,7 @@ int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
uint8* dst_raw, int dst_stride_raw,
int width, int height) {
int y;
- void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int pix) =
+ void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int width) =
ARGBToRAWRow_C;
if (!src_argb || !dst_raw || width <= 0 || height == 0) {
return -1;
@@ -820,14 +830,15 @@ int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_raw = 0;
}
#if defined(HAS_ARGBTORAWROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToRAWRow = ARGBToRAWRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToRAWRow = ARGBToRAWRow_SSSE3;
}
}
-#elif defined(HAS_ARGBTORAWROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+#endif
+#if defined(HAS_ARGBTORAWROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
ARGBToRAWRow = ARGBToRAWRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToRAWRow = ARGBToRAWRow_NEON;
@@ -843,13 +854,74 @@ int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
return 0;
}
+// Ordered 4x4 dither for 888 to 565. Values from 0 to 7.
+static const uint8 kDither565_4x4[16] = {
+ 0, 4, 1, 5,
+ 6, 2, 7, 3,
+ 1, 5, 0, 4,
+ 7, 3, 6, 2,
+};
+
+// Convert ARGB To RGB565 with 4x4 dither matrix (16 bytes).
+LIBYUV_API
+int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb,
+ uint8* dst_rgb565, int dst_stride_rgb565,
+ const uint8* dither4x4, int width, int height) {
+ int y;
+ void (*ARGBToRGB565DitherRow)(const uint8* src_argb, uint8* dst_rgb,
+ const uint32 dither4, int width) = ARGBToRGB565DitherRow_C;
+ if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) {
+ return -1;
+ }
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ if (!dither4x4) {
+ dither4x4 = kDither565_4x4;
+ }
+#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_SSE2;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTORGB565DITHERROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_AVX2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTORGB565DITHERROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_NEON;
+ }
+ }
+#endif
+ for (y = 0; y < height; ++y) {
+ ARGBToRGB565DitherRow(src_argb, dst_rgb565,
+ *(uint32*)(dither4x4 + ((y & 3) << 2)), width);
+ src_argb += src_stride_argb;
+ dst_rgb565 += dst_stride_rgb565;
+ }
+ return 0;
+}
+
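For readers unfamiliar with the row kernel: `*(uint32*)(dither4x4 + ((y & 3) << 2))` loads one 4-byte row of the matrix per scanline, and the kernel then adds byte `x & 3` of it (a value 0..7, clamped) to each 8-bit channel before truncating to 5:6:5. A conceptual scalar sketch, assuming a little-endian target (these helpers are illustrative, not the actual ARGBToRGB565DitherRow_C):

    #include <stdint.h>

    static uint8_t Clamp255(int v) { return (uint8_t)(v > 255 ? 255 : v); }

    /* dither4 holds one 4-byte matrix row; pixel x uses byte (x & 3). */
    static uint8_t DitherForPixel(uint32_t dither4, int x) {
      return (uint8_t)(dither4 >> ((x & 3) * 8));
    }

    /* One pixel of 8888 (bgra in memory) -> 565 with dither offset d (0..7). */
    static uint16_t PixelToRGB565Dither(const uint8_t* argb, uint8_t d) {
      uint8_t b = Clamp255(argb[0] + d);
      uint8_t g = Clamp255(argb[1] + d);
      uint8_t r = Clamp255(argb[2] + d);
      return (uint16_t)((b >> 3) | ((uint16_t)(g >> 2) << 5) |
                        ((uint16_t)(r >> 3) << 11));
    }
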
// Convert ARGB To RGB565.
+// TODO(fbarchard): Consider using the low-level dither row with a zero matrix.
LIBYUV_API
int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height) {
int y;
- void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
+ void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
ARGBToRGB565Row_C;
if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) {
return -1;
@@ -867,15 +939,23 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_rgb565 = 0;
}
#if defined(HAS_ARGBTORGB565ROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 4 &&
- IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
+ if (TestCpuFlag(kCpuHasSSE2)) {
ARGBToRGB565Row = ARGBToRGB565Row_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBToRGB565Row = ARGBToRGB565Row_SSE2;
}
}
-#elif defined(HAS_ARGBTORGB565ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+#endif
+#if defined(HAS_ARGBTORGB565ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToRGB565Row = ARGBToRGB565Row_Any_AVX2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToRGB565Row = ARGBToRGB565Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTORGB565ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
ARGBToRGB565Row = ARGBToRGB565Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToRGB565Row = ARGBToRGB565Row_NEON;
@@ -897,7 +977,7 @@ int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb1555, int dst_stride_argb1555,
int width, int height) {
int y;
- void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
+ void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
ARGBToARGB1555Row_C;
if (!src_argb || !dst_argb1555 || width <= 0 || height == 0) {
return -1;
@@ -915,15 +995,23 @@ int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_argb1555 = 0;
}
#if defined(HAS_ARGBTOARGB1555ROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 4 &&
- IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
+ if (TestCpuFlag(kCpuHasSSE2)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2;
}
}
-#elif defined(HAS_ARGBTOARGB1555ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+#endif
+#if defined(HAS_ARGBTOARGB1555ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToARGB1555Row = ARGBToARGB1555Row_Any_AVX2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToARGB1555Row = ARGBToARGB1555Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOARGB1555ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_NEON;
@@ -945,7 +1033,7 @@ int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb4444, int dst_stride_argb4444,
int width, int height) {
int y;
- void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
+ void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
ARGBToARGB4444Row_C;
if (!src_argb || !dst_argb4444 || width <= 0 || height == 0) {
return -1;
@@ -963,15 +1051,23 @@ int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_argb4444 = 0;
}
#if defined(HAS_ARGBTOARGB4444ROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 4 &&
- IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
+ if (TestCpuFlag(kCpuHasSSE2)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_SSE2;
}
}
-#elif defined(HAS_ARGBTOARGB4444ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+#endif
+#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToARGB4444Row = ARGBToARGB4444Row_Any_AVX2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToARGB4444Row = ARGBToARGB4444Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOARGB4444ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_NEON;
@@ -996,8 +1092,8 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
int width, int height) {
int y;
void (*ARGBToUVJRow)(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C;
- void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int pix) =
+ uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C;
+ void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int width) =
ARGBToYJRow_C;
if (!src_argb ||
!dst_yj || !dst_u || !dst_v ||
@@ -1011,23 +1107,17 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
src_stride_argb = -src_stride_argb;
}
#if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- ARGBToUVJRow = ARGBToUVJRow_Unaligned_SSSE3;
- ARGBToYJRow = ARGBToYJRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
- ARGBToUVJRow = ARGBToUVJRow_SSSE3;
- if (IS_ALIGNED(dst_yj, 16) && IS_ALIGNED(dst_stride_yj, 16)) {
- ARGBToYJRow = ARGBToYJRow_SSSE3;
- }
- }
+ ARGBToUVJRow = ARGBToUVJRow_SSSE3;
+ ARGBToYJRow = ARGBToYJRow_SSSE3;
}
}
#endif
-#if defined(HAS_ARGBTOYJROW_AVX2) && defined(HAS_ARGBTOUVJROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
+#if defined(HAS_ARGBTOYJROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYJRow = ARGBToYJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYJRow = ARGBToYJRow_AVX2;
@@ -1035,7 +1125,7 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_ARGBTOYJROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+ if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYJRow = ARGBToYJRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYJRow = ARGBToYJRow_NEON;
@@ -1043,7 +1133,7 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_ARGBTOUVJROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+ if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVJRow = ARGBToUVJRow_NEON;
@@ -1067,13 +1157,95 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
return 0;
}
+// ARGB little endian (bgra in memory) to J422
+LIBYUV_API
+int ARGBToJ422(const uint8* src_argb, int src_stride_argb,
+ uint8* dst_y, int dst_stride_y,
+ uint8* dst_u, int dst_stride_u,
+ uint8* dst_v, int dst_stride_v,
+ int width, int height) {
+ int y;
+ void (*ARGBToUVJ422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
+ int width) = ARGBToUVJ422Row_C;
+ void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_y, int width) =
+ ARGBToYJRow_C;
+ if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
+ return -1;
+ }
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb == width * 4 &&
+ dst_stride_y == width &&
+ dst_stride_u * 2 == width &&
+ dst_stride_v * 2 == width) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
+ }
+#if defined(HAS_ARGBTOUVJ422ROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToUVJ422Row = ARGBToUVJ422Row_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVJ422Row = ARGBToUVJ422Row_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOUVJ422ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToUVJ422Row = ARGBToUVJ422Row_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVJ422Row = ARGBToUVJ422Row_NEON;
+ }
+ }
+#endif
+
+#if defined(HAS_ARGBTOYJROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYJRow = ARGBToYJRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYJROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToYJRow = ARGBToYJRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToYJRow = ARGBToYJRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYJROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToYJRow = ARGBToYJRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToYJRow = ARGBToYJRow_NEON;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGBToUVJ422Row(src_argb, dst_u, dst_v, width);
+ ARGBToYJRow(src_argb, dst_y, width);
+ src_argb += src_stride_argb;
+ dst_y += dst_stride_y;
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ }
+ return 0;
+}
+
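A minimal usage sketch for the new entry point. J422 layout is a full-resolution Y plane plus half-width, full-height U and V planes; the declaration is assumed to come from libyuv/convert_from_argb.h, and the driver function here is hypothetical:

    #include <stdint.h>
    #include <stdlib.h>
    #include "libyuv/convert_from_argb.h"

    void ConvertFrameToJ422(const uint8_t* argb, int width, int height) {
      int halfwidth = (width + 1) / 2;
      uint8_t* y = (uint8_t*)malloc((size_t)width * height);
      uint8_t* u = (uint8_t*)malloc((size_t)halfwidth * height);
      uint8_t* v = (uint8_t*)malloc((size_t)halfwidth * height);
      ARGBToJ422(argb, width * 4,   /* ARGB is 4 bytes per pixel */
                 y, width,          /* Y stride = width */
                 u, halfwidth,      /* U/V strides = half width */
                 v, halfwidth,
                 width, height);
      /* ... consume the planes ... */
      free(y);
      free(u);
      free(v);
    }
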
// Convert ARGB to J400.
LIBYUV_API
int ARGBToJ400(const uint8* src_argb, int src_stride_argb,
uint8* dst_yj, int dst_stride_yj,
int width, int height) {
int y;
- void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int pix) =
+ void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int width) =
ARGBToYJRow_C;
if (!src_argb || !dst_yj || width <= 0 || height == 0) {
return -1;
@@ -1091,19 +1263,15 @@ int ARGBToJ400(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_yj = 0;
}
#if defined(HAS_ARGBTOYJROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- ARGBToYJRow = ARGBToYJRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
- IS_ALIGNED(dst_yj, 16) && IS_ALIGNED(dst_stride_yj, 16)) {
- ARGBToYJRow = ARGBToYJRow_SSSE3;
- }
+ ARGBToYJRow = ARGBToYJRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYJROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
+ if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYJRow = ARGBToYJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYJRow = ARGBToYJRow_AVX2;
@@ -1111,7 +1279,7 @@ int ARGBToJ400(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_ARGBTOYJROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+ if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYJRow = ARGBToYJRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYJRow = ARGBToYJRow_NEON;
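
One more note on this file: the `// Coalesce rows.` blocks (as in ARGBToJ422 above) exploit the fact that when every plane's stride equals its row width in bytes, the frame is one contiguous run of memory, so it can be processed as a single row of `width * height` pixels and the per-row dispatch overhead disappears. Illustrative check, with a hypothetical name:

    /* A 640x480 ARGB frame with stride 2560 is one contiguous run, so it
       can be handled as a single 307200-pixel row. */
    static int CanCoalesceARGBToY(int width, int src_stride_argb,
                                  int dst_stride_y) {
      return src_stride_argb == width * 4 && dst_stride_y == width;
    }
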
diff --git a/TMessagesProj/jni/libyuv/source/convert_to_argb.cc b/TMessagesProj/jni/libyuv/source/convert_to_argb.cc
index 1b228a7b4..af829fbd3 100644
--- a/TMessagesProj/jni/libyuv/source/convert_to_argb.cc
+++ b/TMessagesProj/jni/libyuv/source/convert_to_argb.cc
@@ -11,7 +11,6 @@
#include "libyuv/convert_argb.h"
#include "libyuv/cpu_id.h"
-#include "libyuv/format_conversion.h"
#ifdef HAVE_JPEG
#include "libyuv/mjpeg_decoder.h"
#endif
@@ -144,36 +143,6 @@ int ConvertToARGB(const uint8* sample, size_t sample_size,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
- // TODO(fbarchard): Support cropping Bayer by odd numbers
- // by adjusting fourcc.
- case FOURCC_BGGR:
- src = sample + (src_width * crop_y + crop_x);
- r = BayerBGGRToARGB(src, src_width,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
-
- case FOURCC_GBRG:
- src = sample + (src_width * crop_y + crop_x);
- r = BayerGBRGToARGB(src, src_width,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
-
- case FOURCC_GRBG:
- src = sample + (src_width * crop_y + crop_x);
- r = BayerGRBGToARGB(src, src_width,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
-
- case FOURCC_RGGB:
- src = sample + (src_width * crop_y + crop_x);
- r = BayerRGGBToARGB(src, src_width,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
-
case FOURCC_I400:
src = sample + src_width * crop_y + crop_x;
r = I400ToARGB(src, src_width,
@@ -205,15 +174,6 @@ int ConvertToARGB(const uint8* sample, size_t sample_size,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
-// case FOURCC_Q420:
-// src = sample + (src_width + aligned_src_width * 2) * crop_y + crop_x;
-// src_uv = sample + (src_width + aligned_src_width * 2) * crop_y +
-// src_width + crop_x * 2;
-// r = Q420ToARGB(src, src_width * 3,
-// src_uv, src_width * 3,
-// crop_argb, argb_stride,
-// crop_width, inv_crop_height);
-// break;
// Triplanar formats
case FOURCC_I420:
case FOURCC_YU12:
@@ -241,6 +201,25 @@ int ConvertToARGB(const uint8* sample, size_t sample_size,
crop_width, inv_crop_height);
break;
}
+
+ case FOURCC_J420: {
+ const uint8* src_y = sample + (src_width * crop_y + crop_x);
+ const uint8* src_u;
+ const uint8* src_v;
+ int halfwidth = (src_width + 1) / 2;
+ int halfheight = (abs_src_height + 1) / 2;
+ src_u = sample + src_width * abs_src_height +
+ (halfwidth * crop_y + crop_x) / 2;
+ src_v = sample + src_width * abs_src_height +
+ halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
+ r = J420ToARGB(src_y, src_width,
+ src_u, halfwidth,
+ src_v, halfwidth,
+ crop_argb, argb_stride,
+ crop_width, inv_crop_height);
+ break;
+ }
+
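The pointer arithmetic in the new J420 case indexes a contiguous I420-layout buffer: a src_width x abs_src_height Y plane followed by half-resolution U and V planes. For a 640x480 frame with no crop, src_u lands at byte 307200 (640*480) and src_v at 384000 (307200 + 320*240). The same arithmetic as standalone helpers (hypothetical names, mirroring the expressions above):

    #include <stddef.h>

    static size_t I420UOffset(int w, int h, int crop_x, int crop_y) {
      int halfwidth = (w + 1) / 2;
      return (size_t)w * h + (size_t)(halfwidth * crop_y + crop_x) / 2;
    }

    static size_t I420VOffset(int w, int h, int crop_x, int crop_y) {
      int halfwidth = (w + 1) / 2;
      int halfheight = (h + 1) / 2;
      return (size_t)w * h +
             (size_t)halfwidth * (halfheight + crop_y / 2) +
             (size_t)(crop_x / 2);
    }
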
case FOURCC_I422:
case FOURCC_YV16: {
const uint8* src_y = sample + src_width * crop_y + crop_x;
diff --git a/TMessagesProj/jni/libyuv/source/convert_to_i420.cc b/TMessagesProj/jni/libyuv/source/convert_to_i420.cc
index 7b194fff7..5e75369b5 100644
--- a/TMessagesProj/jni/libyuv/source/convert_to_i420.cc
+++ b/TMessagesProj/jni/libyuv/source/convert_to_i420.cc
@@ -12,7 +12,6 @@
#include "libyuv/convert.h"
-#include "libyuv/format_conversion.h"
#include "libyuv/video_common.h"
#ifdef __cplusplus
@@ -173,40 +172,6 @@ int ConvertToI420(const uint8* sample,
v, v_stride,
crop_width, inv_crop_height);
break;
- // TODO(fbarchard): Support cropping Bayer by odd numbers
- // by adjusting fourcc.
- case FOURCC_BGGR:
- src = sample + (src_width * crop_y + crop_x);
- r = BayerBGGRToI420(src, src_width,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_GBRG:
- src = sample + (src_width * crop_y + crop_x);
- r = BayerGBRGToI420(src, src_width,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_GRBG:
- src = sample + (src_width * crop_y + crop_x);
- r = BayerGRBGToI420(src, src_width,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_RGGB:
- src = sample + (src_width * crop_y + crop_x);
- r = BayerRGGBToI420(src, src_width,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
case FOURCC_I400:
src = sample + src_width * crop_y + crop_x;
r = I400ToI420(src, src_width,
@@ -218,7 +183,8 @@ int ConvertToI420(const uint8* sample,
// Biplanar formats
case FOURCC_NV12:
src = sample + (src_width * crop_y + crop_x);
- src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
+ src_uv = sample + (src_width * src_height) +
+ ((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2);
r = NV12ToI420Rotate(src, src_width,
src_uv, aligned_src_width,
y, y_stride,
@@ -228,7 +194,8 @@ int ConvertToI420(const uint8* sample,
break;
case FOURCC_NV21:
src = sample + (src_width * crop_y + crop_x);
- src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
+ src_uv = sample + (src_width * src_height) +
+ ((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2);
// Call NV12 but with u and v parameters swapped.
r = NV12ToI420Rotate(src, src_width,
src_uv, aligned_src_width,
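
This hunk and the NV12 one above it carry the substantive fix in this file. The old expression `aligned_src_width * (src_height + crop_y / 2) + crop_x` sized the Y plane with the aligned width and stepped right by crop_x single bytes; the corrected form starts at the true end of the Y plane (`src_width * src_height`), steps down `crop_y / 2` chroma rows of `aligned_src_width` bytes each, then right by `(crop_x / 2) * 2` bytes, since each 2x1 chroma sample is an interleaved byte pair. Worked example: for src_width 639 (even-aligned stride 640), height 480 and crop (0, 2), the old formula gave 640 * 481 = 307840, while the correct origin is 639 * 480 + 640 = 307360. As a sketch:

    #include <stddef.h>

    /* Corrected NV12/NV21 chroma origin for a cropped view (sketch of the
       expression used above). */
    static size_t NV12UVOffset(int src_width, int src_height,
                               int aligned_src_width, int crop_x, int crop_y) {
      return (size_t)src_width * src_height +
             (size_t)(crop_y / 2) * aligned_src_width +
             (size_t)(crop_x / 2) * 2;
    }
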
@@ -245,17 +212,6 @@ int ConvertToI420(const uint8* sample,
v, v_stride,
crop_width, inv_crop_height);
break;
- case FOURCC_Q420:
- src = sample + (src_width + aligned_src_width * 2) * crop_y + crop_x;
- src_uv = sample + (src_width + aligned_src_width * 2) * crop_y +
- src_width + crop_x * 2;
- r = Q420ToI420(src, src_width * 3,
- src_uv, src_width * 3,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
// Triplanar formats
case FOURCC_I420:
case FOURCC_YU12:
diff --git a/TMessagesProj/jni/libyuv/source/cpu_id.cc b/TMessagesProj/jni/libyuv/source/cpu_id.cc
index deb4c4465..ff7bdbd92 100644
--- a/TMessagesProj/jni/libyuv/source/cpu_id.cc
+++ b/TMessagesProj/jni/libyuv/source/cpu_id.cc
@@ -14,8 +14,8 @@
 #include <intrin.h>  // For __cpuidex()
#endif
#if !defined(__pnacl__) && !defined(__CLR_VER) && \
- !defined(__native_client__) && \
- defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
+ !defined(__native_client__) && (defined(_M_IX86) || defined(_M_X64)) && \
+ defined(_MSC_VER) && !defined(__clang__) && (_MSC_FULL_VER >= 160040219)
 #include <immintrin.h>  // For _xgetbv()
#endif
@@ -36,19 +36,20 @@ extern "C" {
// For functions that use the stack and have runtime checks for overflow,
// use SAFEBUFFERS to avoid additional check.
-#if defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
+#if (defined(_MSC_VER) && !defined(__clang__)) && (_MSC_FULL_VER >= 160040219)
#define SAFEBUFFERS __declspec(safebuffers)
#else
#define SAFEBUFFERS
#endif
-// Low level cpuid for X86. Returns zeros on other CPUs.
-#if !defined(__pnacl__) && !defined(__CLR_VER) && \
- (defined(_M_IX86) || defined(_M_X64) || \
- defined(__i386__) || defined(__x86_64__))
+// Low level cpuid for X86.
+#if (defined(_M_IX86) || defined(_M_X64) || \
+ defined(__i386__) || defined(__x86_64__)) && \
+ !defined(__pnacl__) && !defined(__CLR_VER)
LIBYUV_API
void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
#if defined(_MSC_VER) && !defined(__clang__)
+// Visual C version uses intrinsic or inline x86 assembly.
#if (_MSC_FULL_VER >= 160040219)
__cpuidex((int*)(cpu_info), info_eax, info_ecx);
#elif defined(_M_IX86)
@@ -62,16 +63,17 @@ void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
mov [edi + 8], ecx
mov [edi + 12], edx
}
-#else
+#else // Visual C but not x86
if (info_ecx == 0) {
__cpuid((int*)(cpu_info), info_eax);
} else {
cpu_info[3] = cpu_info[2] = cpu_info[1] = cpu_info[0] = 0;
}
#endif
-#else // defined(_MSC_VER)
+// GCC version uses inline x86 assembly.
+#else // defined(_MSC_VER) && !defined(__clang__)
uint32 info_ebx, info_edx;
- asm volatile ( // NOLINT
+ asm volatile (
#if defined( __i386__) && defined(__PIC__)
// Preserve ebx for fpic 32 bit.
"mov %%ebx, %%edi \n"
@@ -87,35 +89,47 @@ void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
cpu_info[1] = info_ebx;
cpu_info[2] = info_ecx;
cpu_info[3] = info_edx;
-#endif // defined(_MSC_VER)
+#endif // defined(_MSC_VER) && !defined(__clang__)
}
-
-#if !defined(__native_client__)
-#define HAS_XGETBV
-// X86 CPUs have xgetbv to detect OS saves high parts of ymm registers.
-int TestOsSaveYmm() {
- uint32 xcr0 = 0u;
-#if defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
- xcr0 = (uint32)(_xgetbv(0)); // VS2010 SP1 required.
-#elif defined(_M_IX86) && defined(_MSC_VER)
- __asm {
- xor ecx, ecx // xcr 0
- _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // For VS2010 and earlier.
- mov xcr0, eax
- }
-#elif defined(__i386__) || defined(__x86_64__)
- asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx");
-#endif // defined(_MSC_VER)
- return((xcr0 & 6) == 6); // Is ymm saved?
-}
-#endif // !defined(__native_client__)
-#else
+#else // (defined(_M_IX86) || defined(_M_X64) ...
LIBYUV_API
void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info) {
cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0;
}
#endif
+// For VS2010 and earlier, the xgetbv opcode bytes can be emitted directly:
+// _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // For VS2010 and earlier.
+// __asm {
+// xor ecx, ecx // xcr 0
+// xgetbv
+// mov xcr0, eax
+// }
+// For 32 bit builds with VS2013 and earlier, the optimizer produces bad code
+// for _xgetbv(0).
+// https://code.google.com/p/libyuv/issues/detail?id=529
+#if defined(_M_IX86) && (_MSC_VER < 1900)
+#pragma optimize("g", off)
+#endif
+#if (defined(_M_IX86) || defined(_M_X64) || \
+ defined(__i386__) || defined(__x86_64__)) && \
+ !defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__)
+#define HAS_XGETBV
+// X86 CPUs have xgetbv to detect OS saves high parts of ymm registers.
+int GetXCR0() {
+ uint32 xcr0 = 0u;
+#if (_MSC_FULL_VER >= 160040219)
+ xcr0 = (uint32)(_xgetbv(0)); // VS2010 SP1 required.
+#elif defined(__i386__) || defined(__x86_64__)
+ asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx");
+#endif // defined(__i386__) || defined(__x86_64__)
+ return xcr0;
+}
+#endif // defined(_M_IX86) || defined(_M_X64) ..
+// Return optimization to previous setting.
+#if defined(_M_IX86) && (_MSC_VER < 1900)
+#pragma optimize("g", on)
+#endif
+
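For reference, the XCR0 masks tested in InitCpuFlags below decode as follows (Intel SDM vol. 1, ch. 13; the constant names here are illustrative, not libyuv's):

    enum {
      kXcr0X87      = 1 << 0,  /* x87 FPU state */
      kXcr0Sse      = 1 << 1,  /* XMM state */
      kXcr0Avx      = 1 << 2,  /* upper halves of YMM */
      kXcr0Opmask   = 1 << 5,  /* AVX-512 k0-k7 mask registers */
      kXcr0ZmmHi256 = 1 << 6,  /* upper halves of ZMM0-ZMM15 */
      kXcr0Hi16Zmm  = 1 << 7   /* ZMM16-ZMM31 */
    };
    /* (xcr0 & 6) == 6:        OS saves XMM and YMM state, so AVX is usable. */
    /* (xcr0 & 0xe0) == 0xe0:  OS also saves the AVX-512 state
                               (0x20 | 0x40 | 0x80). */
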
// based on libvpx arm_cpudetect.c
// For Arm, but public to allow testing on any CPU
LIBYUV_API SAFEBUFFERS
@@ -134,36 +148,21 @@ int ArmCpuCaps(const char* cpuinfo_name) {
fclose(f);
return kCpuHasNEON;
}
+ // aarch64 uses asimd for Neon.
+ p = strstr(cpuinfo_line, " asimd");
+ if (p && (p[6] == ' ' || p[6] == '\n')) {
+ fclose(f);
+ return kCpuHasNEON;
+ }
}
}
fclose(f);
return 0;
}
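
Context for the added probe: aarch64 kernels report Advanced SIMD as `asimd` rather than `neon` in /proc/cpuinfo, so a typical Features line reads `Features : fp asimd evtstrm aes pmull sha1 sha2 crc32`, which the pre-existing " neon" search would miss. The `p[6]` check (the character right after the 6-character needle " asimd") ensures the match is a whole token rather than a prefix of a longer feature name.
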
-#if defined(__mips__) && defined(__linux__)
-static int MipsCpuCaps(const char* search_string) {
- char cpuinfo_line[512];
- const char* file_name = "/proc/cpuinfo";
- FILE* f = fopen(file_name, "r");
- if (!f) {
- // Assume DSP if /proc/cpuinfo is unavailable.
- // This will occur for Chrome sandbox for Pepper or Render process.
- return kCpuHasMIPS_DSP;
- }
- while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f) != NULL) {
- if (strstr(cpuinfo_line, search_string) != NULL) {
- fclose(f);
- return kCpuHasMIPS_DSP;
- }
- }
- fclose(f);
- return 0;
-}
-#endif
-
// CPU detect function for SIMD instruction sets.
LIBYUV_API
-int cpu_info_ = kCpuInit; // cpu_info is not initialized yet.
+int cpu_info_ = 0; // cpu_info is not initialized yet.
// Test environment variable for disabling CPU features. Any non-zero value
// to disable. Zero ignored to make it easy to set the variable on/off.
@@ -186,8 +185,9 @@ static LIBYUV_BOOL TestEnv(const char*) {
LIBYUV_API SAFEBUFFERS
int InitCpuFlags(void) {
+ // TODO(fbarchard): swap kCpuInit logic so 0 means uninitialized.
+ int cpu_info = 0;
#if !defined(__pnacl__) && !defined(__CLR_VER) && defined(CPU_X86)
-
uint32 cpu_info0[4] = { 0, 0, 0, 0 };
uint32 cpu_info1[4] = { 0, 0, 0, 0 };
uint32 cpu_info7[4] = { 0, 0, 0, 0 };
@@ -196,92 +196,102 @@ int InitCpuFlags(void) {
if (cpu_info0[0] >= 7) {
CpuId(7, 0, cpu_info7);
}
- cpu_info_ = ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
- ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
- ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
- ((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) |
- ((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0) |
- ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) |
- kCpuHasX86;
+ cpu_info = ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
+ ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
+ ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
+ ((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) |
+ ((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0) |
+ ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) |
+ kCpuHasX86;
#ifdef HAS_XGETBV
- if ((cpu_info1[2] & 0x18000000) == 0x18000000 && // AVX and OSSave
- TestOsSaveYmm()) { // Saves YMM.
- cpu_info_ |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) |
- kCpuHasAVX;
+  // AVX requires CPU support for AVX and XSAVE, plus OSXSave for xgetbv.
+ if (((cpu_info1[2] & 0x1c000000) == 0x1c000000) && // AVX and OSXSave
+ ((GetXCR0() & 6) == 6)) { // Test OS saves YMM registers
+ cpu_info |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) | kCpuHasAVX;
+
+ // Detect AVX512bw
+ if ((GetXCR0() & 0xe0) == 0xe0) {
+ cpu_info |= (cpu_info7[1] & 0x40000000) ? kCpuHasAVX3 : 0;
+ }
}
#endif
+
// Environment variable overrides for testing.
if (TestEnv("LIBYUV_DISABLE_X86")) {
- cpu_info_ &= ~kCpuHasX86;
+ cpu_info &= ~kCpuHasX86;
}
if (TestEnv("LIBYUV_DISABLE_SSE2")) {
- cpu_info_ &= ~kCpuHasSSE2;
+ cpu_info &= ~kCpuHasSSE2;
}
if (TestEnv("LIBYUV_DISABLE_SSSE3")) {
- cpu_info_ &= ~kCpuHasSSSE3;
+ cpu_info &= ~kCpuHasSSSE3;
}
if (TestEnv("LIBYUV_DISABLE_SSE41")) {
- cpu_info_ &= ~kCpuHasSSE41;
+ cpu_info &= ~kCpuHasSSE41;
}
if (TestEnv("LIBYUV_DISABLE_SSE42")) {
- cpu_info_ &= ~kCpuHasSSE42;
+ cpu_info &= ~kCpuHasSSE42;
}
if (TestEnv("LIBYUV_DISABLE_AVX")) {
- cpu_info_ &= ~kCpuHasAVX;
+ cpu_info &= ~kCpuHasAVX;
}
if (TestEnv("LIBYUV_DISABLE_AVX2")) {
- cpu_info_ &= ~kCpuHasAVX2;
+ cpu_info &= ~kCpuHasAVX2;
}
if (TestEnv("LIBYUV_DISABLE_ERMS")) {
- cpu_info_ &= ~kCpuHasERMS;
+ cpu_info &= ~kCpuHasERMS;
}
if (TestEnv("LIBYUV_DISABLE_FMA3")) {
- cpu_info_ &= ~kCpuHasFMA3;
+ cpu_info &= ~kCpuHasFMA3;
+ }
+ if (TestEnv("LIBYUV_DISABLE_AVX3")) {
+ cpu_info &= ~kCpuHasAVX3;
}
-#elif defined(__mips__) && defined(__linux__)
- // Linux mips parse text file for dsp detect.
- cpu_info_ = MipsCpuCaps("dsp"); // set kCpuHasMIPS_DSP.
-#if defined(__mips_dspr2)
- cpu_info_ |= kCpuHasMIPS_DSPR2;
#endif
- cpu_info_ |= kCpuHasMIPS;
+#if defined(__mips__) && defined(__linux__)
+#if defined(__mips_dspr2)
+ cpu_info |= kCpuHasMIPS_DSPR2;
+#endif
+ cpu_info |= kCpuHasMIPS;
if (getenv("LIBYUV_DISABLE_MIPS")) {
- cpu_info_ &= ~kCpuHasMIPS;
- }
- if (getenv("LIBYUV_DISABLE_MIPS_DSP")) {
- cpu_info_ &= ~kCpuHasMIPS_DSP;
+ cpu_info &= ~kCpuHasMIPS;
}
if (getenv("LIBYUV_DISABLE_MIPS_DSPR2")) {
- cpu_info_ &= ~kCpuHasMIPS_DSPR2;
+ cpu_info &= ~kCpuHasMIPS_DSPR2;
}
-#elif defined(__arm__) || defined(__aarch64__)
+#endif
+#if defined(__arm__) || defined(__aarch64__)
// gcc -mfpu=neon defines __ARM_NEON__
// __ARM_NEON__ generates code that requires Neon. NaCL also requires Neon.
// For Linux, /proc/cpuinfo can be tested but without that assume Neon.
#if defined(__ARM_NEON__) || defined(__native_client__) || !defined(__linux__)
- cpu_info_ = kCpuHasNEON;
+ cpu_info = kCpuHasNEON;
 // For aarch64 (arm64), the Features line in /proc/cpuinfo is incomplete
 // (it has no "neon" flag), so Neon support is hard coded here.
-#elif defined(__aarch64__)
- cpu_info_ = kCpuHasNEON;
+#endif
+#if defined(__aarch64__)
+ cpu_info = kCpuHasNEON;
#else
// Linux arm parse text file for neon detect.
- cpu_info_ = ArmCpuCaps("/proc/cpuinfo");
+ cpu_info = ArmCpuCaps("/proc/cpuinfo");
#endif
- cpu_info_ |= kCpuHasARM;
+ cpu_info |= kCpuHasARM;
if (TestEnv("LIBYUV_DISABLE_NEON")) {
- cpu_info_ &= ~kCpuHasNEON;
+ cpu_info &= ~kCpuHasNEON;
}
#endif // __arm__
if (TestEnv("LIBYUV_DISABLE_ASM")) {
- cpu_info_ = 0;
+ cpu_info = 0;
}
- return cpu_info_;
+ cpu_info |= kCpuInitialized;
+ cpu_info_ = cpu_info;
+ return cpu_info;
}
+// Note that use of this function is not thread safe.
LIBYUV_API
void MaskCpuFlags(int enable_flags) {
cpu_info_ = InitCpuFlags() & enable_flags;
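
With `kCpuInit` gone, 0 now means uninitialized, and `kCpuInitialized` is OR'd into every result so the stored value is never 0 after the first call. A sketch of the consumer side under that assumption (libyuv's actual TestCpuFlag lives in cpu_id.h; this is illustrative only):

    extern int cpu_info_;    /* defined in cpu_id.cc; 0 only before init */
    int InitCpuFlags(void);  /* defined in cpu_id.cc */

    static int TestCpuFlagSketch(int test_flag) {
      int cpu_info = cpu_info_;
      if (cpu_info == 0) {
        cpu_info = InitCpuFlags();  /* sets kCpuInitialized, so nonzero */
      }
      return cpu_info & test_flag;
    }
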
diff --git a/TMessagesProj/jni/libyuv/source/format_conversion.cc b/TMessagesProj/jni/libyuv/source/format_conversion.cc
deleted file mode 100644
index 3c1737153..000000000
--- a/TMessagesProj/jni/libyuv/source/format_conversion.cc
+++ /dev/null
@@ -1,554 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/format_conversion.h"
-
-#include "libyuv/basic_types.h"
-#include "libyuv/cpu_id.h"
-#include "libyuv/video_common.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// generate a selector mask useful for pshufb
-static uint32 GenerateSelector(int select0, int select1) {
- return (uint32)(select0) |
- (uint32)((select1 + 4) << 8) |
- (uint32)((select0 + 8) << 16) |
- (uint32)((select1 + 12) << 24);
-}
-
-static int MakeSelectors(const int blue_index,
- const int green_index,
- const int red_index,
- uint32 dst_fourcc_bayer,
- uint32* index_map) {
- // Now build a lookup table containing the indices for the four pixels in each
- // 2x2 Bayer grid.
- switch (dst_fourcc_bayer) {
- case FOURCC_BGGR:
- index_map[0] = GenerateSelector(blue_index, green_index);
- index_map[1] = GenerateSelector(green_index, red_index);
- break;
- case FOURCC_GBRG:
- index_map[0] = GenerateSelector(green_index, blue_index);
- index_map[1] = GenerateSelector(red_index, green_index);
- break;
- case FOURCC_RGGB:
- index_map[0] = GenerateSelector(red_index, green_index);
- index_map[1] = GenerateSelector(green_index, blue_index);
- break;
- case FOURCC_GRBG:
- index_map[0] = GenerateSelector(green_index, red_index);
- index_map[1] = GenerateSelector(blue_index, green_index);
- break;
- default:
- return -1; // Bad FourCC
- }
- return 0;
-}
-
-// Converts 32 bit ARGB to Bayer RGB formats.
-LIBYUV_API
-int ARGBToBayer(const uint8* src_argb, int src_stride_argb,
- uint8* dst_bayer, int dst_stride_bayer,
- int width, int height,
- uint32 dst_fourcc_bayer) {
- int y;
- const int blue_index = 0; // Offsets for ARGB format
- const int green_index = 1;
- const int red_index = 2;
- uint32 index_map[2];
- void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
- uint32 selector, int pix) = ARGBToBayerRow_C;
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
-#if defined(HAS_ARGBTOBAYERROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 &&
- IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
- ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- ARGBToBayerRow = ARGBToBayerRow_SSSE3;
- }
- }
-#elif defined(HAS_ARGBTOBAYERROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- ARGBToBayerRow = ARGBToBayerRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToBayerRow = ARGBToBayerRow_NEON;
- }
- }
-#endif
- if (MakeSelectors(blue_index, green_index, red_index,
- dst_fourcc_bayer, index_map)) {
- return -1; // Bad FourCC
- }
-
- for (y = 0; y < height; ++y) {
- ARGBToBayerRow(src_argb, dst_bayer, index_map[y & 1], width);
- src_argb += src_stride_argb;
- dst_bayer += dst_stride_bayer;
- }
- return 0;
-}
-
-#define AVG(a, b) (((a) + (b)) >> 1)
-
-static void BayerRowBG(const uint8* src_bayer0, int src_stride_bayer,
- uint8* dst_argb, int pix) {
- const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
- uint8 g = src_bayer0[1];
- uint8 r = src_bayer1[1];
- int x;
- for (x = 0; x < pix - 2; x += 2) {
- dst_argb[0] = src_bayer0[0];
- dst_argb[1] = AVG(g, src_bayer0[1]);
- dst_argb[2] = AVG(r, src_bayer1[1]);
- dst_argb[3] = 255U;
- dst_argb[4] = AVG(src_bayer0[0], src_bayer0[2]);
- dst_argb[5] = src_bayer0[1];
- dst_argb[6] = src_bayer1[1];
- dst_argb[7] = 255U;
- g = src_bayer0[1];
- r = src_bayer1[1];
- src_bayer0 += 2;
- src_bayer1 += 2;
- dst_argb += 8;
- }
- dst_argb[0] = src_bayer0[0];
- dst_argb[1] = AVG(g, src_bayer0[1]);
- dst_argb[2] = AVG(r, src_bayer1[1]);
- dst_argb[3] = 255U;
- if (!(pix & 1)) {
- dst_argb[4] = src_bayer0[0];
- dst_argb[5] = src_bayer0[1];
- dst_argb[6] = src_bayer1[1];
- dst_argb[7] = 255U;
- }
-}
-
-static void BayerRowRG(const uint8* src_bayer0, int src_stride_bayer,
- uint8* dst_argb, int pix) {
- const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
- uint8 g = src_bayer0[1];
- uint8 b = src_bayer1[1];
- int x;
- for (x = 0; x < pix - 2; x += 2) {
- dst_argb[0] = AVG(b, src_bayer1[1]);
- dst_argb[1] = AVG(g, src_bayer0[1]);
- dst_argb[2] = src_bayer0[0];
- dst_argb[3] = 255U;
- dst_argb[4] = src_bayer1[1];
- dst_argb[5] = src_bayer0[1];
- dst_argb[6] = AVG(src_bayer0[0], src_bayer0[2]);
- dst_argb[7] = 255U;
- g = src_bayer0[1];
- b = src_bayer1[1];
- src_bayer0 += 2;
- src_bayer1 += 2;
- dst_argb += 8;
- }
- dst_argb[0] = AVG(b, src_bayer1[1]);
- dst_argb[1] = AVG(g, src_bayer0[1]);
- dst_argb[2] = src_bayer0[0];
- dst_argb[3] = 255U;
- if (!(pix & 1)) {
- dst_argb[4] = src_bayer1[1];
- dst_argb[5] = src_bayer0[1];
- dst_argb[6] = src_bayer0[0];
- dst_argb[7] = 255U;
- }
-}
-
-static void BayerRowGB(const uint8* src_bayer0, int src_stride_bayer,
- uint8* dst_argb, int pix) {
- const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
- uint8 b = src_bayer0[1];
- int x;
- for (x = 0; x < pix - 2; x += 2) {
- dst_argb[0] = AVG(b, src_bayer0[1]);
- dst_argb[1] = src_bayer0[0];
- dst_argb[2] = src_bayer1[0];
- dst_argb[3] = 255U;
- dst_argb[4] = src_bayer0[1];
- dst_argb[5] = AVG(src_bayer0[0], src_bayer0[2]);
- dst_argb[6] = AVG(src_bayer1[0], src_bayer1[2]);
- dst_argb[7] = 255U;
- b = src_bayer0[1];
- src_bayer0 += 2;
- src_bayer1 += 2;
- dst_argb += 8;
- }
- dst_argb[0] = AVG(b, src_bayer0[1]);
- dst_argb[1] = src_bayer0[0];
- dst_argb[2] = src_bayer1[0];
- dst_argb[3] = 255U;
- if (!(pix & 1)) {
- dst_argb[4] = src_bayer0[1];
- dst_argb[5] = src_bayer0[0];
- dst_argb[6] = src_bayer1[0];
- dst_argb[7] = 255U;
- }
-}
-
-static void BayerRowGR(const uint8* src_bayer0, int src_stride_bayer,
- uint8* dst_argb, int pix) {
- const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
- uint8 r = src_bayer0[1];
- int x;
- for (x = 0; x < pix - 2; x += 2) {
- dst_argb[0] = src_bayer1[0];
- dst_argb[1] = src_bayer0[0];
- dst_argb[2] = AVG(r, src_bayer0[1]);
- dst_argb[3] = 255U;
- dst_argb[4] = AVG(src_bayer1[0], src_bayer1[2]);
- dst_argb[5] = AVG(src_bayer0[0], src_bayer0[2]);
- dst_argb[6] = src_bayer0[1];
- dst_argb[7] = 255U;
- r = src_bayer0[1];
- src_bayer0 += 2;
- src_bayer1 += 2;
- dst_argb += 8;
- }
- dst_argb[0] = src_bayer1[0];
- dst_argb[1] = src_bayer0[0];
- dst_argb[2] = AVG(r, src_bayer0[1]);
- dst_argb[3] = 255U;
- if (!(pix & 1)) {
- dst_argb[4] = src_bayer1[0];
- dst_argb[5] = src_bayer0[0];
- dst_argb[6] = src_bayer0[1];
- dst_argb[7] = 255U;
- }
-}
-
-// Converts any Bayer RGB format to ARGB.
-LIBYUV_API
-int BayerToARGB(const uint8* src_bayer, int src_stride_bayer,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height,
- uint32 src_fourcc_bayer) {
- int y;
- void (*BayerRow0)(const uint8* src_bayer, int src_stride_bayer,
- uint8* dst_argb, int pix);
- void (*BayerRow1)(const uint8* src_bayer, int src_stride_bayer,
- uint8* dst_argb, int pix);
- if (height < 0) {
- height = -height;
- dst_argb = dst_argb + (height - 1) * dst_stride_argb;
- dst_stride_argb = -dst_stride_argb;
- }
- switch (src_fourcc_bayer) {
- case FOURCC_BGGR:
- BayerRow0 = BayerRowBG;
- BayerRow1 = BayerRowGR;
- break;
- case FOURCC_GBRG:
- BayerRow0 = BayerRowGB;
- BayerRow1 = BayerRowRG;
- break;
- case FOURCC_GRBG:
- BayerRow0 = BayerRowGR;
- BayerRow1 = BayerRowBG;
- break;
- case FOURCC_RGGB:
- BayerRow0 = BayerRowRG;
- BayerRow1 = BayerRowGB;
- break;
- default:
- return -1; // Bad FourCC
- }
-
- for (y = 0; y < height - 1; y += 2) {
- BayerRow0(src_bayer, src_stride_bayer, dst_argb, width);
- BayerRow1(src_bayer + src_stride_bayer, -src_stride_bayer,
- dst_argb + dst_stride_argb, width);
- src_bayer += src_stride_bayer * 2;
- dst_argb += dst_stride_argb * 2;
- }
- if (height & 1) {
- BayerRow0(src_bayer, src_stride_bayer, dst_argb, width);
- }
- return 0;
-}
-
-// Converts any Bayer RGB format to ARGB.
-LIBYUV_API
-int BayerToI420(const uint8* src_bayer, int src_stride_bayer,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height,
- uint32 src_fourcc_bayer) {
- void (*BayerRow0)(const uint8* src_bayer, int src_stride_bayer,
- uint8* dst_argb, int pix);
- void (*BayerRow1)(const uint8* src_bayer, int src_stride_bayer,
- uint8* dst_argb, int pix);
-
- void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
- ARGBToYRow_C;
- // Negative height means invert the image.
- if (height < 0) {
- int halfheight;
- height = -height;
- halfheight = (height + 1) >> 1;
- dst_y = dst_y + (height - 1) * dst_stride_y;
- dst_u = dst_u + (halfheight - 1) * dst_stride_u;
- dst_v = dst_v + (halfheight - 1) * dst_stride_v;
- dst_stride_y = -dst_stride_y;
- dst_stride_u = -dst_stride_u;
- dst_stride_v = -dst_stride_v;
- }
-#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
- ARGBToYRow = ARGBToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
- ARGBToUVRow = ARGBToUVRow_SSSE3;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
- }
- }
-#elif defined(HAS_ARGBTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- ARGBToYRow = ARGBToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYRow = ARGBToYRow_NEON;
- }
- }
-#endif
-#if defined(HAS_ARGBTOUVROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
- ARGBToUVRow = ARGBToUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_NEON;
- }
- }
-#endif
-
- switch (src_fourcc_bayer) {
- case FOURCC_BGGR:
- BayerRow0 = BayerRowBG;
- BayerRow1 = BayerRowGR;
- break;
- case FOURCC_GBRG:
- BayerRow0 = BayerRowGB;
- BayerRow1 = BayerRowRG;
- break;
- case FOURCC_GRBG:
- BayerRow0 = BayerRowGR;
- BayerRow1 = BayerRowBG;
- break;
- case FOURCC_RGGB:
- BayerRow0 = BayerRowRG;
- BayerRow1 = BayerRowGB;
- break;
- default:
- return -1; // Bad FourCC
- }
-
- {
- // Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 15) & ~15;
- align_buffer_64(row, kRowSize * 2);
- int y;
- for (y = 0; y < height - 1; y += 2) {
- BayerRow0(src_bayer, src_stride_bayer, row, width);
- BayerRow1(src_bayer + src_stride_bayer, -src_stride_bayer,
- row + kRowSize, width);
- ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
- ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
- src_bayer += src_stride_bayer * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
- BayerRow0(src_bayer, src_stride_bayer, row, width);
- ARGBToUVRow(row, 0, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
- }
- free_aligned_buffer_64(row);
- }
- return 0;
-}
-
-// Convert I420 to Bayer.
-LIBYUV_API
-int I420ToBayer(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_bayer, int dst_stride_bayer,
- int width, int height,
- uint32 dst_fourcc_bayer) {
- void (*I422ToARGBRow)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = I422ToARGBRow_C;
- void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
- uint32 selector, int pix) = ARGBToBayerRow_C;
- const int blue_index = 0; // Offsets for ARGB format
- const int green_index = 1;
- const int red_index = 2;
- uint32 index_map[2];
- // Negative height means invert the image.
- if (height < 0) {
- int halfheight;
- height = -height;
- halfheight = (height + 1) >> 1;
- src_y = src_y + (height - 1) * src_stride_y;
- src_u = src_u + (halfheight - 1) * src_stride_u;
- src_v = src_v + (halfheight - 1) * src_stride_v;
- src_stride_y = -src_stride_y;
- src_stride_u = -src_stride_u;
- src_stride_v = -src_stride_v;
- }
-#if defined(HAS_I422TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- I422ToARGBRow = I422ToARGBRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_I422TOARGBROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
- I422ToARGBRow = I422ToARGBRow_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- I422ToARGBRow = I422ToARGBRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_I422TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- I422ToARGBRow = I422ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- I422ToARGBRow = I422ToARGBRow_NEON;
- }
- }
-#endif
-#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
- IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
- IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
- IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2)) {
- I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
- }
-#endif
-
-#if defined(HAS_ARGBTOBAYERROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- ARGBToBayerRow = ARGBToBayerRow_SSSE3;
- }
- }
-#elif defined(HAS_ARGBTOBAYERROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- ARGBToBayerRow = ARGBToBayerRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToBayerRow = ARGBToBayerRow_NEON;
- }
- }
-#endif
-
- if (MakeSelectors(blue_index, green_index, red_index,
- dst_fourcc_bayer, index_map)) {
- return -1; // Bad FourCC
- }
- {
- // Allocate a row of ARGB.
- align_buffer_64(row, width * 4);
- int y;
- for (y = 0; y < height; ++y) {
- I422ToARGBRow(src_y, src_u, src_v, row, width);
- ARGBToBayerRow(row, dst_bayer, index_map[y & 1], width);
- dst_bayer += dst_stride_bayer;
- src_y += src_stride_y;
- if (y & 1) {
- src_u += src_stride_u;
- src_v += src_stride_v;
- }
- }
- free_aligned_buffer_64(row);
- }
- return 0;
-}
-
-#define MAKEBAYERFOURCC(BAYER) \
-LIBYUV_API \
-int Bayer##BAYER##ToI420(const uint8* src_bayer, int src_stride_bayer, \
- uint8* dst_y, int dst_stride_y, \
- uint8* dst_u, int dst_stride_u, \
- uint8* dst_v, int dst_stride_v, \
- int width, int height) { \
- return BayerToI420(src_bayer, src_stride_bayer, \
- dst_y, dst_stride_y, \
- dst_u, dst_stride_u, \
- dst_v, dst_stride_v, \
- width, height, \
- FOURCC_##BAYER); \
-} \
- \
-LIBYUV_API \
-int I420ToBayer##BAYER(const uint8* src_y, int src_stride_y, \
- const uint8* src_u, int src_stride_u, \
- const uint8* src_v, int src_stride_v, \
- uint8* dst_bayer, int dst_stride_bayer, \
- int width, int height) { \
- return I420ToBayer(src_y, src_stride_y, \
- src_u, src_stride_u, \
- src_v, src_stride_v, \
- dst_bayer, dst_stride_bayer, \
- width, height, \
- FOURCC_##BAYER); \
-} \
- \
-LIBYUV_API \
-int ARGBToBayer##BAYER(const uint8* src_argb, int src_stride_argb, \
- uint8* dst_bayer, int dst_stride_bayer, \
- int width, int height) { \
- return ARGBToBayer(src_argb, src_stride_argb, \
- dst_bayer, dst_stride_bayer, \
- width, height, \
- FOURCC_##BAYER); \
-} \
- \
-LIBYUV_API \
-int Bayer##BAYER##ToARGB(const uint8* src_bayer, int src_stride_bayer, \
- uint8* dst_argb, int dst_stride_argb, \
- int width, int height) { \
- return BayerToARGB(src_bayer, src_stride_bayer, \
- dst_argb, dst_stride_argb, \
- width, height, \
- FOURCC_##BAYER); \
-}
-
-MAKEBAYERFOURCC(BGGR)
-MAKEBAYERFOURCC(GBRG)
-MAKEBAYERFOURCC(GRBG)
-MAKEBAYERFOURCC(RGGB)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/TMessagesProj/jni/libyuv/source/mjpeg_decoder.cc b/TMessagesProj/jni/libyuv/source/mjpeg_decoder.cc
index 36028c3cc..50818418a 100644
--- a/TMessagesProj/jni/libyuv/source/mjpeg_decoder.cc
+++ b/TMessagesProj/jni/libyuv/source/mjpeg_decoder.cc
@@ -18,6 +18,12 @@
// Must be included before jpeglib.
#include <setjmp.h>
#define HAVE_SETJMP
+
+#if defined(_MSC_VER)
+// disable warning 4324: structure was padded due to __declspec(align())
+#pragma warning(disable:4324)
+#endif
+
#endif
struct FILE; // For jpeglib.h.
@@ -53,8 +59,7 @@ const int MJpegDecoder::kColorSpaceYCCK = JCS_YCCK;
// Methods that are passed to jpeglib.
boolean fill_input_buffer(jpeg_decompress_struct* cinfo);
void init_source(jpeg_decompress_struct* cinfo);
-void skip_input_data(jpeg_decompress_struct* cinfo,
- long num_bytes); // NOLINT
+void skip_input_data(jpeg_decompress_struct* cinfo, long num_bytes); // NOLINT
void term_source(jpeg_decompress_struct* cinfo);
void ErrorHandler(jpeg_common_struct* cinfo);
@@ -423,8 +428,7 @@ boolean fill_input_buffer(j_decompress_ptr cinfo) {
return TRUE;
}
-void skip_input_data(j_decompress_ptr cinfo,
- long num_bytes) { // NOLINT
+void skip_input_data(j_decompress_ptr cinfo, long num_bytes) { // NOLINT
cinfo->src->next_input_byte += num_bytes;
}
diff --git a/TMessagesProj/jni/libyuv/source/mjpeg_validate.cc b/TMessagesProj/jni/libyuv/source/mjpeg_validate.cc
index 23d22d099..9c4883204 100644
--- a/TMessagesProj/jni/libyuv/source/mjpeg_validate.cc
+++ b/TMessagesProj/jni/libyuv/source/mjpeg_validate.cc
@@ -10,36 +10,60 @@
#include "libyuv/mjpeg_decoder.h"
+#include <string.h>  // For memchr.
+
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
-// Helper function to validate the jpeg appears intact.
-// TODO(fbarchard): Optimize case where SOI is found but EOI is not.
-LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size) {
- size_t i;
- if (sample_size < 64) {
- // ERROR: Invalid jpeg size: sample_size
- return LIBYUV_FALSE;
- }
- if (sample[0] != 0xff || sample[1] != 0xd8) { // Start Of Image
- // ERROR: Invalid jpeg initial start code
- return LIBYUV_FALSE;
- }
- for (i = sample_size - 2; i > 1;) {
- if (sample[i] != 0xd9) {
- if (sample[i] == 0xff && sample[i + 1] == 0xd9) { // End Of Image
+// Helper function to scan for EOI marker (0xff 0xd9).
+static LIBYUV_BOOL ScanEOI(const uint8* sample, size_t sample_size) {
+ if (sample_size >= 2) {
+ const uint8* end = sample + sample_size - 1;
+ const uint8* it = sample;
+ while (it < end) {
+ // TODO(fbarchard): scan for 0xd9 instead.
+ it = static_cast<const uint8*>(memchr(it, 0xff, end - it));
+ if (it == NULL) {
+ break;
+ }
+ if (it[1] == 0xd9) {
return LIBYUV_TRUE; // Success: Valid jpeg.
}
- --i;
+ ++it; // Skip over current 0xff.
}
- --i;
}
// ERROR: Invalid jpeg end code not found. Size sample_size
return LIBYUV_FALSE;
}
+// Helper function to validate that the jpeg appears intact.
+LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size) {
+ // Maximum size that ValidateJpeg will consider valid.
+ const size_t kMaxJpegSize = 0x7fffffffull;
+ const size_t kBackSearchSize = 1024;
+ if (sample_size < 64 || sample_size > kMaxJpegSize || !sample) {
+ // ERROR: Invalid jpeg size: sample_size
+ return LIBYUV_FALSE;
+ }
+ if (sample[0] != 0xff || sample[1] != 0xd8) { // SOI marker
+ // ERROR: Invalid jpeg initial start code
+ return LIBYUV_FALSE;
+ }
+
+ // Look for the End Of Image (EOI) marker near the end of the buffer.
+ if (sample_size > kBackSearchSize) {
+ if (ScanEOI(sample + sample_size - kBackSearchSize, kBackSearchSize)) {
+ return LIBYUV_TRUE; // Success: Valid jpeg.
+ }
+ // Reduce search size for forward search.
+ sample_size = sample_size - kBackSearchSize + 1;
+ }
+ // Step over SOI marker and scan for EOI.
+ return ScanEOI(sample + 2, sample_size - 2);
+}
+
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
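
The rewritten validator replaces the old byte-by-byte backward walk with two memchr-driven scans: first over the last 1024 bytes, where the EOI marker almost always sits, then over the rest of the payload. Note the sample_size - kBackSearchSize + 1 adjustment, which keeps a one-byte overlap so a marker pair straddling the window boundary is not missed. A minimal standalone sketch of the scan itself, using stdint types instead of libyuv's uint8 typedef (scan_eoi and the other names are illustrative, not library API):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>  /* memchr */

    /* Scan [buf, buf + len) for the JPEG End Of Image marker 0xff 0xd9. */
    static int scan_eoi(const uint8_t* buf, size_t len) {
      if (len >= 2) {
        const uint8_t* end = buf + len - 1;  /* last byte that can start a pair */
        const uint8_t* it = buf;
        while (it < end) {
          it = (const uint8_t*)memchr(it, 0xff, (size_t)(end - it));
          if (it == NULL) {
            break;
          }
          if (it[1] == 0xd9) {
            return 1;  /* found 0xff 0xd9 */
          }
          ++it;  /* step past this 0xff and keep scanning */
        }
      }
      return 0;  /* no EOI in this window */
    }

Checking a fixed-size tail window first keeps the common case cheap regardless of frame size, while the forward pass preserves correctness for truncated frames.
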
diff --git a/TMessagesProj/jni/libyuv/source/planar_functions.cc b/TMessagesProj/jni/libyuv/source/planar_functions.cc
index 3857008ca..536e1d528 100644
--- a/TMessagesProj/jni/libyuv/source/planar_functions.cc
+++ b/TMessagesProj/jni/libyuv/source/planar_functions.cc
@@ -17,6 +17,7 @@
#include "libyuv/mjpeg_decoder.h"
#endif
#include "libyuv/row.h"
+#include "libyuv/scale_row.h" // for ScaleRowDown2
#ifdef __cplusplus
namespace libyuv {
@@ -41,16 +42,14 @@ void CopyPlane(const uint8* src_y, int src_stride_y,
if (src_y == dst_y && src_stride_y == dst_stride_y) {
return;
}
-#if defined(HAS_COPYROW_X86)
- if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
- CopyRow = CopyRow_X86;
+#if defined(HAS_COPYROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
}
#endif
-#if defined(HAS_COPYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
- IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
- IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- CopyRow = CopyRow_SSE2;
+#if defined(HAS_COPYROW_AVX)
+ if (TestCpuFlag(kCpuHasAVX)) {
+ CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
}
#endif
#if defined(HAS_COPYROW_ERMS)
@@ -59,8 +58,8 @@ void CopyPlane(const uint8* src_y, int src_stride_y,
}
#endif
#if defined(HAS_COPYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
- CopyRow = CopyRow_NEON;
+ if (TestCpuFlag(kCpuHasNEON)) {
+ CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
}
#endif
#if defined(HAS_COPYROW_MIPS)
@@ -90,15 +89,8 @@ void CopyPlane_16(const uint16* src_y, int src_stride_y,
height = 1;
src_stride_y = dst_stride_y = 0;
}
-#if defined(HAS_COPYROW_16_X86)
- if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
- CopyRow = CopyRow_16_X86;
- }
-#endif
#if defined(HAS_COPYROW_16_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
- IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
- IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+ if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32)) {
CopyRow = CopyRow_16_SSE2;
}
#endif
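
CopyPlane also shows the "coalesce rows" idiom that the relaxed alignment requirements make more widely applicable: when every stride equals the row width, the plane is one contiguous block, so H rows of W bytes can be processed as a single row of W * H bytes and the per-row dispatch cost is paid once. A minimal sketch, with illustrative names:

    #include <stdint.h>
    #include <string.h>

    static void copy_plane_sketch(const uint8_t* src, int src_stride,
                                  uint8_t* dst, int dst_stride,
                                  int width, int height) {
      int y;
      if (src_stride == width && dst_stride == width) {
        width *= height;  /* whole plane is contiguous: treat as one row */
        height = 1;
        src_stride = dst_stride = 0;
      }
      for (y = 0; y < height; ++y) {
        memcpy(dst, src, (size_t)width);
        src += src_stride;
        dst += dst_stride;
      }
    }
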
@@ -239,25 +231,35 @@ void MirrorPlane(const uint8* src_y, int src_stride_y,
src_stride_y = -src_stride_y;
}
#if defined(HAS_MIRRORROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
- MirrorRow = MirrorRow_NEON;
- }
-#endif
-#if defined(HAS_MIRRORROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
- MirrorRow = MirrorRow_SSE2;
+ if (TestCpuFlag(kCpuHasNEON)) {
+ MirrorRow = MirrorRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ MirrorRow = MirrorRow_NEON;
+ }
}
#endif
#if defined(HAS_MIRRORROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
- IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
- IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- MirrorRow = MirrorRow_SSSE3;
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ MirrorRow = MirrorRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ MirrorRow = MirrorRow_SSSE3;
+ }
}
#endif
#if defined(HAS_MIRRORROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) {
- MirrorRow = MirrorRow_AVX2;
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ MirrorRow = MirrorRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ MirrorRow = MirrorRow_AVX2;
+ }
+ }
+#endif
+// TODO(fbarchard): Make MirrorRow on MIPS handle unaligned memory.
+#if defined(HAS_MIRRORROW_MIPS_DSPR2)
+ if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
+ IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
+ IS_ALIGNED(dst_y, 4) && IS_ALIGNED(dst_stride_y, 4)) {
+ MirrorRow = MirrorRow_MIPS_DSPR2;
}
#endif
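
The MirrorPlane hunk is the template for the dispatch pattern this patch rolls out everywhere: the _Any_ variant is safe for any width (it runs the SIMD body and finishes the leftover pixels in C), and the exact SIMD kernel is chosen only when the width fills whole vectors; the old pointer and stride alignment checks disappear because the new kernels use unaligned loads and stores. Sketched in C with placeholder names:

    #include <stdint.h>

    typedef void (*RowFn)(const uint8_t* src, uint8_t* dst, int width);

    /* Pick the best row function: C fallback, then Any (handles odd widths),
       then the full SIMD kernel when the width fills whole vectors. */
    static RowFn choose_row(int width, int has_simd,
                            RowFn c_ver, RowFn any_ver, RowFn full_ver) {
      RowFn fn = c_ver;
      if (has_simd) {
        fn = any_ver;
        if ((width & 15) == 0) {  /* IS_ALIGNED(width, 16) */
          fn = full_ver;
        }
      }
      return fn;
    }
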
@@ -278,9 +280,9 @@ int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
int width, int height) {
int y;
void (*YUY2ToUV422Row)(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) =
+ uint8* dst_u, uint8* dst_v, int width) =
YUY2ToUV422Row_C;
- void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) =
+ void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int width) =
YUY2ToYRow_C;
// Negative height means invert the image.
if (height < 0) {
@@ -298,23 +300,17 @@ int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
src_stride_yuy2 = dst_stride_y = dst_stride_u = dst_stride_v = 0;
}
#if defined(HAS_YUY2TOYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
+ if (TestCpuFlag(kCpuHasSSE2)) {
YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
YUY2ToYRow = YUY2ToYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
- YUY2ToUV422Row = YUY2ToUV422Row_Unaligned_SSE2;
- YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2;
- if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) {
- YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- YUY2ToYRow = YUY2ToYRow_SSE2;
- }
- }
+ YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
+ YUY2ToYRow = YUY2ToYRow_SSE2;
}
}
#endif
#if defined(HAS_YUY2TOYROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
+ if (TestCpuFlag(kCpuHasAVX2)) {
YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2;
YUY2ToYRow = YUY2ToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
@@ -324,7 +320,7 @@ int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
}
#endif
#if defined(HAS_YUY2TOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+ if (TestCpuFlag(kCpuHasNEON)) {
YUY2ToYRow = YUY2ToYRow_Any_NEON;
if (width >= 16) {
YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
@@ -356,10 +352,10 @@ int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
int width, int height) {
int y;
void (*UYVYToUV422Row)(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) =
+ uint8* dst_u, uint8* dst_v, int width) =
UYVYToUV422Row_C;
void (*UYVYToYRow)(const uint8* src_uyvy,
- uint8* dst_y, int pix) = UYVYToYRow_C;
+ uint8* dst_y, int width) = UYVYToYRow_C;
// Negative height means invert the image.
if (height < 0) {
height = -height;
@@ -376,23 +372,17 @@ int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
src_stride_uyvy = dst_stride_y = dst_stride_u = dst_stride_v = 0;
}
#if defined(HAS_UYVYTOYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
+ if (TestCpuFlag(kCpuHasSSE2)) {
UYVYToUV422Row = UYVYToUV422Row_Any_SSE2;
UYVYToYRow = UYVYToYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
- UYVYToUV422Row = UYVYToUV422Row_Unaligned_SSE2;
- UYVYToYRow = UYVYToYRow_Unaligned_SSE2;
- if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16)) {
- UYVYToUV422Row = UYVYToUV422Row_SSE2;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- UYVYToYRow = UYVYToYRow_SSE2;
- }
- }
+ UYVYToUV422Row = UYVYToUV422Row_SSE2;
+ UYVYToYRow = UYVYToYRow_SSE2;
}
}
#endif
#if defined(HAS_UYVYTOYROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
+ if (TestCpuFlag(kCpuHasAVX2)) {
UYVYToUV422Row = UYVYToUV422Row_Any_AVX2;
UYVYToYRow = UYVYToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
@@ -402,7 +392,7 @@ int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
}
#endif
#if defined(HAS_UYVYTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+ if (TestCpuFlag(kCpuHasNEON)) {
UYVYToYRow = UYVYToYRow_Any_NEON;
if (width >= 16) {
UYVYToUV422Row = UYVYToUV422Row_Any_NEON;
@@ -497,22 +487,28 @@ int ARGBMirror(const uint8* src_argb, int src_stride_argb,
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
-
-#if defined(HAS_ARGBMIRRORROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) &&
- IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- ARGBMirrorRow = ARGBMirrorRow_SSSE3;
+#if defined(HAS_ARGBMIRRORROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBMirrorRow = ARGBMirrorRow_Any_NEON;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBMirrorRow = ARGBMirrorRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBMIRRORROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ARGBMirrorRow = ARGBMirrorRow_Any_SSE2;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBMirrorRow = ARGBMirrorRow_SSE2;
+ }
}
#endif
#if defined(HAS_ARGBMIRRORROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) {
- ARGBMirrorRow = ARGBMirrorRow_AVX2;
- }
-#endif
-#if defined(HAS_ARGBMIRRORROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) {
- ARGBMirrorRow = ARGBMirrorRow_NEON;
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBMirrorRow = ARGBMirrorRow_Any_AVX2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBMirrorRow = ARGBMirrorRow_AVX2;
+ }
}
#endif
@@ -525,7 +521,7 @@ int ARGBMirror(const uint8* src_argb, int src_stride_argb,
return 0;
}
-// Get a blender that optimized for the CPU, alignment and pixel count.
+// Get a blender optimized for the CPU and pixel count.
// As there are 6 blenders to choose from, the caller should try to use
// the same blend function for all pixels if possible.
LIBYUV_API
@@ -538,11 +534,6 @@ ARGBBlendRow GetARGBBlend() {
return ARGBBlendRow;
}
#endif
-#if defined(HAS_ARGBBLENDROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- ARGBBlendRow = ARGBBlendRow_SSE2;
- }
-#endif
#if defined(HAS_ARGBBLENDROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBBlendRow = ARGBBlendRow_NEON;
@@ -587,6 +578,167 @@ int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
return 0;
}
+// Alpha Blend plane and store to destination.
+LIBYUV_API
+int BlendPlane(const uint8* src_y0, int src_stride_y0,
+ const uint8* src_y1, int src_stride_y1,
+ const uint8* alpha, int alpha_stride,
+ uint8* dst_y, int dst_stride_y,
+ int width, int height) {
+ int y;
+ void (*BlendPlaneRow)(const uint8* src0, const uint8* src1,
+ const uint8* alpha, uint8* dst, int width) = BlendPlaneRow_C;
+ if (!src_y0 || !src_y1 || !alpha || !dst_y || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_y = dst_y + (height - 1) * dst_stride_y;
+ dst_stride_y = -dst_stride_y;
+ }
+
+ // Coalesce rows for Y plane.
+ if (src_stride_y0 == width &&
+ src_stride_y1 == width &&
+ alpha_stride == width &&
+ dst_stride_y == width) {
+ width *= height;
+ height = 1;
+ src_stride_y0 = src_stride_y1 = alpha_stride = dst_stride_y = 0;
+ }
+
+#if defined(HAS_BLENDPLANEROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 8)) {
+ BlendPlaneRow = BlendPlaneRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_BLENDPLANEROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ BlendPlaneRow = BlendPlaneRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ BlendPlaneRow = BlendPlaneRow_AVX2;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ BlendPlaneRow(src_y0, src_y1, alpha, dst_y, width);
+ src_y0 += src_stride_y0;
+ src_y1 += src_stride_y1;
+ alpha += alpha_stride;
+ dst_y += dst_stride_y;
+ }
+ return 0;
+}
+
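
BlendPlaneRow_C itself lives in row_common.cc, which this patch does not show, so the per-pixel formula below is a plausible reconstruction rather than a quote; the exact rounding bias is an assumption. Alpha 255 selects src0 and alpha 0 selects src1:

    #include <stdint.h>

    static void blend_plane_row_sketch(const uint8_t* src0, const uint8_t* src1,
                                       const uint8_t* alpha, uint8_t* dst,
                                       int width) {
      int x;
      for (x = 0; x < width; ++x) {
        int a = alpha[x];
        /* +255 biases the >>8 upward so a == 255 yields src0 exactly. */
        dst[x] = (uint8_t)((a * src0[x] + (255 - a) * src1[x] + 255) >> 8);
      }
    }
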
+#define MAXTWIDTH 2048
+// Alpha Blend YUV images and store to destination.
+LIBYUV_API
+int I420Blend(const uint8* src_y0, int src_stride_y0,
+ const uint8* src_u0, int src_stride_u0,
+ const uint8* src_v0, int src_stride_v0,
+ const uint8* src_y1, int src_stride_y1,
+ const uint8* src_u1, int src_stride_u1,
+ const uint8* src_v1, int src_stride_v1,
+ const uint8* alpha, int alpha_stride,
+ uint8* dst_y, int dst_stride_y,
+ uint8* dst_u, int dst_stride_u,
+ uint8* dst_v, int dst_stride_v,
+ int width, int height) {
+ int y;
+ // Half width/height for UV.
+ int halfwidth = (width + 1) >> 1;
+ void (*BlendPlaneRow)(const uint8* src0, const uint8* src1,
+ const uint8* alpha, uint8* dst, int width) = BlendPlaneRow_C;
+ void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width) = ScaleRowDown2Box_C;
+ if (!src_y0 || !src_u0 || !src_v0 || !src_y1 || !src_u1 || !src_v1 ||
+ !alpha || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
+ return -1;
+ }
+
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_y = dst_y + (height - 1) * dst_stride_y;
+ dst_stride_y = -dst_stride_y;
+ }
+
+ // Blend Y plane.
+ BlendPlane(src_y0, src_stride_y0,
+ src_y1, src_stride_y1,
+ alpha, alpha_stride,
+ dst_y, dst_stride_y,
+ width, height);
+
+#if defined(HAS_BLENDPLANEROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
+ if (IS_ALIGNED(halfwidth, 8)) {
+ BlendPlaneRow = BlendPlaneRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_BLENDPLANEROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ BlendPlaneRow = BlendPlaneRow_Any_AVX2;
+ if (IS_ALIGNED(halfwidth, 32)) {
+ BlendPlaneRow = BlendPlaneRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_SCALEROWDOWN2_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ScaleRowDown2 = ScaleRowDown2Box_Any_NEON;
+ if (IS_ALIGNED(halfwidth, 16)) {
+ ScaleRowDown2 = ScaleRowDown2Box_NEON;
+ }
+ }
+#endif
+#if defined(HAS_SCALEROWDOWN2_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ScaleRowDown2 = ScaleRowDown2Box_Any_SSSE3;
+ if (IS_ALIGNED(halfwidth, 16)) {
+ ScaleRowDown2 = ScaleRowDown2Box_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_SCALEROWDOWN2_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ScaleRowDown2 = ScaleRowDown2Box_Any_AVX2;
+ if (IS_ALIGNED(halfwidth, 32)) {
+ ScaleRowDown2 = ScaleRowDown2Box_AVX2;
+ }
+ }
+#endif
+
+ // Row buffer for intermediate alpha pixels.
+ align_buffer_64(halfalpha, halfwidth);
+ for (y = 0; y < height; y += 2) {
+ // The last row of an odd-height image uses 1 row of alpha instead of 2.
+ if (y == (height - 1)) {
+ alpha_stride = 0;
+ }
+ // Subsample 2 rows of alpha to one half-width row.
+ ScaleRowDown2(alpha, alpha_stride, halfalpha, halfwidth);
+ alpha += alpha_stride * 2;
+ BlendPlaneRow(src_u0, src_u1, halfalpha, dst_u, halfwidth);
+ BlendPlaneRow(src_v0, src_v1, halfalpha, dst_v, halfwidth);
+ src_u0 += src_stride_u0;
+ src_u1 += src_stride_u1;
+ dst_u += dst_stride_u;
+ src_v0 += src_stride_v0;
+ src_v1 += src_stride_v1;
+ dst_v += dst_stride_v;
+ }
+ free_aligned_buffer_64(halfalpha);
+ return 0;
+}
+
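
The U/V loop above borrows a scaler kernel to collapse two full-resolution alpha rows into one half-width row matching the 4:2:0 chroma geometry; setting alpha_stride to 0 on the last row of an odd-height image makes the kernel average that row with itself. A sketch with the shape of ScaleRowDown2Box_C (the exact rounding lives in the scaler sources):

    #include <stddef.h>
    #include <stdint.h>

    /* Average each 2x2 box of source pixels into one destination pixel. */
    static void scale_row_down2_box_sketch(const uint8_t* src, ptrdiff_t stride,
                                           uint8_t* dst, int dst_width) {
      const uint8_t* s = src;
      const uint8_t* t = src + stride;  /* second row; stride 0 repeats row 1 */
      int x;
      for (x = 0; x < dst_width; ++x) {
        dst[x] = (uint8_t)((s[0] + s[1] + t[0] + t[1] + 2) >> 2);  /* rounded */
        s += 2;
        t += 2;
      }
    }
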
// Multiply 2 ARGB images and store to destination.
LIBYUV_API
int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
@@ -614,7 +766,7 @@ int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
}
#if defined(HAS_ARGBMULTIPLYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
+ if (TestCpuFlag(kCpuHasSSE2)) {
ARGBMultiplyRow = ARGBMultiplyRow_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBMultiplyRow = ARGBMultiplyRow_SSE2;
@@ -622,7 +774,7 @@ int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
}
#endif
#if defined(HAS_ARGBMULTIPLYROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
+ if (TestCpuFlag(kCpuHasAVX2)) {
ARGBMultiplyRow = ARGBMultiplyRow_Any_AVX2;
if (IS_ALIGNED(width, 8)) {
ARGBMultiplyRow = ARGBMultiplyRow_AVX2;
@@ -630,7 +782,7 @@ int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
}
#endif
#if defined(HAS_ARGBMULTIPLYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+ if (TestCpuFlag(kCpuHasNEON)) {
ARGBMultiplyRow = ARGBMultiplyRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBMultiplyRow = ARGBMultiplyRow_NEON;
@@ -674,13 +826,13 @@ int ARGBAdd(const uint8* src_argb0, int src_stride_argb0,
height = 1;
src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
}
-#if defined(HAS_ARGBADDROW_SSE2) && defined(_MSC_VER)
+#if defined(HAS_ARGBADDROW_SSE2) && (defined(_MSC_VER) && !defined(__clang__))
if (TestCpuFlag(kCpuHasSSE2)) {
ARGBAddRow = ARGBAddRow_SSE2;
}
#endif
-#if defined(HAS_ARGBADDROW_SSE2) && !defined(_MSC_VER)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
+#if defined(HAS_ARGBADDROW_SSE2) && !(defined(_MSC_VER) && !defined(__clang__))
+ if (TestCpuFlag(kCpuHasSSE2)) {
ARGBAddRow = ARGBAddRow_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBAddRow = ARGBAddRow_SSE2;
@@ -688,7 +840,7 @@ int ARGBAdd(const uint8* src_argb0, int src_stride_argb0,
}
#endif
#if defined(HAS_ARGBADDROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
+ if (TestCpuFlag(kCpuHasAVX2)) {
ARGBAddRow = ARGBAddRow_Any_AVX2;
if (IS_ALIGNED(width, 8)) {
ARGBAddRow = ARGBAddRow_AVX2;
@@ -696,7 +848,7 @@ int ARGBAdd(const uint8* src_argb0, int src_stride_argb0,
}
#endif
#if defined(HAS_ARGBADDROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+ if (TestCpuFlag(kCpuHasNEON)) {
ARGBAddRow = ARGBAddRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBAddRow = ARGBAddRow_NEON;
@@ -741,7 +893,7 @@ int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0,
src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
}
#if defined(HAS_ARGBSUBTRACTROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
+ if (TestCpuFlag(kCpuHasSSE2)) {
ARGBSubtractRow = ARGBSubtractRow_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBSubtractRow = ARGBSubtractRow_SSE2;
@@ -749,7 +901,7 @@ int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0,
}
#endif
#if defined(HAS_ARGBSUBTRACTROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
+ if (TestCpuFlag(kCpuHasAVX2)) {
ARGBSubtractRow = ARGBSubtractRow_Any_AVX2;
if (IS_ALIGNED(width, 8)) {
ARGBSubtractRow = ARGBSubtractRow_AVX2;
@@ -757,7 +909,7 @@ int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0,
}
#endif
#if defined(HAS_ARGBSUBTRACTROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+ if (TestCpuFlag(kCpuHasNEON)) {
ARGBSubtractRow = ARGBSubtractRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBSubtractRow = ARGBSubtractRow_NEON;
@@ -774,132 +926,67 @@ int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0,
}
return 0;
}
-
-// Convert I422 to BGRA.
-LIBYUV_API
-int I422ToBGRA(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_bgra, int dst_stride_bgra,
- int width, int height) {
+// Convert I422 to RGBA with matrix.
+static int I422ToRGBAMatrix(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_rgba, int dst_stride_rgba,
+ const struct YuvConstants* yuvconstants,
+ int width, int height) {
int y;
- void (*I422ToBGRARow)(const uint8* y_buf,
+ void (*I422ToRGBARow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
- int width) = I422ToBGRARow_C;
- if (!src_y || !src_u || !src_v ||
- !dst_bgra ||
+ const struct YuvConstants* yuvconstants,
+ int width) = I422ToRGBARow_C;
+ if (!src_y || !src_u || !src_v || !dst_rgba ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
- dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra;
- dst_stride_bgra = -dst_stride_bgra;
+ dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
+ dst_stride_rgba = -dst_stride_rgba;
}
- // Coalesce rows.
- if (src_stride_y == width &&
- src_stride_u * 2 == width &&
- src_stride_v * 2 == width &&
- dst_stride_bgra == width * 4) {
- width *= height;
- height = 1;
- src_stride_y = src_stride_u = src_stride_v = dst_stride_bgra = 0;
- }
-#if defined(HAS_I422TOBGRAROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- I422ToBGRARow = I422ToBGRARow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- I422ToBGRARow = I422ToBGRARow_NEON;
- }
- }
-#elif defined(HAS_I422TOBGRAROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- I422ToBGRARow = I422ToBGRARow_Any_SSSE3;
+#if defined(HAS_I422TORGBAROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
- I422ToBGRARow = I422ToBGRARow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) {
- I422ToBGRARow = I422ToBGRARow_SSSE3;
- }
+ I422ToRGBARow = I422ToRGBARow_SSSE3;
}
}
-#elif defined(HAS_I422TOBGRAROW_MIPS_DSPR2)
+#endif
+#if defined(HAS_I422TORGBAROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToRGBARow = I422ToRGBARow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToRGBARow = I422ToRGBARow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I422TORGBAROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I422ToRGBARow = I422ToRGBARow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToRGBARow = I422ToRGBARow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_I422TORGBAROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
- IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) {
- I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2;
+ IS_ALIGNED(dst_rgba, 4) && IS_ALIGNED(dst_stride_rgba, 4)) {
+ I422ToRGBARow = I422ToRGBARow_MIPS_DSPR2;
}
#endif
for (y = 0; y < height; ++y) {
- I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
- dst_bgra += dst_stride_bgra;
- src_y += src_stride_y;
- src_u += src_stride_u;
- src_v += src_stride_v;
- }
- return 0;
-}
-
-// Convert I422 to ABGR.
-LIBYUV_API
-int I422ToABGR(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_abgr, int dst_stride_abgr,
- int width, int height) {
- int y;
- void (*I422ToABGRRow)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = I422ToABGRRow_C;
- if (!src_y || !src_u || !src_v ||
- !dst_abgr ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
- dst_stride_abgr = -dst_stride_abgr;
- }
- // Coalesce rows.
- if (src_stride_y == width &&
- src_stride_u * 2 == width &&
- src_stride_v * 2 == width &&
- dst_stride_abgr == width * 4) {
- width *= height;
- height = 1;
- src_stride_y = src_stride_u = src_stride_v = dst_stride_abgr = 0;
- }
-#if defined(HAS_I422TOABGRROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- I422ToABGRRow = I422ToABGRRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- I422ToABGRRow = I422ToABGRRow_NEON;
- }
- }
-#elif defined(HAS_I422TOABGRROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- I422ToABGRRow = I422ToABGRRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) {
- I422ToABGRRow = I422ToABGRRow_SSSE3;
- }
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
- dst_abgr += dst_stride_abgr;
+ I422ToRGBARow(src_y, src_u, src_v, dst_rgba, yuvconstants, width);
+ dst_rgba += dst_stride_rgba;
src_y += src_stride_y;
src_u += src_stride_u;
src_v += src_stride_v;
@@ -914,59 +1001,27 @@ int I422ToRGBA(const uint8* src_y, int src_stride_y,
const uint8* src_v, int src_stride_v,
uint8* dst_rgba, int dst_stride_rgba,
int width, int height) {
- int y;
- void (*I422ToRGBARow)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = I422ToRGBARow_C;
- if (!src_y || !src_u || !src_v ||
- !dst_rgba ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
- dst_stride_rgba = -dst_stride_rgba;
- }
- // Coalesce rows.
- if (src_stride_y == width &&
- src_stride_u * 2 == width &&
- src_stride_v * 2 == width &&
- dst_stride_rgba == width * 4) {
- width *= height;
- height = 1;
- src_stride_y = src_stride_u = src_stride_v = dst_stride_rgba = 0;
- }
-#if defined(HAS_I422TORGBAROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- I422ToRGBARow = I422ToRGBARow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- I422ToRGBARow = I422ToRGBARow_NEON;
- }
- }
-#elif defined(HAS_I422TORGBAROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- I422ToRGBARow = I422ToRGBARow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_rgba, 16) && IS_ALIGNED(dst_stride_rgba, 16)) {
- I422ToRGBARow = I422ToRGBARow_SSSE3;
- }
- }
- }
-#endif
+ return I422ToRGBAMatrix(src_y, src_stride_y,
+ src_u, src_stride_u,
+ src_v, src_stride_v,
+ dst_rgba, dst_stride_rgba,
+ &kYuvI601Constants,
+ width, height);
+}
- for (y = 0; y < height; ++y) {
- I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width);
- dst_rgba += dst_stride_rgba;
- src_y += src_stride_y;
- src_u += src_stride_u;
- src_v += src_stride_v;
- }
- return 0;
+// Convert I422 to BGRA.
+LIBYUV_API
+int I422ToBGRA(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint8* dst_bgra, int dst_stride_bgra,
+ int width, int height) {
+ return I422ToRGBAMatrix(src_y, src_stride_y,
+ src_v, src_stride_v, // Swap U and V
+ src_u, src_stride_u,
+ dst_bgra, dst_stride_bgra,
+ &kYvuI601Constants, // Use Yvu matrix
+ width, height);
}
// Convert NV12 to RGB565.
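
The BGRA wrapper above needs no BGRA-specific row code: BGRA is RGBA with R and B exchanged, and in the conversion equations R depends only on V while B depends only on U, so passing the planes in swapped order to a matrix whose U and V coefficient columns are also swapped (kYvuI601Constants) exchanges R and B in the output. A float sketch of the BT.601 math with rounded coefficients (the real constants are fixed-point):

    #include <stdint.h>

    static int clamp255(int v) { return v < 0 ? 0 : (v > 255 ? 255 : v); }

    /* BT.601 limited-range YUV to RGB. */
    static void yuv601_to_rgb(int y, int u, int v, int* r, int* g, int* b) {
      *r = clamp255((int)(1.164 * (y - 16) + 1.596 * (v - 128)));
      *g = clamp255((int)(1.164 * (y - 16) - 0.813 * (v - 128)
                                           - 0.391 * (u - 128)));
      *b = clamp255((int)(1.164 * (y - 16) + 2.018 * (u - 128)));
    }
    /* Swapping the u/v arguments alone would not swap R and B, because the
       U and V gains differ; swapping the arguments and the coefficient
       columns together, which is what the Yvu constants encode, does. */
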
@@ -979,6 +1034,7 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
void (*NV12ToRGB565Row)(const uint8* y_buf,
const uint8* uv_buf,
uint8* rgb_buf,
+ const struct YuvConstants* yuvconstants,
int width) = NV12ToRGB565Row_C;
if (!src_y || !src_uv || !dst_rgb565 ||
width <= 0 || height == 0) {
@@ -991,14 +1047,23 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
dst_stride_rgb565 = -dst_stride_rgb565;
}
#if defined(HAS_NV12TORGB565ROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
NV12ToRGB565Row = NV12ToRGB565Row_SSSE3;
}
}
-#elif defined(HAS_NV12TORGB565ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+#endif
+#if defined(HAS_NV12TORGB565ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ NV12ToRGB565Row = NV12ToRGB565Row_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ NV12ToRGB565Row = NV12ToRGB565Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_NV12TORGB565ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
NV12ToRGB565Row = NV12ToRGB565Row_NEON;
@@ -1007,7 +1072,7 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
#endif
for (y = 0; y < height; ++y) {
- NV12ToRGB565Row(src_y, src_uv, dst_rgb565, width);
+ NV12ToRGB565Row(src_y, src_uv, dst_rgb565, &kYuvI601Constants, width);
dst_rgb565 += dst_stride_rgb565;
src_y += src_stride_y;
if (y & 1) {
@@ -1017,50 +1082,52 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
return 0;
}
-// Convert NV21 to RGB565.
+// Convert RAW to RGB24.
LIBYUV_API
-int NV21ToRGB565(const uint8* src_y, int src_stride_y,
- const uint8* src_vu, int src_stride_vu,
- uint8* dst_rgb565, int dst_stride_rgb565,
- int width, int height) {
+int RAWToRGB24(const uint8* src_raw, int src_stride_raw,
+ uint8* dst_rgb24, int dst_stride_rgb24,
+ int width, int height) {
int y;
- void (*NV21ToRGB565Row)(const uint8* y_buf,
- const uint8* src_vu,
- uint8* rgb_buf,
- int width) = NV21ToRGB565Row_C;
- if (!src_y || !src_vu || !dst_rgb565 ||
+ void (*RAWToRGB24Row)(const uint8* src_rgb, uint8* dst_rgb24, int width) =
+ RAWToRGB24Row_C;
+ if (!src_raw || !dst_rgb24 ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
- dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
- dst_stride_rgb565 = -dst_stride_rgb565;
+ src_raw = src_raw + (height - 1) * src_stride_raw;
+ src_stride_raw = -src_stride_raw;
}
-#if defined(HAS_NV21TORGB565ROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- NV21ToRGB565Row = NV21ToRGB565Row_Any_SSSE3;
+ // Coalesce rows.
+ if (src_stride_raw == width * 3 &&
+ dst_stride_rgb24 == width * 3) {
+ width *= height;
+ height = 1;
+ src_stride_raw = dst_stride_rgb24 = 0;
+ }
+#if defined(HAS_RAWTORGB24ROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ RAWToRGB24Row = RAWToRGB24Row_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
- NV21ToRGB565Row = NV21ToRGB565Row_SSSE3;
+ RAWToRGB24Row = RAWToRGB24Row_SSSE3;
}
}
-#elif defined(HAS_NV21TORGB565ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- NV21ToRGB565Row = NV21ToRGB565Row_Any_NEON;
+#endif
+#if defined(HAS_RAWTORGB24ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ RAWToRGB24Row = RAWToRGB24Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
- NV21ToRGB565Row = NV21ToRGB565Row_NEON;
+ RAWToRGB24Row = RAWToRGB24Row_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
- NV21ToRGB565Row(src_y, src_vu, dst_rgb565, width);
- dst_rgb565 += dst_stride_rgb565;
- src_y += src_stride_y;
- if (y & 1) {
- src_vu += src_stride_vu;
- }
+ RAWToRGB24Row(src_raw, dst_rgb24, width);
+ src_raw += src_stride_raw;
+ dst_rgb24 += dst_stride_rgb24;
}
return 0;
}
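
RAWToRGB24 inverts on the source side, while most converters in this file invert on the destination, but the idiom is the same: point at the last row, negate the stride, and the usual top-to-bottom loop walks the image bottom-up. A minimal sketch with illustrative names:

    #include <stdint.h>
    #include <string.h>

    static void copy_maybe_flipped(const uint8_t* src, int src_stride,
                                   uint8_t* dst, int dst_stride,
                                   int width, int height) {
      int y;
      if (height < 0) {  /* negative height requests a vertical flip */
        height = -height;
        src = src + (height - 1) * src_stride;
        src_stride = -src_stride;  /* walk upward through the source */
      }
      for (y = 0; y < height; ++y) {
        memcpy(dst, src, (size_t)width);
        src += src_stride;
        dst += dst_stride;
      }
    }
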
@@ -1070,8 +1137,12 @@ void SetPlane(uint8* dst_y, int dst_stride_y,
int width, int height,
uint32 value) {
int y;
- uint32 v32 = value | (value << 8) | (value << 16) | (value << 24);
- void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow_C;
+ void (*SetRow)(uint8* dst, uint8 value, int width) = SetRow_C;
+ if (height < 0) {
+ height = -height;
+ dst_y = dst_y + (height - 1) * dst_stride_y;
+ dst_stride_y = -dst_stride_y;
+ }
// Coalesce rows.
if (dst_stride_y == width) {
width *= height;
@@ -1079,21 +1150,30 @@ void SetPlane(uint8* dst_y, int dst_stride_y,
dst_stride_y = 0;
}
#if defined(HAS_SETROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) &&
- IS_ALIGNED(width, 16) &&
- IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- SetRow = SetRow_NEON;
+ if (TestCpuFlag(kCpuHasNEON)) {
+ SetRow = SetRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ SetRow = SetRow_NEON;
+ }
}
#endif
#if defined(HAS_SETROW_X86)
- if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
- SetRow = SetRow_X86;
+ if (TestCpuFlag(kCpuHasX86)) {
+ SetRow = SetRow_Any_X86;
+ if (IS_ALIGNED(width, 4)) {
+ SetRow = SetRow_X86;
+ }
+ }
+#endif
+#if defined(HAS_SETROW_ERMS)
+ if (TestCpuFlag(kCpuHasERMS)) {
+ SetRow = SetRow_ERMS;
}
#endif
// Set plane
for (y = 0; y < height; ++y) {
- SetRow(dst_y, v32, width);
+ SetRow(dst_y, value, width);
dst_y += dst_stride_y;
}
}
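
SetRow now takes the 8-bit value directly instead of a caller-replicated 32-bit word, which is what lets the ERMS path (an enhanced rep-stosb style fill) slot in alongside the NEON and X86 kernels. The C contract is just a byte fill; memset here is only an illustration of it, not the library's implementation:

    #include <stdint.h>
    #include <string.h>

    /* One row of an 8-bit plane: width pixels, one byte each. */
    static void set_row_sketch(uint8_t* dst, uint8_t value, int width) {
      memset(dst, value, (size_t)width);
    }
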
@@ -1112,7 +1192,7 @@ int I420Rect(uint8* dst_y, int dst_stride_y,
uint8* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2);
uint8* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2);
if (!dst_y || !dst_u || !dst_v ||
- width <= 0 || height <= 0 ||
+ width <= 0 || height == 0 ||
x < 0 || y < 0 ||
value_y < 0 || value_y > 255 ||
value_u < 0 || value_u > 255 ||
@@ -1132,11 +1212,18 @@ int ARGBRect(uint8* dst_argb, int dst_stride_argb,
int dst_x, int dst_y,
int width, int height,
uint32 value) {
+ int y;
+ void (*ARGBSetRow)(uint8* dst_argb, uint32 value, int width) = ARGBSetRow_C;
if (!dst_argb ||
- width <= 0 || height <= 0 ||
+ width <= 0 || height == 0 ||
dst_x < 0 || dst_y < 0) {
return -1;
}
+ if (height < 0) {
+ height = -height;
+ dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+ dst_stride_argb = -dst_stride_argb;
+ }
dst_argb += dst_y * dst_stride_argb + dst_x * 4;
// Coalesce rows.
if (dst_stride_argb == width * 4) {
@@ -1144,20 +1231,26 @@ int ARGBRect(uint8* dst_argb, int dst_stride_argb,
height = 1;
dst_stride_argb = 0;
}
-#if defined(HAS_SETROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- ARGBSetRows_NEON(dst_argb, value, width, dst_stride_argb, height);
- return 0;
+
+#if defined(HAS_ARGBSETROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBSetRow = ARGBSetRow_Any_NEON;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBSetRow = ARGBSetRow_NEON;
+ }
}
#endif
-#if defined(HAS_SETROW_X86)
+#if defined(HAS_ARGBSETROW_X86)
if (TestCpuFlag(kCpuHasX86)) {
- ARGBSetRows_X86(dst_argb, value, width, dst_stride_argb, height);
- return 0;
+ ARGBSetRow = ARGBSetRow_X86;
}
#endif
- ARGBSetRows_C(dst_argb, value, width, dst_stride_argb, height);
+
+ // Set plane
+ for (y = 0; y < height; ++y) {
+ ARGBSetRow(dst_argb, value, width);
+ dst_argb += dst_stride_argb;
+ }
return 0;
}
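
ARGBRect likewise drops the multi-row ARGBSetRows helpers in favor of a per-row ARGBSetRow and the shared row loop. A plausible C reference for the row kernel (the real ARGBSetRow_C is in row_common.cc); width counts pixels, so each store is a 32-bit ARGB value, and 4-byte alignment of the destination is assumed, as ARGB buffers provide:

    #include <stdint.h>

    static void argb_set_row_sketch(uint8_t* dst_argb, uint32_t value,
                                    int width) {
      uint32_t* d = (uint32_t*)dst_argb;  /* 4 bytes per ARGB pixel */
      int x;
      for (x = 0; x < width; ++x) {
        d[x] = value;
      }
    }
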
@@ -1196,18 +1289,8 @@ int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
height = 1;
src_stride_argb = dst_stride_argb = 0;
}
-#if defined(HAS_ARGBATTENUATEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 4 &&
- IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- ARGBAttenuateRow = ARGBAttenuateRow_Any_SSE2;
- if (IS_ALIGNED(width, 4)) {
- ARGBAttenuateRow = ARGBAttenuateRow_SSE2;
- }
- }
-#endif
#if defined(HAS_ARGBATTENUATEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
if (IS_ALIGNED(width, 4)) {
ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
@@ -1215,7 +1298,7 @@ int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_ARGBATTENUATEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
+ if (TestCpuFlag(kCpuHasAVX2)) {
ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
if (IS_ALIGNED(width, 8)) {
ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
@@ -1223,7 +1306,7 @@ int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_ARGBATTENUATEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+ if (TestCpuFlag(kCpuHasNEON)) {
ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBAttenuateRow = ARGBAttenuateRow_NEON;
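
For reference, attenuation premultiplies each color channel by the pixel's alpha; the kernels selected above differ only in how they vectorize that product. A sketch with exact divide-by-255 math over libyuv's B,G,R,A byte order (the library's own C path uses a cheaper shift-based approximation, so this rounding is an assumption):

    #include <stdint.h>

    static void argb_attenuate_row_sketch(const uint8_t* src, uint8_t* dst,
                                          int width) {
      int x;
      for (x = 0; x < width; ++x) {
        uint32_t a = src[3];
        dst[0] = (uint8_t)(src[0] * a / 255);  /* B */
        dst[1] = (uint8_t)(src[1] * a / 255);  /* G */
        dst[2] = (uint8_t)(src[2] * a / 255);  /* R */
        dst[3] = (uint8_t)a;                   /* alpha unchanged */
        src += 4;
        dst += 4;
      }
    }
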
@@ -1263,7 +1346,7 @@ int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_argb = 0;
}
#if defined(HAS_ARGBUNATTENUATEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
+ if (TestCpuFlag(kCpuHasSSE2)) {
ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2;
@@ -1271,7 +1354,7 @@ int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
}
#endif
#if defined(HAS_ARGBUNATTENUATEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
+ if (TestCpuFlag(kCpuHasAVX2)) {
ARGBUnattenuateRow = ARGBUnattenuateRow_Any_AVX2;
if (IS_ALIGNED(width, 8)) {
ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2;
@@ -1312,12 +1395,11 @@ int ARGBGrayTo(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_argb = 0;
}
#if defined(HAS_ARGBGRAYROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
- IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+ if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
ARGBGrayRow = ARGBGrayRow_SSSE3;
}
-#elif defined(HAS_ARGBGRAYROW_NEON)
+#endif
+#if defined(HAS_ARGBGRAYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
ARGBGrayRow = ARGBGrayRow_NEON;
}
@@ -1350,11 +1432,11 @@ int ARGBGray(uint8* dst_argb, int dst_stride_argb,
dst_stride_argb = 0;
}
#if defined(HAS_ARGBGRAYROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+ if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
ARGBGrayRow = ARGBGrayRow_SSSE3;
}
-#elif defined(HAS_ARGBGRAYROW_NEON)
+#endif
+#if defined(HAS_ARGBGRAYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
ARGBGrayRow = ARGBGrayRow_NEON;
}
@@ -1383,11 +1465,11 @@ int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
dst_stride_argb = 0;
}
#if defined(HAS_ARGBSEPIAROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+ if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
ARGBSepiaRow = ARGBSepiaRow_SSSE3;
}
-#elif defined(HAS_ARGBSEPIAROW_NEON)
+#endif
+#if defined(HAS_ARGBSEPIAROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
ARGBSepiaRow = ARGBSepiaRow_NEON;
}
@@ -1425,11 +1507,11 @@ int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_argb = 0;
}
#if defined(HAS_ARGBCOLORMATRIXROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+ if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3;
}
-#elif defined(HAS_ARGBCOLORMATRIXROW_NEON)
+#endif
+#if defined(HAS_ARGBCOLORMATRIXROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
ARGBColorMatrixRow = ARGBColorMatrixRow_NEON;
}
@@ -1568,11 +1650,11 @@ int ARGBQuantize(uint8* dst_argb, int dst_stride_argb,
dst_stride_argb = 0;
}
#if defined(HAS_ARGBQUANTIZEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+ if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
ARGBQuantizeRow = ARGBQuantizeRow_SSE2;
}
-#elif defined(HAS_ARGBQUANTIZEROW_NEON)
+#endif
+#if defined(HAS_ARGBQUANTIZEROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
ARGBQuantizeRow = ARGBQuantizeRow_NEON;
}
@@ -1743,12 +1825,11 @@ int ARGBShade(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_argb = 0;
}
#if defined(HAS_ARGBSHADEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
- IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+ if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
ARGBShadeRow = ARGBShadeRow_SSE2;
}
-#elif defined(HAS_ARGBSHADEROW_NEON)
+#endif
+#if defined(HAS_ARGBSHADEROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
ARGBShadeRow = ARGBShadeRow_NEON;
}
@@ -1762,91 +1843,121 @@ int ARGBShade(const uint8* src_argb, int src_stride_argb,
return 0;
}
+// Interpolate 2 planes by specified amount (0 to 255).
+LIBYUV_API
+int InterpolatePlane(const uint8* src0, int src_stride0,
+ const uint8* src1, int src_stride1,
+ uint8* dst, int dst_stride,
+ int width, int height, int interpolation) {
+ int y;
+ void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
+ ptrdiff_t src_stride, int dst_width,
+ int source_y_fraction) = InterpolateRow_C;
+ if (!src0 || !src1 || !dst || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst = dst + (height - 1) * dst_stride;
+ dst_stride = -dst_stride;
+ }
+ // Coalesce rows.
+ if (src_stride0 == width &&
+ src_stride1 == width &&
+ dst_stride == width) {
+ width *= height;
+ height = 1;
+ src_stride0 = src_stride1 = dst_stride = 0;
+ }
+#if defined(HAS_INTERPOLATEROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ InterpolateRow = InterpolateRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ InterpolateRow = InterpolateRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ InterpolateRow = InterpolateRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ InterpolateRow = InterpolateRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ InterpolateRow = InterpolateRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ InterpolateRow = InterpolateRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
+ if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
+ IS_ALIGNED(src0, 4) && IS_ALIGNED(src_stride0, 4) &&
+ IS_ALIGNED(src1, 4) && IS_ALIGNED(src_stride1, 4) &&
+ IS_ALIGNED(dst, 4) && IS_ALIGNED(dst_stride, 4) &&
+ IS_ALIGNED(width, 4)) {
+ InterpolateRow = InterpolateRow_MIPS_DSPR2;
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ InterpolateRow(dst, src0, src1 - src0,
+ width, interpolation);
+ src0 += src_stride0;
+ src1 += src_stride1;
+ dst += dst_stride;
+ }
+ return 0;
+}
+
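
Two details above are easy to miss. First, InterpolateRow has no second source pointer: InterpolatePlane passes src1 - src0 as the "stride", so src + stride inside the kernel lands on the second image. Second, ARGBInterpolate below reuses the planar path unchanged by treating the ARGB image as a byte plane of width * 4. A sketch with the shape of InterpolateRow_C (the fraction-0 memcpy fast path and exact rounding are omitted):

    #include <stddef.h>
    #include <stdint.h>

    static void interpolate_row_sketch(uint8_t* dst, const uint8_t* src,
                                       ptrdiff_t src_stride, int width,
                                       int fraction) {  /* 0..255 */
      const uint8_t* src1 = src + src_stride;  /* second image via stride trick */
      int f1 = fraction;
      int f0 = 256 - f1;
      int x;
      for (x = 0; x < width; ++x) {
        dst[x] = (uint8_t)((src[x] * f0 + src1[x] * f1) >> 8);
      }
    }
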
// Interpolate 2 ARGB images by specified amount (0 to 255).
LIBYUV_API
int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
const uint8* src_argb1, int src_stride_argb1,
uint8* dst_argb, int dst_stride_argb,
int width, int height, int interpolation) {
- int y;
- void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) = InterpolateRow_C;
- if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
+ return InterpolatePlane(src_argb0, src_stride_argb0,
+ src_argb1, src_stride_argb1,
+ dst_argb, dst_stride_argb,
+ width * 4, height, interpolation);
+}
+
+// Interpolate 2 YUV images by specified amount (0 to 255).
+LIBYUV_API
+int I420Interpolate(const uint8* src0_y, int src0_stride_y,
+ const uint8* src0_u, int src0_stride_u,
+ const uint8* src0_v, int src0_stride_v,
+ const uint8* src1_y, int src1_stride_y,
+ const uint8* src1_u, int src1_stride_u,
+ const uint8* src1_v, int src1_stride_v,
+ uint8* dst_y, int dst_stride_y,
+ uint8* dst_u, int dst_stride_u,
+ uint8* dst_v, int dst_stride_v,
+ int width, int height, int interpolation) {
+ int halfwidth = (width + 1) >> 1;
+ int halfheight = (height + 1) >> 1;
+ if (!src0_y || !src0_u || !src0_v ||
+ !src1_y || !src1_u || !src1_v ||
+ !dst_y || !dst_u || !dst_v ||
+ width <= 0 || height == 0) {
return -1;
}
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_argb = dst_argb + (height - 1) * dst_stride_argb;
- dst_stride_argb = -dst_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb0 == width * 4 &&
- src_stride_argb1 == width * 4 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
- }
-#if defined(HAS_INTERPOLATEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
- InterpolateRow = InterpolateRow_Any_SSE2;
- if (IS_ALIGNED(width, 4)) {
- InterpolateRow = InterpolateRow_Unaligned_SSE2;
- if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
- IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- InterpolateRow = InterpolateRow_SSE2;
- }
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) {
- InterpolateRow = InterpolateRow_Any_SSSE3;
- if (IS_ALIGNED(width, 4)) {
- InterpolateRow = InterpolateRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
- IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- InterpolateRow = InterpolateRow_SSSE3;
- }
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
- InterpolateRow = InterpolateRow_Any_AVX2;
- if (IS_ALIGNED(width, 8)) {
- InterpolateRow = InterpolateRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 4) {
- InterpolateRow = InterpolateRow_Any_NEON;
- if (IS_ALIGNED(width, 4)) {
- InterpolateRow = InterpolateRow_NEON;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && width >= 1 &&
- IS_ALIGNED(src_argb0, 4) && IS_ALIGNED(src_stride_argb0, 4) &&
- IS_ALIGNED(src_argb1, 4) && IS_ALIGNED(src_stride_argb1, 4) &&
- IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
- ScaleARGBFilterRows = InterpolateRow_MIPS_DSPR2;
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- InterpolateRow(dst_argb, src_argb0, src_argb1 - src_argb0,
- width * 4, interpolation);
- src_argb0 += src_stride_argb0;
- src_argb1 += src_stride_argb1;
- dst_argb += dst_stride_argb;
- }
+ InterpolatePlane(src0_y, src0_stride_y,
+ src1_y, src1_stride_y,
+ dst_y, dst_stride_y,
+ width, height, interpolation);
+ InterpolatePlane(src0_u, src0_stride_u,
+ src1_u, src1_stride_u,
+ dst_u, dst_stride_u,
+ halfwidth, halfheight, interpolation);
+ InterpolatePlane(src0_v, src0_stride_v,
+ src1_v, src1_stride_v,
+ dst_v, dst_stride_v,
+ halfwidth, halfheight, interpolation);
return 0;
}
@@ -1857,7 +1968,7 @@ int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
const uint8* shuffler, int width, int height) {
int y;
void (*ARGBShuffleRow)(const uint8* src_bgra, uint8* dst_argb,
- const uint8* shuffler, int pix) = ARGBShuffleRow_C;
+ const uint8* shuffler, int width) = ARGBShuffleRow_C;
if (!src_bgra || !dst_argb ||
width <= 0 || height == 0) {
return -1;
@@ -1876,7 +1987,7 @@ int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
src_stride_bgra = dst_stride_argb = 0;
}
#if defined(HAS_ARGBSHUFFLEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
+ if (TestCpuFlag(kCpuHasSSE2)) {
ARGBShuffleRow = ARGBShuffleRow_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBShuffleRow = ARGBShuffleRow_SSE2;
@@ -1884,19 +1995,15 @@ int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
}
#endif
#if defined(HAS_ARGBSHUFFLEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
- ARGBShuffleRow = ARGBShuffleRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_bgra, 16) && IS_ALIGNED(src_stride_bgra, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- ARGBShuffleRow = ARGBShuffleRow_SSSE3;
- }
+ ARGBShuffleRow = ARGBShuffleRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBSHUFFLEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
+ if (TestCpuFlag(kCpuHasAVX2)) {
ARGBShuffleRow = ARGBShuffleRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
ARGBShuffleRow = ARGBShuffleRow_AVX2;
@@ -1904,7 +2011,7 @@ int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
}
#endif
#if defined(HAS_ARGBSHUFFLEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 4) {
+ if (TestCpuFlag(kCpuHasNEON)) {
ARGBShuffleRow = ARGBShuffleRow_Any_NEON;
if (IS_ALIGNED(width, 4)) {
ARGBShuffleRow = ARGBShuffleRow_NEON;
@@ -1928,8 +2035,8 @@ static int ARGBSobelize(const uint8* src_argb, int src_stride_argb,
const uint8* src_sobely,
uint8* dst, int width)) {
int y;
- void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
- uint32 selector, int pix) = ARGBToBayerGGRow_C;
+ void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_g, int width) =
+ ARGBToYJRow_C;
void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
uint8* dst_sobely, int width) = SobelYRow_C;
void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1,
@@ -1945,33 +2052,32 @@ static int ARGBSobelize(const uint8* src_argb, int src_stride_argb,
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
- // ARGBToBayer used to select G channel from ARGB.
-#if defined(HAS_ARGBTOBAYERGGROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
- IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
- ARGBToBayerRow = ARGBToBayerGGRow_Any_SSE2;
- if (IS_ALIGNED(width, 8)) {
- ARGBToBayerRow = ARGBToBayerGGRow_SSE2;
+
+#if defined(HAS_ARGBTOYJROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYJRow = ARGBToYJRow_SSSE3;
}
}
#endif
-#if defined(HAS_ARGBTOBAYERROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 &&
- IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
- ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- ARGBToBayerRow = ARGBToBayerRow_SSSE3;
+#if defined(HAS_ARGBTOYJROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToYJRow = ARGBToYJRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToYJRow = ARGBToYJRow_AVX2;
}
}
#endif
-#if defined(HAS_ARGBTOBAYERGGROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- ARGBToBayerRow = ARGBToBayerGGRow_Any_NEON;
+#if defined(HAS_ARGBTOYJROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToYJRow = ARGBToYJRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
- ARGBToBayerRow = ARGBToBayerGGRow_NEON;
+ ARGBToYJRow = ARGBToYJRow_NEON;
}
}
#endif
+
#if defined(HAS_SOBELYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
SobelYRow = SobelYRow_SSE2;
@@ -1994,7 +2100,7 @@ static int ARGBSobelize(const uint8* src_argb, int src_stride_argb,
#endif
{
// 3 rows with edges before/after.
- const int kRowSize = (width + kEdge + 15) & ~15;
+ const int kRowSize = (width + kEdge + 31) & ~31;
align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge));
uint8* row_sobelx = rows;
uint8* row_sobely = rows + kRowSize;
@@ -2004,20 +2110,20 @@ static int ARGBSobelize(const uint8* src_argb, int src_stride_argb,
uint8* row_y0 = row_y + kEdge;
uint8* row_y1 = row_y0 + kRowSize;
uint8* row_y2 = row_y1 + kRowSize;
- ARGBToBayerRow(src_argb, row_y0, 0x0d090501, width);
+ ARGBToYJRow(src_argb, row_y0, width);
row_y0[-1] = row_y0[0];
memset(row_y0 + width, row_y0[width - 1], 16); // Extrude 16 for valgrind.
- ARGBToBayerRow(src_argb, row_y1, 0x0d090501, width);
+ ARGBToYJRow(src_argb, row_y1, width);
row_y1[-1] = row_y1[0];
memset(row_y1 + width, row_y1[width - 1], 16);
memset(row_y2 + width, 0, 16);
for (y = 0; y < height; ++y) {
- // Convert next row of ARGB to Y.
+ // Convert next row of ARGB to YJ (luma).
if (y < (height - 1)) {
src_argb += src_stride_argb;
}
- ARGBToBayerRow(src_argb, row_y2, 0x0d090501, width);
+ ARGBToYJRow(src_argb, row_y2, width);
row_y2[-1] = row_y2[0];
row_y2[width] = row_y2[width - 1];
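
ARGBSobelize maintains a rolling window of three luma rows (row_y0, row_y1, row_y2) with one replicated pixel on each edge, so the gradient kernels never need bounds checks. libyuv splits the work into SobelXRow and SobelYRow passes; a combined textbook 3x3 Sobel over the same three rows, shown here only as a sketch of what those passes compute together:

    #include <stdint.h>
    #include <stdlib.h>  /* abs */

    static uint8_t sobel_pixel(const uint8_t* y0, const uint8_t* y1,
                               const uint8_t* y2, int x) {
      /* Columns x - 1 and x + 1 are valid thanks to the replicated edges. */
      int gx = (y0[x - 1] + 2 * y1[x - 1] + y2[x - 1]) -
               (y0[x + 1] + 2 * y1[x + 1] + y2[x + 1]);
      int gy = (y0[x - 1] + 2 * y0[x] + y0[x + 1]) -
               (y2[x - 1] + 2 * y2[x] + y2[x + 1]);
      int mag = abs(gx) + abs(gy);
      return (uint8_t)(mag > 255 ? 255 : mag);
    }
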
@@ -2048,14 +2154,19 @@ int ARGBSobel(const uint8* src_argb, int src_stride_argb,
void (*SobelRow)(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_argb, int width) = SobelRow_C;
#if defined(HAS_SOBELROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- SobelRow = SobelRow_SSE2;
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ SobelRow = SobelRow_Any_SSE2;
+ if (IS_ALIGNED(width, 16)) {
+ SobelRow = SobelRow_SSE2;
+ }
}
#endif
#if defined(HAS_SOBELROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
- SobelRow = SobelRow_NEON;
+ if (TestCpuFlag(kCpuHasNEON)) {
+ SobelRow = SobelRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ SobelRow = SobelRow_NEON;
+ }
}
#endif
return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
@@ -2070,14 +2181,19 @@ int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb,
void (*SobelToPlaneRow)(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_, int width) = SobelToPlaneRow_C;
#if defined(HAS_SOBELTOPLANEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
- IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- SobelToPlaneRow = SobelToPlaneRow_SSE2;
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ SobelToPlaneRow = SobelToPlaneRow_Any_SSE2;
+ if (IS_ALIGNED(width, 16)) {
+ SobelToPlaneRow = SobelToPlaneRow_SSE2;
+ }
}
#endif
#if defined(HAS_SOBELTOPLANEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
- SobelToPlaneRow = SobelToPlaneRow_NEON;
+ if (TestCpuFlag(kCpuHasNEON)) {
+ SobelToPlaneRow = SobelToPlaneRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ SobelToPlaneRow = SobelToPlaneRow_NEON;
+ }
}
#endif
return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y,
@@ -2093,14 +2209,19 @@ int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_argb, int width) = SobelXYRow_C;
#if defined(HAS_SOBELXYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- SobelXYRow = SobelXYRow_SSE2;
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ SobelXYRow = SobelXYRow_Any_SSE2;
+ if (IS_ALIGNED(width, 16)) {
+ SobelXYRow = SobelXYRow_SSE2;
+ }
}
#endif
#if defined(HAS_SOBELXYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
- SobelXYRow = SobelXYRow_NEON;
+ if (TestCpuFlag(kCpuHasNEON)) {
+ SobelXYRow = SobelXYRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ SobelXYRow = SobelXYRow_NEON;
+ }
}
#endif
return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
@@ -2218,16 +2339,19 @@ int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_argb = 0;
}
#if defined(HAS_ARGBCOPYALPHAROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) &&
- IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16) &&
- IS_ALIGNED(width, 8)) {
- ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2;
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ARGBCopyAlphaRow = ARGBCopyAlphaRow_Any_SSE2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2;
+ }
}
#endif
#if defined(HAS_ARGBCOPYALPHAROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) {
- ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2;
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBCopyAlphaRow = ARGBCopyAlphaRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2;
+ }
}
#endif
@@ -2264,16 +2388,19 @@ int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
src_stride_y = dst_stride_argb = 0;
}
#if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) &&
- IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16) &&
- IS_ALIGNED(width, 8)) {
- ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_SSE2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
+ }
}
#endif
#if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) {
- ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
+ }
}
#endif
@@ -2285,6 +2412,198 @@ int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
return 0;
}
+LIBYUV_API
+int YUY2ToNV12(const uint8* src_yuy2, int src_stride_yuy2,
+ uint8* dst_y, int dst_stride_y,
+ uint8* dst_uv, int dst_stride_uv,
+ int width, int height) {
+ int y;
+ int halfwidth = (width + 1) >> 1;
+ void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+ int width) = SplitUVRow_C;
+ void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
+ ptrdiff_t src_stride, int dst_width,
+ int source_y_fraction) = InterpolateRow_C;
+ if (!src_yuy2 ||
+ !dst_y || !dst_uv ||
+ width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
+ src_stride_yuy2 = -src_stride_yuy2;
+ }
+#if defined(HAS_SPLITUVROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ SplitUVRow = SplitUVRow_Any_SSE2;
+ if (IS_ALIGNED(width, 16)) {
+ SplitUVRow = SplitUVRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_SPLITUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ SplitUVRow = SplitUVRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ SplitUVRow = SplitUVRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_SPLITUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ SplitUVRow = SplitUVRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ SplitUVRow = SplitUVRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ InterpolateRow = InterpolateRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ InterpolateRow = InterpolateRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ InterpolateRow = InterpolateRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ InterpolateRow = InterpolateRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ InterpolateRow = InterpolateRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ InterpolateRow = InterpolateRow_NEON;
+ }
+ }
+#endif
+
+ {
+ int awidth = halfwidth * 2;
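+    // awidth is width rounded up to an even pixel count; SplitUVRow consumes
+    // awidth byte pairs per row, one luma and one chroma byte each.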
+ // 2 rows of uv
+ align_buffer_64(rows, awidth * 2);
+
+ for (y = 0; y < height - 1; y += 2) {
+ // Split Y from UV.
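+      // YUY2 packs pixels as Y0 U Y1 V: even bytes are luma and odd bytes are
+      // interleaved UV, so SplitUVRow peels a row into Y and UV planes.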
+ SplitUVRow(src_yuy2, dst_y, rows, awidth);
+ SplitUVRow(src_yuy2 + src_stride_yuy2, dst_y + dst_stride_y,
+ rows + awidth, awidth);
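+      // Average the two UV rows (source_y_fraction 128 of 256 = 50/50) into
+      // the single chroma row NV12 keeps per two luma rows.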
+ InterpolateRow(dst_uv, rows, awidth, awidth, 128);
+ src_yuy2 += src_stride_yuy2 * 2;
+ dst_y += dst_stride_y * 2;
+ dst_uv += dst_stride_uv;
+ }
+ if (height & 1) {
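+      // Odd height: the final chroma row has no partner row to average with,
+      // so split it straight into dst_uv.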
+ // Split Y from UV.
+ SplitUVRow(src_yuy2, dst_y, dst_uv, awidth);
+ }
+ free_aligned_buffer_64(rows);
+ }
+ return 0;
+}
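+
+// A minimal usage sketch (hypothetical buffer names, assuming an even width
+// and tightly packed planes):
+//   YUY2ToNV12(yuy2, width * 2,   // YUY2 is 2 bytes per pixel
+//              y_plane, width,    // full-resolution luma plane
+//              uv_plane, width,   // interleaved UV, half vertical resolution
+//              width, height);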
+
+LIBYUV_API
+int UYVYToNV12(const uint8* src_uyvy, int src_stride_uyvy,
+ uint8* dst_y, int dst_stride_y,
+ uint8* dst_uv, int dst_stride_uv,
+ int width, int height) {
+ int y;
+ int halfwidth = (width + 1) >> 1;
+ void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+ int width) = SplitUVRow_C;
+ void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
+ ptrdiff_t src_stride, int dst_width,
+ int source_y_fraction) = InterpolateRow_C;
+ if (!src_uyvy ||
+ !dst_y || !dst_uv ||
+ width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
+ src_stride_uyvy = -src_stride_uyvy;
+ }
+#if defined(HAS_SPLITUVROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ SplitUVRow = SplitUVRow_Any_SSE2;
+ if (IS_ALIGNED(width, 16)) {
+ SplitUVRow = SplitUVRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_SPLITUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ SplitUVRow = SplitUVRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ SplitUVRow = SplitUVRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_SPLITUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ SplitUVRow = SplitUVRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ SplitUVRow = SplitUVRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ InterpolateRow = InterpolateRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ InterpolateRow = InterpolateRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ InterpolateRow = InterpolateRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ InterpolateRow = InterpolateRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ InterpolateRow = InterpolateRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ InterpolateRow = InterpolateRow_NEON;
+ }
+ }
+#endif
+
+ {
+ int awidth = halfwidth * 2;
+ // 2 rows of uv
+ align_buffer_64(rows, awidth * 2);
+
+ for (y = 0; y < height - 1; y += 2) {
+ // Split Y from UV.
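+      // UYVY packs pixels as U0 Y0 V0 Y1: even bytes are chroma here, so the
+      // two SplitUVRow outputs are swapped relative to YUY2ToNV12.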
+ SplitUVRow(src_uyvy, rows, dst_y, awidth);
+ SplitUVRow(src_uyvy + src_stride_uyvy, rows + awidth,
+ dst_y + dst_stride_y, awidth);
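+      // As in YUY2ToNV12: 50/50 blend of the two chroma rows for NV12.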
+ InterpolateRow(dst_uv, rows, awidth, awidth, 128);
+ src_uyvy += src_stride_uyvy * 2;
+ dst_y += dst_stride_y * 2;
+ dst_uv += dst_stride_uv;
+ }
+ if (height & 1) {
+ // Split Y from UV.
+ SplitUVRow(src_uyvy, dst_uv, dst_y, awidth);
+ }
+ free_aligned_buffer_64(rows);
+ }
+ return 0;
+}
+
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
diff --git a/TMessagesProj/jni/libyuv/source/rotate.cc b/TMessagesProj/jni/libyuv/source/rotate.cc
index fe0e72b13..31e04af9c 100644
--- a/TMessagesProj/jni/libyuv/source/rotate.cc
+++ b/TMessagesProj/jni/libyuv/source/rotate.cc
@@ -13,6 +13,7 @@
#include "libyuv/cpu_id.h"
#include "libyuv/convert.h"
#include "libyuv/planar_functions.h"
+#include "libyuv/rotate_row.h"
#include "libyuv/row.h"
#ifdef __cplusplus
@@ -20,829 +21,39 @@ namespace libyuv {
extern "C" {
#endif
-#if !defined(LIBYUV_DISABLE_X86) && \
- (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
-#if defined(__APPLE__) && defined(__i386__)
-#define DECLARE_FUNCTION(name) \
- ".text \n" \
- ".private_extern _" #name " \n" \
- ".align 4,0x90 \n" \
-"_" #name ": \n"
-#elif defined(__MINGW32__) || defined(__CYGWIN__) && defined(__i386__)
-#define DECLARE_FUNCTION(name) \
- ".text \n" \
- ".align 4,0x90 \n" \
-"_" #name ": \n"
-#else
-#define DECLARE_FUNCTION(name) \
- ".text \n" \
- ".align 4,0x90 \n" \
-#name ": \n"
-#endif
-#endif
-
-#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
- (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
-#define HAS_MIRRORROW_NEON
-void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
-#define HAS_MIRRORROW_UV_NEON
-void MirrorUVRow_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width);
-#define HAS_TRANSPOSE_WX8_NEON
-void TransposeWx8_NEON(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width);
-#define HAS_TRANSPOSE_UVWX8_NEON
-void TransposeUVWx8_NEON(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width);
-#elif !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
- (defined(__aarch64__) || defined(LIBYUV_NEON))
-// #define HAS_MIRRORROW_NEON
-// void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
-// #define HAS_MIRRORROW_UV_NEON
-// void MirrorUVRow_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width);
-// #define HAS_TRANSPOSE_WX8_NEON
-// void TransposeWx8_NEON(const uint8* src, int src_stride,
-// uint8* dst, int dst_stride, int width);
-// #define HAS_TRANSPOSE_UVWX8_NEON
-// void TransposeUVWx8_NEON(const uint8* src, int src_stride,
-// uint8* dst_a, int dst_stride_a,
-// uint8* dst_b, int dst_stride_b,
-// int width);
-#endif // defined(__ARM_NEON__)
-
-#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
- defined(__mips__) && \
- defined(__mips_dsp) && (__mips_dsp_rev >= 2)
-#define HAS_TRANSPOSE_WX8_MIPS_DSPR2
-void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width);
-
-void TransposeWx8_FAST_MIPS_DSPR2(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width);
-#define HAS_TRANSPOSE_UVWx8_MIPS_DSPR2
-void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width);
-#endif // defined(__mips__)
-
-#if !defined(LIBYUV_DISABLE_X86) && \
- defined(_M_IX86) && defined(_MSC_VER)
-#define HAS_TRANSPOSE_WX8_SSSE3
-__declspec(naked) __declspec(align(16))
-static void TransposeWx8_SSSE3(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width) {
- __asm {
- push edi
- push esi
- push ebp
- mov eax, [esp + 12 + 4] // src
- mov edi, [esp + 12 + 8] // src_stride
- mov edx, [esp + 12 + 12] // dst
- mov esi, [esp + 12 + 16] // dst_stride
- mov ecx, [esp + 12 + 20] // width
-
- // Read in the data from the source pointer.
- // First round of bit swap.
- align 4
- convertloop:
- movq xmm0, qword ptr [eax]
- lea ebp, [eax + 8]
- movq xmm1, qword ptr [eax + edi]
- lea eax, [eax + 2 * edi]
- punpcklbw xmm0, xmm1
- movq xmm2, qword ptr [eax]
- movdqa xmm1, xmm0
- palignr xmm1, xmm1, 8
- movq xmm3, qword ptr [eax + edi]
- lea eax, [eax + 2 * edi]
- punpcklbw xmm2, xmm3
- movdqa xmm3, xmm2
- movq xmm4, qword ptr [eax]
- palignr xmm3, xmm3, 8
- movq xmm5, qword ptr [eax + edi]
- punpcklbw xmm4, xmm5
- lea eax, [eax + 2 * edi]
- movdqa xmm5, xmm4
- movq xmm6, qword ptr [eax]
- palignr xmm5, xmm5, 8
- movq xmm7, qword ptr [eax + edi]
- punpcklbw xmm6, xmm7
- mov eax, ebp
- movdqa xmm7, xmm6
- palignr xmm7, xmm7, 8
- // Second round of bit swap.
- punpcklwd xmm0, xmm2
- punpcklwd xmm1, xmm3
- movdqa xmm2, xmm0
- movdqa xmm3, xmm1
- palignr xmm2, xmm2, 8
- palignr xmm3, xmm3, 8
- punpcklwd xmm4, xmm6
- punpcklwd xmm5, xmm7
- movdqa xmm6, xmm4
- movdqa xmm7, xmm5
- palignr xmm6, xmm6, 8
- palignr xmm7, xmm7, 8
- // Third round of bit swap.
- // Write to the destination pointer.
- punpckldq xmm0, xmm4
- movq qword ptr [edx], xmm0
- movdqa xmm4, xmm0
- palignr xmm4, xmm4, 8
- movq qword ptr [edx + esi], xmm4
- lea edx, [edx + 2 * esi]
- punpckldq xmm2, xmm6
- movdqa xmm6, xmm2
- palignr xmm6, xmm6, 8
- movq qword ptr [edx], xmm2
- punpckldq xmm1, xmm5
- movq qword ptr [edx + esi], xmm6
- lea edx, [edx + 2 * esi]
- movdqa xmm5, xmm1
- movq qword ptr [edx], xmm1
- palignr xmm5, xmm5, 8
- punpckldq xmm3, xmm7
- movq qword ptr [edx + esi], xmm5
- lea edx, [edx + 2 * esi]
- movq qword ptr [edx], xmm3
- movdqa xmm7, xmm3
- palignr xmm7, xmm7, 8
- sub ecx, 8
- movq qword ptr [edx + esi], xmm7
- lea edx, [edx + 2 * esi]
- jg convertloop
-
- pop ebp
- pop esi
- pop edi
- ret
- }
-}
-
-#define HAS_TRANSPOSE_UVWX8_SSE2
-__declspec(naked) __declspec(align(16))
-static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int w) {
- __asm {
- push ebx
- push esi
- push edi
- push ebp
- mov eax, [esp + 16 + 4] // src
- mov edi, [esp + 16 + 8] // src_stride
- mov edx, [esp + 16 + 12] // dst_a
- mov esi, [esp + 16 + 16] // dst_stride_a
- mov ebx, [esp + 16 + 20] // dst_b
- mov ebp, [esp + 16 + 24] // dst_stride_b
- mov ecx, esp
- sub esp, 4 + 16
- and esp, ~15
- mov [esp + 16], ecx
- mov ecx, [ecx + 16 + 28] // w
-
- align 4
- convertloop:
- // Read in the data from the source pointer.
- // First round of bit swap.
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + edi]
- lea eax, [eax + 2 * edi]
- movdqa xmm7, xmm0 // use xmm7 as temp register.
- punpcklbw xmm0, xmm1
- punpckhbw xmm7, xmm1
- movdqa xmm1, xmm7
- movdqa xmm2, [eax]
- movdqa xmm3, [eax + edi]
- lea eax, [eax + 2 * edi]
- movdqa xmm7, xmm2
- punpcklbw xmm2, xmm3
- punpckhbw xmm7, xmm3
- movdqa xmm3, xmm7
- movdqa xmm4, [eax]
- movdqa xmm5, [eax + edi]
- lea eax, [eax + 2 * edi]
- movdqa xmm7, xmm4
- punpcklbw xmm4, xmm5
- punpckhbw xmm7, xmm5
- movdqa xmm5, xmm7
- movdqa xmm6, [eax]
- movdqa xmm7, [eax + edi]
- lea eax, [eax + 2 * edi]
- movdqa [esp], xmm5 // backup xmm5
- neg edi
- movdqa xmm5, xmm6 // use xmm5 as temp register.
- punpcklbw xmm6, xmm7
- punpckhbw xmm5, xmm7
- movdqa xmm7, xmm5
- lea eax, [eax + 8 * edi + 16]
- neg edi
- // Second round of bit swap.
- movdqa xmm5, xmm0
- punpcklwd xmm0, xmm2
- punpckhwd xmm5, xmm2
- movdqa xmm2, xmm5
- movdqa xmm5, xmm1
- punpcklwd xmm1, xmm3
- punpckhwd xmm5, xmm3
- movdqa xmm3, xmm5
- movdqa xmm5, xmm4
- punpcklwd xmm4, xmm6
- punpckhwd xmm5, xmm6
- movdqa xmm6, xmm5
- movdqa xmm5, [esp] // restore xmm5
- movdqa [esp], xmm6 // backup xmm6
- movdqa xmm6, xmm5 // use xmm6 as temp register.
- punpcklwd xmm5, xmm7
- punpckhwd xmm6, xmm7
- movdqa xmm7, xmm6
- // Third round of bit swap.
- // Write to the destination pointer.
- movdqa xmm6, xmm0
- punpckldq xmm0, xmm4
- punpckhdq xmm6, xmm4
- movdqa xmm4, xmm6
- movdqa xmm6, [esp] // restore xmm6
- movlpd qword ptr [edx], xmm0
- movhpd qword ptr [ebx], xmm0
- movlpd qword ptr [edx + esi], xmm4
- lea edx, [edx + 2 * esi]
- movhpd qword ptr [ebx + ebp], xmm4
- lea ebx, [ebx + 2 * ebp]
- movdqa xmm0, xmm2 // use xmm0 as the temp register.
- punpckldq xmm2, xmm6
- movlpd qword ptr [edx], xmm2
- movhpd qword ptr [ebx], xmm2
- punpckhdq xmm0, xmm6
- movlpd qword ptr [edx + esi], xmm0
- lea edx, [edx + 2 * esi]
- movhpd qword ptr [ebx + ebp], xmm0
- lea ebx, [ebx + 2 * ebp]
- movdqa xmm0, xmm1 // use xmm0 as the temp register.
- punpckldq xmm1, xmm5
- movlpd qword ptr [edx], xmm1
- movhpd qword ptr [ebx], xmm1
- punpckhdq xmm0, xmm5
- movlpd qword ptr [edx + esi], xmm0
- lea edx, [edx + 2 * esi]
- movhpd qword ptr [ebx + ebp], xmm0
- lea ebx, [ebx + 2 * ebp]
- movdqa xmm0, xmm3 // use xmm0 as the temp register.
- punpckldq xmm3, xmm7
- movlpd qword ptr [edx], xmm3
- movhpd qword ptr [ebx], xmm3
- punpckhdq xmm0, xmm7
- sub ecx, 8
- movlpd qword ptr [edx + esi], xmm0
- lea edx, [edx + 2 * esi]
- movhpd qword ptr [ebx + ebp], xmm0
- lea ebx, [ebx + 2 * ebp]
- jg convertloop
-
- mov esp, [esp + 16]
- pop ebp
- pop edi
- pop esi
- pop ebx
- ret
- }
-}
-#elif !defined(LIBYUV_DISABLE_X86) && \
- (defined(__i386__) || (defined(__x86_64__) && !defined(__native_client__)))
-#define HAS_TRANSPOSE_WX8_SSSE3
-static void TransposeWx8_SSSE3(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width) {
- asm volatile (
- // Read in the data from the source pointer.
- // First round of bit swap.
- ".p2align 2 \n"
- "1: \n"
- "movq (%0),%%xmm0 \n"
- "movq (%0,%3),%%xmm1 \n"
- "lea (%0,%3,2),%0 \n"
- "punpcklbw %%xmm1,%%xmm0 \n"
- "movq (%0),%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "palignr $0x8,%%xmm1,%%xmm1 \n"
- "movq (%0,%3),%%xmm3 \n"
- "lea (%0,%3,2),%0 \n"
- "punpcklbw %%xmm3,%%xmm2 \n"
- "movdqa %%xmm2,%%xmm3 \n"
- "movq (%0),%%xmm4 \n"
- "palignr $0x8,%%xmm3,%%xmm3 \n"
- "movq (%0,%3),%%xmm5 \n"
- "lea (%0,%3,2),%0 \n"
- "punpcklbw %%xmm5,%%xmm4 \n"
- "movdqa %%xmm4,%%xmm5 \n"
- "movq (%0),%%xmm6 \n"
- "palignr $0x8,%%xmm5,%%xmm5 \n"
- "movq (%0,%3),%%xmm7 \n"
- "lea (%0,%3,2),%0 \n"
- "punpcklbw %%xmm7,%%xmm6 \n"
- "neg %3 \n"
- "movdqa %%xmm6,%%xmm7 \n"
- "lea 0x8(%0,%3,8),%0 \n"
- "palignr $0x8,%%xmm7,%%xmm7 \n"
- "neg %3 \n"
- // Second round of bit swap.
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpcklwd %%xmm3,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "movdqa %%xmm1,%%xmm3 \n"
- "palignr $0x8,%%xmm2,%%xmm2 \n"
- "palignr $0x8,%%xmm3,%%xmm3 \n"
- "punpcklwd %%xmm6,%%xmm4 \n"
- "punpcklwd %%xmm7,%%xmm5 \n"
- "movdqa %%xmm4,%%xmm6 \n"
- "movdqa %%xmm5,%%xmm7 \n"
- "palignr $0x8,%%xmm6,%%xmm6 \n"
- "palignr $0x8,%%xmm7,%%xmm7 \n"
- // Third round of bit swap.
- // Write to the destination pointer.
- "punpckldq %%xmm4,%%xmm0 \n"
- "movq %%xmm0,(%1) \n"
- "movdqa %%xmm0,%%xmm4 \n"
- "palignr $0x8,%%xmm4,%%xmm4 \n"
- "movq %%xmm4,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "punpckldq %%xmm6,%%xmm2 \n"
- "movdqa %%xmm2,%%xmm6 \n"
- "movq %%xmm2,(%1) \n"
- "palignr $0x8,%%xmm6,%%xmm6 \n"
- "punpckldq %%xmm5,%%xmm1 \n"
- "movq %%xmm6,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "movdqa %%xmm1,%%xmm5 \n"
- "movq %%xmm1,(%1) \n"
- "palignr $0x8,%%xmm5,%%xmm5 \n"
- "movq %%xmm5,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "punpckldq %%xmm7,%%xmm3 \n"
- "movq %%xmm3,(%1) \n"
- "movdqa %%xmm3,%%xmm7 \n"
- "palignr $0x8,%%xmm7,%%xmm7 \n"
- "sub $0x8,%2 \n"
- "movq %%xmm7,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- : "r"((intptr_t)(src_stride)), // %3
- "r"((intptr_t)(dst_stride)) // %4
- : "memory", "cc"
- #if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- #endif
- );
-}
-
-#if !defined(LIBYUV_DISABLE_X86) && defined(__i386__)
-#define HAS_TRANSPOSE_UVWX8_SSE2
-void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int w);
- asm (
- DECLARE_FUNCTION(TransposeUVWx8_SSE2)
- "push %ebx \n"
- "push %esi \n"
- "push %edi \n"
- "push %ebp \n"
- "mov 0x14(%esp),%eax \n"
- "mov 0x18(%esp),%edi \n"
- "mov 0x1c(%esp),%edx \n"
- "mov 0x20(%esp),%esi \n"
- "mov 0x24(%esp),%ebx \n"
- "mov 0x28(%esp),%ebp \n"
- "mov %esp,%ecx \n"
- "sub $0x14,%esp \n"
- "and $0xfffffff0,%esp \n"
- "mov %ecx,0x10(%esp) \n"
- "mov 0x2c(%ecx),%ecx \n"
-
-"1: \n"
- "movdqa (%eax),%xmm0 \n"
- "movdqa (%eax,%edi,1),%xmm1 \n"
- "lea (%eax,%edi,2),%eax \n"
- "movdqa %xmm0,%xmm7 \n"
- "punpcklbw %xmm1,%xmm0 \n"
- "punpckhbw %xmm1,%xmm7 \n"
- "movdqa %xmm7,%xmm1 \n"
- "movdqa (%eax),%xmm2 \n"
- "movdqa (%eax,%edi,1),%xmm3 \n"
- "lea (%eax,%edi,2),%eax \n"
- "movdqa %xmm2,%xmm7 \n"
- "punpcklbw %xmm3,%xmm2 \n"
- "punpckhbw %xmm3,%xmm7 \n"
- "movdqa %xmm7,%xmm3 \n"
- "movdqa (%eax),%xmm4 \n"
- "movdqa (%eax,%edi,1),%xmm5 \n"
- "lea (%eax,%edi,2),%eax \n"
- "movdqa %xmm4,%xmm7 \n"
- "punpcklbw %xmm5,%xmm4 \n"
- "punpckhbw %xmm5,%xmm7 \n"
- "movdqa %xmm7,%xmm5 \n"
- "movdqa (%eax),%xmm6 \n"
- "movdqa (%eax,%edi,1),%xmm7 \n"
- "lea (%eax,%edi,2),%eax \n"
- "movdqa %xmm5,(%esp) \n"
- "neg %edi \n"
- "movdqa %xmm6,%xmm5 \n"
- "punpcklbw %xmm7,%xmm6 \n"
- "punpckhbw %xmm7,%xmm5 \n"
- "movdqa %xmm5,%xmm7 \n"
- "lea 0x10(%eax,%edi,8),%eax \n"
- "neg %edi \n"
- "movdqa %xmm0,%xmm5 \n"
- "punpcklwd %xmm2,%xmm0 \n"
- "punpckhwd %xmm2,%xmm5 \n"
- "movdqa %xmm5,%xmm2 \n"
- "movdqa %xmm1,%xmm5 \n"
- "punpcklwd %xmm3,%xmm1 \n"
- "punpckhwd %xmm3,%xmm5 \n"
- "movdqa %xmm5,%xmm3 \n"
- "movdqa %xmm4,%xmm5 \n"
- "punpcklwd %xmm6,%xmm4 \n"
- "punpckhwd %xmm6,%xmm5 \n"
- "movdqa %xmm5,%xmm6 \n"
- "movdqa (%esp),%xmm5 \n"
- "movdqa %xmm6,(%esp) \n"
- "movdqa %xmm5,%xmm6 \n"
- "punpcklwd %xmm7,%xmm5 \n"
- "punpckhwd %xmm7,%xmm6 \n"
- "movdqa %xmm6,%xmm7 \n"
- "movdqa %xmm0,%xmm6 \n"
- "punpckldq %xmm4,%xmm0 \n"
- "punpckhdq %xmm4,%xmm6 \n"
- "movdqa %xmm6,%xmm4 \n"
- "movdqa (%esp),%xmm6 \n"
- "movlpd %xmm0,(%edx) \n"
- "movhpd %xmm0,(%ebx) \n"
- "movlpd %xmm4,(%edx,%esi,1) \n"
- "lea (%edx,%esi,2),%edx \n"
- "movhpd %xmm4,(%ebx,%ebp,1) \n"
- "lea (%ebx,%ebp,2),%ebx \n"
- "movdqa %xmm2,%xmm0 \n"
- "punpckldq %xmm6,%xmm2 \n"
- "movlpd %xmm2,(%edx) \n"
- "movhpd %xmm2,(%ebx) \n"
- "punpckhdq %xmm6,%xmm0 \n"
- "movlpd %xmm0,(%edx,%esi,1) \n"
- "lea (%edx,%esi,2),%edx \n"
- "movhpd %xmm0,(%ebx,%ebp,1) \n"
- "lea (%ebx,%ebp,2),%ebx \n"
- "movdqa %xmm1,%xmm0 \n"
- "punpckldq %xmm5,%xmm1 \n"
- "movlpd %xmm1,(%edx) \n"
- "movhpd %xmm1,(%ebx) \n"
- "punpckhdq %xmm5,%xmm0 \n"
- "movlpd %xmm0,(%edx,%esi,1) \n"
- "lea (%edx,%esi,2),%edx \n"
- "movhpd %xmm0,(%ebx,%ebp,1) \n"
- "lea (%ebx,%ebp,2),%ebx \n"
- "movdqa %xmm3,%xmm0 \n"
- "punpckldq %xmm7,%xmm3 \n"
- "movlpd %xmm3,(%edx) \n"
- "movhpd %xmm3,(%ebx) \n"
- "punpckhdq %xmm7,%xmm0 \n"
- "sub $0x8,%ecx \n"
- "movlpd %xmm0,(%edx,%esi,1) \n"
- "lea (%edx,%esi,2),%edx \n"
- "movhpd %xmm0,(%ebx,%ebp,1) \n"
- "lea (%ebx,%ebp,2),%ebx \n"
- "jg 1b \n"
- "mov 0x10(%esp),%esp \n"
- "pop %ebp \n"
- "pop %edi \n"
- "pop %esi \n"
- "pop %ebx \n"
-#if defined(__native_client__)
- "pop %ecx \n"
- "and $0xffffffe0,%ecx \n"
- "jmp *%ecx \n"
-#else
- "ret \n"
-#endif
-);
-#elif !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \
- defined(__x86_64__)
-// 64 bit version has enough registers to do 16x8 to 8x16 at a time.
-#define HAS_TRANSPOSE_WX8_FAST_SSSE3
-static void TransposeWx8_FAST_SSSE3(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width) {
- asm volatile (
- // Read in the data from the source pointer.
- // First round of bit swap.
- ".p2align 2 \n"
-"1: \n"
- "movdqa (%0),%%xmm0 \n"
- "movdqa (%0,%3),%%xmm1 \n"
- "lea (%0,%3,2),%0 \n"
- "movdqa %%xmm0,%%xmm8 \n"
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpckhbw %%xmm1,%%xmm8 \n"
- "movdqa (%0),%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm8,%%xmm9 \n"
- "palignr $0x8,%%xmm1,%%xmm1 \n"
- "palignr $0x8,%%xmm9,%%xmm9 \n"
- "movdqa (%0,%3),%%xmm3 \n"
- "lea (%0,%3,2),%0 \n"
- "movdqa %%xmm2,%%xmm10 \n"
- "punpcklbw %%xmm3,%%xmm2 \n"
- "punpckhbw %%xmm3,%%xmm10 \n"
- "movdqa %%xmm2,%%xmm3 \n"
- "movdqa %%xmm10,%%xmm11 \n"
- "movdqa (%0),%%xmm4 \n"
- "palignr $0x8,%%xmm3,%%xmm3 \n"
- "palignr $0x8,%%xmm11,%%xmm11 \n"
- "movdqa (%0,%3),%%xmm5 \n"
- "lea (%0,%3,2),%0 \n"
- "movdqa %%xmm4,%%xmm12 \n"
- "punpcklbw %%xmm5,%%xmm4 \n"
- "punpckhbw %%xmm5,%%xmm12 \n"
- "movdqa %%xmm4,%%xmm5 \n"
- "movdqa %%xmm12,%%xmm13 \n"
- "movdqa (%0),%%xmm6 \n"
- "palignr $0x8,%%xmm5,%%xmm5 \n"
- "palignr $0x8,%%xmm13,%%xmm13 \n"
- "movdqa (%0,%3),%%xmm7 \n"
- "lea (%0,%3,2),%0 \n"
- "movdqa %%xmm6,%%xmm14 \n"
- "punpcklbw %%xmm7,%%xmm6 \n"
- "punpckhbw %%xmm7,%%xmm14 \n"
- "neg %3 \n"
- "movdqa %%xmm6,%%xmm7 \n"
- "movdqa %%xmm14,%%xmm15 \n"
- "lea 0x10(%0,%3,8),%0 \n"
- "palignr $0x8,%%xmm7,%%xmm7 \n"
- "palignr $0x8,%%xmm15,%%xmm15 \n"
- "neg %3 \n"
- // Second round of bit swap.
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpcklwd %%xmm3,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "movdqa %%xmm1,%%xmm3 \n"
- "palignr $0x8,%%xmm2,%%xmm2 \n"
- "palignr $0x8,%%xmm3,%%xmm3 \n"
- "punpcklwd %%xmm6,%%xmm4 \n"
- "punpcklwd %%xmm7,%%xmm5 \n"
- "movdqa %%xmm4,%%xmm6 \n"
- "movdqa %%xmm5,%%xmm7 \n"
- "palignr $0x8,%%xmm6,%%xmm6 \n"
- "palignr $0x8,%%xmm7,%%xmm7 \n"
- "punpcklwd %%xmm10,%%xmm8 \n"
- "punpcklwd %%xmm11,%%xmm9 \n"
- "movdqa %%xmm8,%%xmm10 \n"
- "movdqa %%xmm9,%%xmm11 \n"
- "palignr $0x8,%%xmm10,%%xmm10 \n"
- "palignr $0x8,%%xmm11,%%xmm11 \n"
- "punpcklwd %%xmm14,%%xmm12 \n"
- "punpcklwd %%xmm15,%%xmm13 \n"
- "movdqa %%xmm12,%%xmm14 \n"
- "movdqa %%xmm13,%%xmm15 \n"
- "palignr $0x8,%%xmm14,%%xmm14 \n"
- "palignr $0x8,%%xmm15,%%xmm15 \n"
- // Third round of bit swap.
- // Write to the destination pointer.
- "punpckldq %%xmm4,%%xmm0 \n"
- "movq %%xmm0,(%1) \n"
- "movdqa %%xmm0,%%xmm4 \n"
- "palignr $0x8,%%xmm4,%%xmm4 \n"
- "movq %%xmm4,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "punpckldq %%xmm6,%%xmm2 \n"
- "movdqa %%xmm2,%%xmm6 \n"
- "movq %%xmm2,(%1) \n"
- "palignr $0x8,%%xmm6,%%xmm6 \n"
- "punpckldq %%xmm5,%%xmm1 \n"
- "movq %%xmm6,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "movdqa %%xmm1,%%xmm5 \n"
- "movq %%xmm1,(%1) \n"
- "palignr $0x8,%%xmm5,%%xmm5 \n"
- "movq %%xmm5,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "punpckldq %%xmm7,%%xmm3 \n"
- "movq %%xmm3,(%1) \n"
- "movdqa %%xmm3,%%xmm7 \n"
- "palignr $0x8,%%xmm7,%%xmm7 \n"
- "movq %%xmm7,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "punpckldq %%xmm12,%%xmm8 \n"
- "movq %%xmm8,(%1) \n"
- "movdqa %%xmm8,%%xmm12 \n"
- "palignr $0x8,%%xmm12,%%xmm12 \n"
- "movq %%xmm12,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "punpckldq %%xmm14,%%xmm10 \n"
- "movdqa %%xmm10,%%xmm14 \n"
- "movq %%xmm10,(%1) \n"
- "palignr $0x8,%%xmm14,%%xmm14 \n"
- "punpckldq %%xmm13,%%xmm9 \n"
- "movq %%xmm14,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "movdqa %%xmm9,%%xmm13 \n"
- "movq %%xmm9,(%1) \n"
- "palignr $0x8,%%xmm13,%%xmm13 \n"
- "movq %%xmm13,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "punpckldq %%xmm15,%%xmm11 \n"
- "movq %%xmm11,(%1) \n"
- "movdqa %%xmm11,%%xmm15 \n"
- "palignr $0x8,%%xmm15,%%xmm15 \n"
- "sub $0x10,%2 \n"
- "movq %%xmm15,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- : "r"((intptr_t)(src_stride)), // %3
- "r"((intptr_t)(dst_stride)) // %4
- : "memory", "cc",
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
- "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
-);
-}
-
-#define HAS_TRANSPOSE_UVWX8_SSE2
-static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int w) {
- asm volatile (
- // Read in the data from the source pointer.
- // First round of bit swap.
- ".p2align 2 \n"
-"1: \n"
- "movdqa (%0),%%xmm0 \n"
- "movdqa (%0,%4),%%xmm1 \n"
- "lea (%0,%4,2),%0 \n"
- "movdqa %%xmm0,%%xmm8 \n"
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpckhbw %%xmm1,%%xmm8 \n"
- "movdqa %%xmm8,%%xmm1 \n"
- "movdqa (%0),%%xmm2 \n"
- "movdqa (%0,%4),%%xmm3 \n"
- "lea (%0,%4,2),%0 \n"
- "movdqa %%xmm2,%%xmm8 \n"
- "punpcklbw %%xmm3,%%xmm2 \n"
- "punpckhbw %%xmm3,%%xmm8 \n"
- "movdqa %%xmm8,%%xmm3 \n"
- "movdqa (%0),%%xmm4 \n"
- "movdqa (%0,%4),%%xmm5 \n"
- "lea (%0,%4,2),%0 \n"
- "movdqa %%xmm4,%%xmm8 \n"
- "punpcklbw %%xmm5,%%xmm4 \n"
- "punpckhbw %%xmm5,%%xmm8 \n"
- "movdqa %%xmm8,%%xmm5 \n"
- "movdqa (%0),%%xmm6 \n"
- "movdqa (%0,%4),%%xmm7 \n"
- "lea (%0,%4,2),%0 \n"
- "movdqa %%xmm6,%%xmm8 \n"
- "punpcklbw %%xmm7,%%xmm6 \n"
- "neg %4 \n"
- "lea 0x10(%0,%4,8),%0 \n"
- "punpckhbw %%xmm7,%%xmm8 \n"
- "movdqa %%xmm8,%%xmm7 \n"
- "neg %4 \n"
- // Second round of bit swap.
- "movdqa %%xmm0,%%xmm8 \n"
- "movdqa %%xmm1,%%xmm9 \n"
- "punpckhwd %%xmm2,%%xmm8 \n"
- "punpckhwd %%xmm3,%%xmm9 \n"
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpcklwd %%xmm3,%%xmm1 \n"
- "movdqa %%xmm8,%%xmm2 \n"
- "movdqa %%xmm9,%%xmm3 \n"
- "movdqa %%xmm4,%%xmm8 \n"
- "movdqa %%xmm5,%%xmm9 \n"
- "punpckhwd %%xmm6,%%xmm8 \n"
- "punpckhwd %%xmm7,%%xmm9 \n"
- "punpcklwd %%xmm6,%%xmm4 \n"
- "punpcklwd %%xmm7,%%xmm5 \n"
- "movdqa %%xmm8,%%xmm6 \n"
- "movdqa %%xmm9,%%xmm7 \n"
- // Third round of bit swap.
- // Write to the destination pointer.
- "movdqa %%xmm0,%%xmm8 \n"
- "punpckldq %%xmm4,%%xmm0 \n"
- "movlpd %%xmm0,(%1) \n" // Write back U channel
- "movhpd %%xmm0,(%2) \n" // Write back V channel
- "punpckhdq %%xmm4,%%xmm8 \n"
- "movlpd %%xmm8,(%1,%5) \n"
- "lea (%1,%5,2),%1 \n"
- "movhpd %%xmm8,(%2,%6) \n"
- "lea (%2,%6,2),%2 \n"
- "movdqa %%xmm2,%%xmm8 \n"
- "punpckldq %%xmm6,%%xmm2 \n"
- "movlpd %%xmm2,(%1) \n"
- "movhpd %%xmm2,(%2) \n"
- "punpckhdq %%xmm6,%%xmm8 \n"
- "movlpd %%xmm8,(%1,%5) \n"
- "lea (%1,%5,2),%1 \n"
- "movhpd %%xmm8,(%2,%6) \n"
- "lea (%2,%6,2),%2 \n"
- "movdqa %%xmm1,%%xmm8 \n"
- "punpckldq %%xmm5,%%xmm1 \n"
- "movlpd %%xmm1,(%1) \n"
- "movhpd %%xmm1,(%2) \n"
- "punpckhdq %%xmm5,%%xmm8 \n"
- "movlpd %%xmm8,(%1,%5) \n"
- "lea (%1,%5,2),%1 \n"
- "movhpd %%xmm8,(%2,%6) \n"
- "lea (%2,%6,2),%2 \n"
- "movdqa %%xmm3,%%xmm8 \n"
- "punpckldq %%xmm7,%%xmm3 \n"
- "movlpd %%xmm3,(%1) \n"
- "movhpd %%xmm3,(%2) \n"
- "punpckhdq %%xmm7,%%xmm8 \n"
- "sub $0x8,%3 \n"
- "movlpd %%xmm8,(%1,%5) \n"
- "lea (%1,%5,2),%1 \n"
- "movhpd %%xmm8,(%2,%6) \n"
- "lea (%2,%6,2),%2 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst_a), // %1
- "+r"(dst_b), // %2
- "+r"(w) // %3
- : "r"((intptr_t)(src_stride)), // %4
- "r"((intptr_t)(dst_stride_a)), // %5
- "r"((intptr_t)(dst_stride_b)) // %6
- : "memory", "cc",
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
- "xmm8", "xmm9"
-);
-}
-#endif
-#endif
-
-static void TransposeWx8_C(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width) {
- int i;
- for (i = 0; i < width; ++i) {
- dst[0] = src[0 * src_stride];
- dst[1] = src[1 * src_stride];
- dst[2] = src[2 * src_stride];
- dst[3] = src[3 * src_stride];
- dst[4] = src[4 * src_stride];
- dst[5] = src[5 * src_stride];
- dst[6] = src[6 * src_stride];
- dst[7] = src[7 * src_stride];
- ++src;
- dst += dst_stride;
- }
-}
-
-static void TransposeWxH_C(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width, int height) {
- int i;
- for (i = 0; i < width; ++i) {
- int j;
- for (j = 0; j < height; ++j) {
- dst[i * dst_stride + j] = src[j * src_stride + i];
- }
- }
-}
-
LIBYUV_API
void TransposePlane(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width, int height) {
int i = height;
void (*TransposeWx8)(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width) = TransposeWx8_C;
-#if defined(HAS_TRANSPOSE_WX8_NEON)
+ uint8* dst, int dst_stride, int width) = TransposeWx8_C;
+#if defined(HAS_TRANSPOSEWX8_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
TransposeWx8 = TransposeWx8_NEON;
}
#endif
-#if defined(HAS_TRANSPOSE_WX8_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
- TransposeWx8 = TransposeWx8_SSSE3;
+#if defined(HAS_TRANSPOSEWX8_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ TransposeWx8 = TransposeWx8_Any_SSSE3;
+ if (IS_ALIGNED(width, 8)) {
+ TransposeWx8 = TransposeWx8_SSSE3;
+ }
}
#endif
-#if defined(HAS_TRANSPOSE_WX8_FAST_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) &&
- IS_ALIGNED(width, 16) &&
- IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
- TransposeWx8 = TransposeWx8_FAST_SSSE3;
+#if defined(HAS_TRANSPOSEWX8_FAST_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ TransposeWx8 = TransposeWx8_Fast_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ TransposeWx8 = TransposeWx8_Fast_SSSE3;
+ }
}
#endif
-#if defined(HAS_TRANSPOSE_WX8_MIPS_DSPR2)
+#if defined(HAS_TRANSPOSEWX8_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
if (IS_ALIGNED(width, 4) &&
IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
- TransposeWx8 = TransposeWx8_FAST_MIPS_DSPR2;
+ TransposeWx8 = TransposeWx8_Fast_MIPS_DSPR2;
} else {
TransposeWx8 = TransposeWx8_MIPS_DSPR2;
}
@@ -857,7 +68,9 @@ void TransposePlane(const uint8* src, int src_stride,
i -= 8;
}
- TransposeWxH_C(src, src_stride, dst, dst_stride, width, i);
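+  // Transpose any leftover rows (height % 8 != 0) with the generic C path.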
+ if (i > 0) {
+ TransposeWxH_C(src, src_stride, dst, dst_stride, width, i);
+ }
}
LIBYUV_API
@@ -897,29 +110,30 @@ void RotatePlane180(const uint8* src, int src_stride,
void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
#if defined(HAS_MIRRORROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
- MirrorRow = MirrorRow_NEON;
- }
-#endif
-#if defined(HAS_MIRRORROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
- IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
- IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
- MirrorRow = MirrorRow_SSE2;
+ if (TestCpuFlag(kCpuHasNEON)) {
+ MirrorRow = MirrorRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ MirrorRow = MirrorRow_NEON;
+ }
}
#endif
#if defined(HAS_MIRRORROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
- IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
- IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
- MirrorRow = MirrorRow_SSSE3;
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ MirrorRow = MirrorRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ MirrorRow = MirrorRow_SSSE3;
+ }
}
#endif
#if defined(HAS_MIRRORROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) {
- MirrorRow = MirrorRow_AVX2;
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ MirrorRow = MirrorRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ MirrorRow = MirrorRow_AVX2;
+ }
}
#endif
+// TODO(fbarchard): Make the MIPS mirror path handle unaligned memory.
#if defined(HAS_MIRRORROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4) &&
@@ -927,21 +141,14 @@ void RotatePlane180(const uint8* src, int src_stride,
MirrorRow = MirrorRow_MIPS_DSPR2;
}
#endif
-#if defined(HAS_COPYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
- CopyRow = CopyRow_NEON;
- }
-#endif
-#if defined(HAS_COPYROW_X86)
- if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
- CopyRow = CopyRow_X86;
- }
-#endif
#if defined(HAS_COPYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
- IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
- IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
- CopyRow = CopyRow_SSE2;
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
+ }
+#endif
+#if defined(HAS_COPYROW_AVX)
+ if (TestCpuFlag(kCpuHasAVX)) {
+ CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
}
#endif
#if defined(HAS_COPYROW_ERMS)
@@ -949,6 +156,11 @@ void RotatePlane180(const uint8* src, int src_stride,
CopyRow = CopyRow_ERMS;
}
#endif
+#if defined(HAS_COPYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
+ }
+#endif
#if defined(HAS_COPYROW_MIPS)
if (TestCpuFlag(kCpuHasMIPS)) {
CopyRow = CopyRow_MIPS;
@@ -968,48 +180,6 @@ void RotatePlane180(const uint8* src, int src_stride,
free_aligned_buffer_64(row);
}
-static void TransposeUVWx8_C(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width) {
- int i;
- for (i = 0; i < width; ++i) {
- dst_a[0] = src[0 * src_stride + 0];
- dst_b[0] = src[0 * src_stride + 1];
- dst_a[1] = src[1 * src_stride + 0];
- dst_b[1] = src[1 * src_stride + 1];
- dst_a[2] = src[2 * src_stride + 0];
- dst_b[2] = src[2 * src_stride + 1];
- dst_a[3] = src[3 * src_stride + 0];
- dst_b[3] = src[3 * src_stride + 1];
- dst_a[4] = src[4 * src_stride + 0];
- dst_b[4] = src[4 * src_stride + 1];
- dst_a[5] = src[5 * src_stride + 0];
- dst_b[5] = src[5 * src_stride + 1];
- dst_a[6] = src[6 * src_stride + 0];
- dst_b[6] = src[6 * src_stride + 1];
- dst_a[7] = src[7 * src_stride + 0];
- dst_b[7] = src[7 * src_stride + 1];
- src += 2;
- dst_a += dst_stride_a;
- dst_b += dst_stride_b;
- }
-}
-
-static void TransposeUVWxH_C(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width, int height) {
- int i;
- for (i = 0; i < width * 2; i += 2) {
- int j;
- for (j = 0; j < height; ++j) {
- dst_a[j + ((i >> 1) * dst_stride_a)] = src[i + (j * src_stride)];
- dst_b[j + ((i >> 1) * dst_stride_b)] = src[i + (j * src_stride) + 1];
- }
- }
-}
-
LIBYUV_API
void TransposeUV(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
@@ -1020,17 +190,20 @@ void TransposeUV(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width) = TransposeUVWx8_C;
-#if defined(HAS_TRANSPOSE_UVWX8_NEON)
+#if defined(HAS_TRANSPOSEUVWX8_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
TransposeUVWx8 = TransposeUVWx8_NEON;
}
-#elif defined(HAS_TRANSPOSE_UVWX8_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) &&
- IS_ALIGNED(width, 8) &&
- IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
- TransposeUVWx8 = TransposeUVWx8_SSE2;
+#endif
+#if defined(HAS_TRANSPOSEUVWX8_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ TransposeUVWx8 = TransposeUVWx8_Any_SSE2;
+ if (IS_ALIGNED(width, 8)) {
+ TransposeUVWx8 = TransposeUVWx8_SSE2;
+ }
}
-#elif defined(HAS_TRANSPOSE_UVWx8_MIPS_DSPR2)
+#endif
+#if defined(HAS_TRANSPOSEUVWX8_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 2) &&
IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
TransposeUVWx8 = TransposeUVWx8_MIPS_DSPR2;
@@ -1049,10 +222,12 @@ void TransposeUV(const uint8* src, int src_stride,
i -= 8;
}
- TransposeUVWxH_C(src, src_stride,
- dst_a, dst_stride_a,
- dst_b, dst_stride_b,
- width, i);
+ if (i > 0) {
+ TransposeUVWxH_C(src, src_stride,
+ dst_a, dst_stride_a,
+ dst_b, dst_stride_b,
+ width, i);
+ }
}
LIBYUV_API
@@ -1098,12 +273,13 @@ void RotateUV180(const uint8* src, int src_stride,
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
MirrorRowUV = MirrorUVRow_NEON;
}
-#elif defined(HAS_MIRRORROW_UV_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
- IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
+#endif
+#if defined(HAS_MIRRORROW_UV_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) {
MirrorRowUV = MirrorUVRow_SSSE3;
}
-#elif defined(HAS_MIRRORUVROW_MIPS_DSPR2)
+#endif
+#if defined(HAS_MIRRORUVROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
MirrorRowUV = MirrorUVRow_MIPS_DSPR2;
diff --git a/TMessagesProj/jni/libyuv/source/rotate_any.cc b/TMessagesProj/jni/libyuv/source/rotate_any.cc
new file mode 100644
index 000000000..d12bad5dc
--- /dev/null
+++ b/TMessagesProj/jni/libyuv/source/rotate_any.cc
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2015 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/rotate.h"
+#include "libyuv/rotate_row.h"
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
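+// TANY generates an "any width" wrapper around a SIMD transpose kernel: the
+// largest multiple of (MASK + 1) columns runs through the SIMD kernel and
+// the leftover columns fall back to TransposeWx8_C.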
+#define TANY(NAMEANY, TPOS_SIMD, MASK) \
+ void NAMEANY(const uint8* src, int src_stride, \
+ uint8* dst, int dst_stride, int width) { \
+ int r = width & MASK; \
+ int n = width - r; \
+ if (n > 0) { \
+ TPOS_SIMD(src, src_stride, dst, dst_stride, n); \
+ } \
+ TransposeWx8_C(src + n, src_stride, dst + n * dst_stride, dst_stride, r);\
+ }
+
+#ifdef HAS_TRANSPOSEWX8_NEON
+TANY(TransposeWx8_Any_NEON, TransposeWx8_NEON, 7)
+#endif
+#ifdef HAS_TRANSPOSEWX8_SSSE3
+TANY(TransposeWx8_Any_SSSE3, TransposeWx8_SSSE3, 7)
+#endif
+#ifdef HAS_TRANSPOSEWX8_FAST_SSSE3
+TANY(TransposeWx8_Fast_Any_SSSE3, TransposeWx8_Fast_SSSE3, 15)
+#endif
+#ifdef HAS_TRANSPOSEWX8_MIPS_DSPR2
+TANY(TransposeWx8_Any_MIPS_DSPR2, TransposeWx8_MIPS_DSPR2, 7)
+#endif
+#undef TANY
+
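+// TUVANY is the interleaved-UV counterpart; each column is a U,V byte pair,
+// which is why the C fallback starts at src + n * 2.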
+#define TUVANY(NAMEANY, TPOS_SIMD, MASK) \
+ void NAMEANY(const uint8* src, int src_stride, \
+ uint8* dst_a, int dst_stride_a, \
+ uint8* dst_b, int dst_stride_b, int width) { \
+ int r = width & MASK; \
+ int n = width - r; \
+ if (n > 0) { \
+ TPOS_SIMD(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, \
+ n); \
+ } \
+ TransposeUVWx8_C(src + n * 2, src_stride, \
+ dst_a + n * dst_stride_a, dst_stride_a, \
+ dst_b + n * dst_stride_b, dst_stride_b, r); \
+ }
+
+#ifdef HAS_TRANSPOSEUVWX8_NEON
+TUVANY(TransposeUVWx8_Any_NEON, TransposeUVWx8_NEON, 7)
+#endif
+#ifdef HAS_TRANSPOSEUVWX8_SSE2
+TUVANY(TransposeUVWx8_Any_SSE2, TransposeUVWx8_SSE2, 7)
+#endif
+#ifdef HAS_TRANSPOSEUVWX8_MIPS_DSPR2
+TUVANY(TransposeUVWx8_Any_MIPS_DSPR2, TransposeUVWx8_MIPS_DSPR2, 7)
+#endif
+#undef TUVANY
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/TMessagesProj/jni/libyuv/source/rotate_argb.cc b/TMessagesProj/jni/libyuv/source/rotate_argb.cc
index ab0f9ce07..787c0ad1b 100644
--- a/TMessagesProj/jni/libyuv/source/rotate_argb.cc
+++ b/TMessagesProj/jni/libyuv/source/rotate_argb.cc
@@ -27,36 +27,31 @@ extern "C" {
(defined(__x86_64__) && !defined(__native_client__)) || defined(__i386__))
#define HAS_SCALEARGBROWDOWNEVEN_SSE2
void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, int src_stride,
- int src_stepx,
- uint8* dst_ptr, int dst_width);
+ int src_stepx, uint8* dst_ptr, int dst_width);
#endif
#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
- (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
+ (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_SCALEARGBROWDOWNEVEN_NEON
void ScaleARGBRowDownEven_NEON(const uint8* src_ptr, int src_stride,
- int src_stepx,
- uint8* dst_ptr, int dst_width);
+ int src_stepx, uint8* dst_ptr, int dst_width);
#endif
void ScaleARGBRowDownEven_C(const uint8* src_ptr, int,
- int src_stepx,
- uint8* dst_ptr, int dst_width);
+ int src_stepx, uint8* dst_ptr, int dst_width);
static void ARGBTranspose(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width, int height) {
+ uint8* dst, int dst_stride, int width, int height) {
int i;
int src_pixel_step = src_stride >> 2;
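+  // Each output row is one source column: ScaleARGBRowDownEven steps through
+  // the source src_stride / 4 pixels (i.e. one row) per output pixel.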
void (*ScaleARGBRowDownEven)(const uint8* src_ptr, int src_stride,
int src_step, uint8* dst_ptr, int dst_width) = ScaleARGBRowDownEven_C;
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(height, 4) && // Width of dest.
- IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
+ if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(height, 4)) { // Width of dest.
ScaleARGBRowDownEven = ScaleARGBRowDownEven_SSE2;
}
-#elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(height, 4) && // Width of dest.
- IS_ALIGNED(src, 4)) {
+#endif
+#if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(height, 4)) { // Width of dest.
ScaleARGBRowDownEven = ScaleARGBRowDownEven_NEON;
}
#endif
@@ -69,8 +64,7 @@ static void ARGBTranspose(const uint8* src, int src_stride,
}
void ARGBRotate90(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width, int height) {
+ uint8* dst, int dst_stride, int width, int height) {
// Rotate by 90 is an ARGBTranspose with the source read
// from bottom to top. So set the source pointer to the end
// of the buffer and flip the sign of the source stride.
@@ -80,8 +74,7 @@ void ARGBRotate90(const uint8* src, int src_stride,
}
void ARGBRotate270(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width, int height) {
+ uint8* dst, int dst_stride, int width, int height) {
// Rotate by 270 is an ARGBTranspose with the destination written
// from bottom to top. So set the destination pointer to the end
// of the buffer and flip the sign of the destination stride.
@@ -91,8 +84,7 @@ void ARGBRotate270(const uint8* src, int src_stride,
}
void ARGBRotate180(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width, int height) {
+ uint8* dst, int dst_stride, int width, int height) {
// Swap first and last row and mirror the content. Uses a temporary row.
align_buffer_64(row, width * 4);
const uint8* src_bot = src + src_stride * (height - 1);
@@ -102,38 +94,38 @@ void ARGBRotate180(const uint8* src, int src_stride,
void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
ARGBMirrorRow_C;
void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
-#if defined(HAS_ARGBMIRRORROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) &&
- IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
- IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
- ARGBMirrorRow = ARGBMirrorRow_SSSE3;
+#if defined(HAS_ARGBMIRRORROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBMirrorRow = ARGBMirrorRow_Any_NEON;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBMirrorRow = ARGBMirrorRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBMIRRORROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ARGBMirrorRow = ARGBMirrorRow_Any_SSE2;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBMirrorRow = ARGBMirrorRow_SSE2;
+ }
}
#endif
#if defined(HAS_ARGBMIRRORROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) {
- ARGBMirrorRow = ARGBMirrorRow_AVX2;
- }
-#endif
-#if defined(HAS_ARGBMIRRORROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) {
- ARGBMirrorRow = ARGBMirrorRow_NEON;
- }
-#endif
-#if defined(HAS_COPYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width * 4, 32)) {
- CopyRow = CopyRow_NEON;
- }
-#endif
-#if defined(HAS_COPYROW_X86)
- if (TestCpuFlag(kCpuHasX86)) {
- CopyRow = CopyRow_X86;
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBMirrorRow = ARGBMirrorRow_Any_AVX2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBMirrorRow = ARGBMirrorRow_AVX2;
+ }
}
#endif
#if defined(HAS_COPYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width * 4, 32) &&
- IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
- IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
- CopyRow = CopyRow_SSE2;
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
+ }
+#endif
+#if defined(HAS_COPYROW_AVX)
+ if (TestCpuFlag(kCpuHasAVX)) {
+ CopyRow = IS_ALIGNED(width * 4, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
}
#endif
#if defined(HAS_COPYROW_ERMS)
@@ -141,6 +133,11 @@ void ARGBRotate180(const uint8* src, int src_stride,
CopyRow = CopyRow_ERMS;
}
#endif
+#if defined(HAS_COPYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
+ }
+#endif
#if defined(HAS_COPYROW_MIPS)
if (TestCpuFlag(kCpuHasMIPS)) {
CopyRow = CopyRow_MIPS;
@@ -162,8 +159,7 @@ void ARGBRotate180(const uint8* src, int src_stride,
LIBYUV_API
int ARGBRotate(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height,
+ uint8* dst_argb, int dst_stride_argb, int width, int height,
enum RotationMode mode) {
if (!src_argb || width <= 0 || height == 0 || !dst_argb) {
return -1;
diff --git a/TMessagesProj/jni/libyuv/source/rotate_common.cc b/TMessagesProj/jni/libyuv/source/rotate_common.cc
new file mode 100644
index 000000000..b33a9a0c6
--- /dev/null
+++ b/TMessagesProj/jni/libyuv/source/rotate_common.cc
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/row.h"
+#include "libyuv/rotate_row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+void TransposeWx8_C(const uint8* src, int src_stride,
+ uint8* dst, int dst_stride, int width) {
+ int i;
+ for (i = 0; i < width; ++i) {
+ dst[0] = src[0 * src_stride];
+ dst[1] = src[1 * src_stride];
+ dst[2] = src[2 * src_stride];
+ dst[3] = src[3 * src_stride];
+ dst[4] = src[4 * src_stride];
+ dst[5] = src[5 * src_stride];
+ dst[6] = src[6 * src_stride];
+ dst[7] = src[7 * src_stride];
+ ++src;
+ dst += dst_stride;
+ }
+}
+
+void TransposeUVWx8_C(const uint8* src, int src_stride,
+ uint8* dst_a, int dst_stride_a,
+ uint8* dst_b, int dst_stride_b, int width) {
+ int i;
+ for (i = 0; i < width; ++i) {
+ dst_a[0] = src[0 * src_stride + 0];
+ dst_b[0] = src[0 * src_stride + 1];
+ dst_a[1] = src[1 * src_stride + 0];
+ dst_b[1] = src[1 * src_stride + 1];
+ dst_a[2] = src[2 * src_stride + 0];
+ dst_b[2] = src[2 * src_stride + 1];
+ dst_a[3] = src[3 * src_stride + 0];
+ dst_b[3] = src[3 * src_stride + 1];
+ dst_a[4] = src[4 * src_stride + 0];
+ dst_b[4] = src[4 * src_stride + 1];
+ dst_a[5] = src[5 * src_stride + 0];
+ dst_b[5] = src[5 * src_stride + 1];
+ dst_a[6] = src[6 * src_stride + 0];
+ dst_b[6] = src[6 * src_stride + 1];
+ dst_a[7] = src[7 * src_stride + 0];
+ dst_b[7] = src[7 * src_stride + 1];
+ src += 2;
+ dst_a += dst_stride_a;
+ dst_b += dst_stride_b;
+ }
+}
+
+void TransposeWxH_C(const uint8* src, int src_stride,
+ uint8* dst, int dst_stride,
+ int width, int height) {
+ int i;
+ for (i = 0; i < width; ++i) {
+ int j;
+ for (j = 0; j < height; ++j) {
+ dst[i * dst_stride + j] = src[j * src_stride + i];
+ }
+ }
+}
+
+void TransposeUVWxH_C(const uint8* src, int src_stride,
+ uint8* dst_a, int dst_stride_a,
+ uint8* dst_b, int dst_stride_b,
+ int width, int height) {
+ int i;
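+  // i steps over byte pairs: i >> 1 is the pixel column, src[i] and
+  // src[i + 1] are its U and V samples.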
+ for (i = 0; i < width * 2; i += 2) {
+ int j;
+ for (j = 0; j < height; ++j) {
+ dst_a[j + ((i >> 1) * dst_stride_a)] = src[i + (j * src_stride)];
+ dst_b[j + ((i >> 1) * dst_stride_b)] = src[i + (j * src_stride) + 1];
+ }
+ }
+}
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/TMessagesProj/jni/libyuv/source/rotate_gcc.cc b/TMessagesProj/jni/libyuv/source/rotate_gcc.cc
new file mode 100644
index 000000000..cbe870caa
--- /dev/null
+++ b/TMessagesProj/jni/libyuv/source/rotate_gcc.cc
@@ -0,0 +1,368 @@
+/*
+ * Copyright 2015 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/row.h"
+#include "libyuv/rotate_row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// This module is for GCC x86 and x64.
+#if !defined(LIBYUV_DISABLE_X86) && \
+ (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
+
+// Transpose 8x8. 32 or 64 bit, but not 64-bit NaCl.
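+// The kernel transposes via three interleave rounds: punpcklbw pairs bytes
+// from adjacent rows, punpcklwd pairs 16-bit groups, punpckldq pairs 32-bit
+// groups; each movq then stores one 8-byte output row (a source column).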
+#if defined(HAS_TRANSPOSEWX8_SSSE3)
+void TransposeWx8_SSSE3(const uint8* src, int src_stride,
+ uint8* dst, int dst_stride, int width) {
+ asm volatile (
+ // Read in the data from the source pointer.
+ // First round of bit swap.
+ LABELALIGN
+ "1: \n"
+ "movq (%0),%%xmm0 \n"
+ "movq (%0,%3),%%xmm1 \n"
+ "lea (%0,%3,2),%0 \n"
+ "punpcklbw %%xmm1,%%xmm0 \n"
+ "movq (%0),%%xmm2 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "palignr $0x8,%%xmm1,%%xmm1 \n"
+ "movq (%0,%3),%%xmm3 \n"
+ "lea (%0,%3,2),%0 \n"
+ "punpcklbw %%xmm3,%%xmm2 \n"
+ "movdqa %%xmm2,%%xmm3 \n"
+ "movq (%0),%%xmm4 \n"
+ "palignr $0x8,%%xmm3,%%xmm3 \n"
+ "movq (%0,%3),%%xmm5 \n"
+ "lea (%0,%3,2),%0 \n"
+ "punpcklbw %%xmm5,%%xmm4 \n"
+ "movdqa %%xmm4,%%xmm5 \n"
+ "movq (%0),%%xmm6 \n"
+ "palignr $0x8,%%xmm5,%%xmm5 \n"
+ "movq (%0,%3),%%xmm7 \n"
+ "lea (%0,%3,2),%0 \n"
+ "punpcklbw %%xmm7,%%xmm6 \n"
+ "neg %3 \n"
+ "movdqa %%xmm6,%%xmm7 \n"
+ "lea 0x8(%0,%3,8),%0 \n"
+ "palignr $0x8,%%xmm7,%%xmm7 \n"
+ "neg %3 \n"
+ // Second round of bit swap.
+ "punpcklwd %%xmm2,%%xmm0 \n"
+ "punpcklwd %%xmm3,%%xmm1 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "movdqa %%xmm1,%%xmm3 \n"
+ "palignr $0x8,%%xmm2,%%xmm2 \n"
+ "palignr $0x8,%%xmm3,%%xmm3 \n"
+ "punpcklwd %%xmm6,%%xmm4 \n"
+ "punpcklwd %%xmm7,%%xmm5 \n"
+ "movdqa %%xmm4,%%xmm6 \n"
+ "movdqa %%xmm5,%%xmm7 \n"
+ "palignr $0x8,%%xmm6,%%xmm6 \n"
+ "palignr $0x8,%%xmm7,%%xmm7 \n"
+ // Third round of bit swap.
+ // Write to the destination pointer.
+ "punpckldq %%xmm4,%%xmm0 \n"
+ "movq %%xmm0,(%1) \n"
+ "movdqa %%xmm0,%%xmm4 \n"
+ "palignr $0x8,%%xmm4,%%xmm4 \n"
+ "movq %%xmm4,(%1,%4) \n"
+ "lea (%1,%4,2),%1 \n"
+ "punpckldq %%xmm6,%%xmm2 \n"
+ "movdqa %%xmm2,%%xmm6 \n"
+ "movq %%xmm2,(%1) \n"
+ "palignr $0x8,%%xmm6,%%xmm6 \n"
+ "punpckldq %%xmm5,%%xmm1 \n"
+ "movq %%xmm6,(%1,%4) \n"
+ "lea (%1,%4,2),%1 \n"
+ "movdqa %%xmm1,%%xmm5 \n"
+ "movq %%xmm1,(%1) \n"
+ "palignr $0x8,%%xmm5,%%xmm5 \n"
+ "movq %%xmm5,(%1,%4) \n"
+ "lea (%1,%4,2),%1 \n"
+ "punpckldq %%xmm7,%%xmm3 \n"
+ "movq %%xmm3,(%1) \n"
+ "movdqa %%xmm3,%%xmm7 \n"
+ "palignr $0x8,%%xmm7,%%xmm7 \n"
+ "sub $0x8,%2 \n"
+ "movq %%xmm7,(%1,%4) \n"
+ "lea (%1,%4,2),%1 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "r"((intptr_t)(src_stride)), // %3
+ "r"((intptr_t)(dst_stride)) // %4
+ : "memory", "cc",
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
+ );
+}
+#endif // defined(HAS_TRANSPOSEWX8_SSSE3)
+
+// Transpose 16x8. 64 bit.
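+// The _Fast_ variant uses xmm8-xmm15 (hence x86_64 only) to transpose 16
+// columns per pass instead of 8.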
+#if defined(HAS_TRANSPOSEWX8_FAST_SSSE3)
+void TransposeWx8_Fast_SSSE3(const uint8* src, int src_stride,
+ uint8* dst, int dst_stride, int width) {
+ asm volatile (
+ // Read in the data from the source pointer.
+ // First round of bit swap.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu (%0,%3),%%xmm1 \n"
+ "lea (%0,%3,2),%0 \n"
+ "movdqa %%xmm0,%%xmm8 \n"
+ "punpcklbw %%xmm1,%%xmm0 \n"
+ "punpckhbw %%xmm1,%%xmm8 \n"
+ "movdqu (%0),%%xmm2 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "movdqa %%xmm8,%%xmm9 \n"
+ "palignr $0x8,%%xmm1,%%xmm1 \n"
+ "palignr $0x8,%%xmm9,%%xmm9 \n"
+ "movdqu (%0,%3),%%xmm3 \n"
+ "lea (%0,%3,2),%0 \n"
+ "movdqa %%xmm2,%%xmm10 \n"
+ "punpcklbw %%xmm3,%%xmm2 \n"
+ "punpckhbw %%xmm3,%%xmm10 \n"
+ "movdqa %%xmm2,%%xmm3 \n"
+ "movdqa %%xmm10,%%xmm11 \n"
+ "movdqu (%0),%%xmm4 \n"
+ "palignr $0x8,%%xmm3,%%xmm3 \n"
+ "palignr $0x8,%%xmm11,%%xmm11 \n"
+ "movdqu (%0,%3),%%xmm5 \n"
+ "lea (%0,%3,2),%0 \n"
+ "movdqa %%xmm4,%%xmm12 \n"
+ "punpcklbw %%xmm5,%%xmm4 \n"
+ "punpckhbw %%xmm5,%%xmm12 \n"
+ "movdqa %%xmm4,%%xmm5 \n"
+ "movdqa %%xmm12,%%xmm13 \n"
+ "movdqu (%0),%%xmm6 \n"
+ "palignr $0x8,%%xmm5,%%xmm5 \n"
+ "palignr $0x8,%%xmm13,%%xmm13 \n"
+ "movdqu (%0,%3),%%xmm7 \n"
+ "lea (%0,%3,2),%0 \n"
+ "movdqa %%xmm6,%%xmm14 \n"
+ "punpcklbw %%xmm7,%%xmm6 \n"
+ "punpckhbw %%xmm7,%%xmm14 \n"
+ "neg %3 \n"
+ "movdqa %%xmm6,%%xmm7 \n"
+ "movdqa %%xmm14,%%xmm15 \n"
+ "lea 0x10(%0,%3,8),%0 \n"
+ "palignr $0x8,%%xmm7,%%xmm7 \n"
+ "palignr $0x8,%%xmm15,%%xmm15 \n"
+ "neg %3 \n"
+ // Second round of bit swap.
+ "punpcklwd %%xmm2,%%xmm0 \n"
+ "punpcklwd %%xmm3,%%xmm1 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "movdqa %%xmm1,%%xmm3 \n"
+ "palignr $0x8,%%xmm2,%%xmm2 \n"
+ "palignr $0x8,%%xmm3,%%xmm3 \n"
+ "punpcklwd %%xmm6,%%xmm4 \n"
+ "punpcklwd %%xmm7,%%xmm5 \n"
+ "movdqa %%xmm4,%%xmm6 \n"
+ "movdqa %%xmm5,%%xmm7 \n"
+ "palignr $0x8,%%xmm6,%%xmm6 \n"
+ "palignr $0x8,%%xmm7,%%xmm7 \n"
+ "punpcklwd %%xmm10,%%xmm8 \n"
+ "punpcklwd %%xmm11,%%xmm9 \n"
+ "movdqa %%xmm8,%%xmm10 \n"
+ "movdqa %%xmm9,%%xmm11 \n"
+ "palignr $0x8,%%xmm10,%%xmm10 \n"
+ "palignr $0x8,%%xmm11,%%xmm11 \n"
+ "punpcklwd %%xmm14,%%xmm12 \n"
+ "punpcklwd %%xmm15,%%xmm13 \n"
+ "movdqa %%xmm12,%%xmm14 \n"
+ "movdqa %%xmm13,%%xmm15 \n"
+ "palignr $0x8,%%xmm14,%%xmm14 \n"
+ "palignr $0x8,%%xmm15,%%xmm15 \n"
+ // Third round of bit swap.
+ // Write to the destination pointer.
+ "punpckldq %%xmm4,%%xmm0 \n"
+ "movq %%xmm0,(%1) \n"
+ "movdqa %%xmm0,%%xmm4 \n"
+ "palignr $0x8,%%xmm4,%%xmm4 \n"
+ "movq %%xmm4,(%1,%4) \n"
+ "lea (%1,%4,2),%1 \n"
+ "punpckldq %%xmm6,%%xmm2 \n"
+ "movdqa %%xmm2,%%xmm6 \n"
+ "movq %%xmm2,(%1) \n"
+ "palignr $0x8,%%xmm6,%%xmm6 \n"
+ "punpckldq %%xmm5,%%xmm1 \n"
+ "movq %%xmm6,(%1,%4) \n"
+ "lea (%1,%4,2),%1 \n"
+ "movdqa %%xmm1,%%xmm5 \n"
+ "movq %%xmm1,(%1) \n"
+ "palignr $0x8,%%xmm5,%%xmm5 \n"
+ "movq %%xmm5,(%1,%4) \n"
+ "lea (%1,%4,2),%1 \n"
+ "punpckldq %%xmm7,%%xmm3 \n"
+ "movq %%xmm3,(%1) \n"
+ "movdqa %%xmm3,%%xmm7 \n"
+ "palignr $0x8,%%xmm7,%%xmm7 \n"
+ "movq %%xmm7,(%1,%4) \n"
+ "lea (%1,%4,2),%1 \n"
+ "punpckldq %%xmm12,%%xmm8 \n"
+ "movq %%xmm8,(%1) \n"
+ "movdqa %%xmm8,%%xmm12 \n"
+ "palignr $0x8,%%xmm12,%%xmm12 \n"
+ "movq %%xmm12,(%1,%4) \n"
+ "lea (%1,%4,2),%1 \n"
+ "punpckldq %%xmm14,%%xmm10 \n"
+ "movdqa %%xmm10,%%xmm14 \n"
+ "movq %%xmm10,(%1) \n"
+ "palignr $0x8,%%xmm14,%%xmm14 \n"
+ "punpckldq %%xmm13,%%xmm9 \n"
+ "movq %%xmm14,(%1,%4) \n"
+ "lea (%1,%4,2),%1 \n"
+ "movdqa %%xmm9,%%xmm13 \n"
+ "movq %%xmm9,(%1) \n"
+ "palignr $0x8,%%xmm13,%%xmm13 \n"
+ "movq %%xmm13,(%1,%4) \n"
+ "lea (%1,%4,2),%1 \n"
+ "punpckldq %%xmm15,%%xmm11 \n"
+ "movq %%xmm11,(%1) \n"
+ "movdqa %%xmm11,%%xmm15 \n"
+ "palignr $0x8,%%xmm15,%%xmm15 \n"
+ "sub $0x10,%2 \n"
+ "movq %%xmm15,(%1,%4) \n"
+ "lea (%1,%4,2),%1 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "r"((intptr_t)(src_stride)), // %3
+ "r"((intptr_t)(dst_stride)) // %4
+ : "memory", "cc",
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
+ "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
+ );
+}
+#endif // defined(HAS_TRANSPOSEWX8_FAST_SSSE3)
+
+// Transpose UV 8x8. 64 bit.
+#if defined(HAS_TRANSPOSEUVWX8_SSE2)
+void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
+ uint8* dst_a, int dst_stride_a,
+ uint8* dst_b, int dst_stride_b, int width) {
+ asm volatile (
+ // Read in the data from the source pointer.
+ // First round of bit swap.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu (%0,%4),%%xmm1 \n"
+ "lea (%0,%4,2),%0 \n"
+ "movdqa %%xmm0,%%xmm8 \n"
+ "punpcklbw %%xmm1,%%xmm0 \n"
+ "punpckhbw %%xmm1,%%xmm8 \n"
+ "movdqa %%xmm8,%%xmm1 \n"
+ "movdqu (%0),%%xmm2 \n"
+ "movdqu (%0,%4),%%xmm3 \n"
+ "lea (%0,%4,2),%0 \n"
+ "movdqa %%xmm2,%%xmm8 \n"
+ "punpcklbw %%xmm3,%%xmm2 \n"
+ "punpckhbw %%xmm3,%%xmm8 \n"
+ "movdqa %%xmm8,%%xmm3 \n"
+ "movdqu (%0),%%xmm4 \n"
+ "movdqu (%0,%4),%%xmm5 \n"
+ "lea (%0,%4,2),%0 \n"
+ "movdqa %%xmm4,%%xmm8 \n"
+ "punpcklbw %%xmm5,%%xmm4 \n"
+ "punpckhbw %%xmm5,%%xmm8 \n"
+ "movdqa %%xmm8,%%xmm5 \n"
+ "movdqu (%0),%%xmm6 \n"
+ "movdqu (%0,%4),%%xmm7 \n"
+ "lea (%0,%4,2),%0 \n"
+ "movdqa %%xmm6,%%xmm8 \n"
+ "punpcklbw %%xmm7,%%xmm6 \n"
+ "neg %4 \n"
+ "lea 0x10(%0,%4,8),%0 \n"
+ "punpckhbw %%xmm7,%%xmm8 \n"
+ "movdqa %%xmm8,%%xmm7 \n"
+ "neg %4 \n"
+ // Second round of bit swap.
+ "movdqa %%xmm0,%%xmm8 \n"
+ "movdqa %%xmm1,%%xmm9 \n"
+ "punpckhwd %%xmm2,%%xmm8 \n"
+ "punpckhwd %%xmm3,%%xmm9 \n"
+ "punpcklwd %%xmm2,%%xmm0 \n"
+ "punpcklwd %%xmm3,%%xmm1 \n"
+ "movdqa %%xmm8,%%xmm2 \n"
+ "movdqa %%xmm9,%%xmm3 \n"
+ "movdqa %%xmm4,%%xmm8 \n"
+ "movdqa %%xmm5,%%xmm9 \n"
+ "punpckhwd %%xmm6,%%xmm8 \n"
+ "punpckhwd %%xmm7,%%xmm9 \n"
+ "punpcklwd %%xmm6,%%xmm4 \n"
+ "punpcklwd %%xmm7,%%xmm5 \n"
+ "movdqa %%xmm8,%%xmm6 \n"
+ "movdqa %%xmm9,%%xmm7 \n"
+ // Third round of bit swap.
+ // Write to the destination pointer.
+ "movdqa %%xmm0,%%xmm8 \n"
+ "punpckldq %%xmm4,%%xmm0 \n"
+ "movlpd %%xmm0,(%1) \n" // Write back U channel
+ "movhpd %%xmm0,(%2) \n" // Write back V channel
+ "punpckhdq %%xmm4,%%xmm8 \n"
+ "movlpd %%xmm8,(%1,%5) \n"
+ "lea (%1,%5,2),%1 \n"
+ "movhpd %%xmm8,(%2,%6) \n"
+ "lea (%2,%6,2),%2 \n"
+ "movdqa %%xmm2,%%xmm8 \n"
+ "punpckldq %%xmm6,%%xmm2 \n"
+ "movlpd %%xmm2,(%1) \n"
+ "movhpd %%xmm2,(%2) \n"
+ "punpckhdq %%xmm6,%%xmm8 \n"
+ "movlpd %%xmm8,(%1,%5) \n"
+ "lea (%1,%5,2),%1 \n"
+ "movhpd %%xmm8,(%2,%6) \n"
+ "lea (%2,%6,2),%2 \n"
+ "movdqa %%xmm1,%%xmm8 \n"
+ "punpckldq %%xmm5,%%xmm1 \n"
+ "movlpd %%xmm1,(%1) \n"
+ "movhpd %%xmm1,(%2) \n"
+ "punpckhdq %%xmm5,%%xmm8 \n"
+ "movlpd %%xmm8,(%1,%5) \n"
+ "lea (%1,%5,2),%1 \n"
+ "movhpd %%xmm8,(%2,%6) \n"
+ "lea (%2,%6,2),%2 \n"
+ "movdqa %%xmm3,%%xmm8 \n"
+ "punpckldq %%xmm7,%%xmm3 \n"
+ "movlpd %%xmm3,(%1) \n"
+ "movhpd %%xmm3,(%2) \n"
+ "punpckhdq %%xmm7,%%xmm8 \n"
+ "sub $0x8,%3 \n"
+ "movlpd %%xmm8,(%1,%5) \n"
+ "lea (%1,%5,2),%1 \n"
+ "movhpd %%xmm8,(%2,%6) \n"
+ "lea (%2,%6,2),%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst_a), // %1
+ "+r"(dst_b), // %2
+ "+r"(width) // %3
+ : "r"((intptr_t)(src_stride)), // %4
+ "r"((intptr_t)(dst_stride_a)), // %5
+ "r"((intptr_t)(dst_stride_b)) // %6
+ : "memory", "cc",
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
+ "xmm8", "xmm9"
+ );
+}
+#endif // defined(HAS_TRANSPOSEUVWX8_SSE2)
+#endif // defined(__x86_64__) || defined(__i386__)
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
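
The three "round of bit swap" stages above implement an 8x8 byte transpose in log2(8) = 3 interleave passes: punpcklbw/punpckhbw at byte granularity, punpcklwd at word granularity, then punpckldq at dword granularity. As a plain-C reference for the contract these kernels fulfill (the function name and loop shape here are illustrative, not part of the patch; uint8 is libyuv's typedef):

// Reference sketch: each 8-row strip of the source becomes 8-byte rows
// in the destination, i.e. source element (j, i) lands at (i, j).
static void TransposeWx8_Ref(const uint8* src, int src_stride,
                             uint8* dst, int dst_stride, int width) {
  for (int i = 0; i < width; ++i) {    // source column i becomes dst row i
    for (int j = 0; j < 8; ++j) {      // the 8 source rows become columns
      dst[i * dst_stride + j] = src[j * src_stride + i];
    }
  }
}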
diff --git a/TMessagesProj/jni/libyuv/source/rotate_mips.cc b/TMessagesProj/jni/libyuv/source/rotate_mips.cc
index 70770fd06..efe6bd909 100644
--- a/TMessagesProj/jni/libyuv/source/rotate_mips.cc
+++ b/TMessagesProj/jni/libyuv/source/rotate_mips.cc
@@ -9,6 +9,7 @@
*/
#include "libyuv/row.h"
+#include "libyuv/rotate_row.h"
#include "libyuv/basic_types.h"
@@ -22,8 +23,7 @@ extern "C" {
(_MIPS_SIM == _MIPS_SIM_ABI32)
void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width) {
+ uint8* dst, int dst_stride, int width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
@@ -106,9 +106,8 @@ void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
);
}
-void TransposeWx8_FAST_MIPS_DSPR2(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width) {
+void TransposeWx8_Fast_MIPS_DSPR2(const uint8* src, int src_stride,
+ uint8* dst, int dst_stride, int width) {
__asm__ __volatile__ (
".set noat \n"
".set push \n"
diff --git a/TMessagesProj/jni/libyuv/source/rotate_neon.cc b/TMessagesProj/jni/libyuv/source/rotate_neon.cc
index d354e11fa..9e4ecd80d 100644
--- a/TMessagesProj/jni/libyuv/source/rotate_neon.cc
+++ b/TMessagesProj/jni/libyuv/source/rotate_neon.cc
@@ -9,6 +9,7 @@
*/
#include "libyuv/row.h"
+#include "libyuv/rotate_row.h"
#include "libyuv/basic_types.h"
@@ -17,7 +18,8 @@ namespace libyuv {
extern "C" {
#endif
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
+ !defined(__aarch64__)
static uvec8 kVTbl4x4Transpose =
{ 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
@@ -33,7 +35,6 @@ void TransposeWx8_NEON(const uint8* src, int src_stride,
"sub %5, #8 \n"
// handle 8x8 blocks. this should be the majority of the plane
- ".p2align 2 \n"
"1: \n"
"mov %0, %1 \n"
@@ -254,7 +255,6 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride,
"sub %7, #8 \n"
// handle 8x8 blocks. this should be the majority of the plane
- ".p2align 2 \n"
"1: \n"
"mov %0, %1 \n"
@@ -525,7 +525,7 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride,
"q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"
);
}
-#endif
+#endif // defined(__ARM_NEON__) && !defined(__aarch64__)
#ifdef __cplusplus
} // extern "C"
diff --git a/TMessagesProj/jni/libyuv/source/rotate_neon64.cc b/TMessagesProj/jni/libyuv/source/rotate_neon64.cc
index b080a2c6a..f52c082b3 100644
--- a/TMessagesProj/jni/libyuv/source/rotate_neon64.cc
+++ b/TMessagesProj/jni/libyuv/source/rotate_neon64.cc
@@ -9,6 +9,7 @@
*/
#include "libyuv/row.h"
+#include "libyuv/rotate_row.h"
#include "libyuv/basic_types.h"
@@ -17,522 +18,524 @@ namespace libyuv {
extern "C" {
#endif
+// This module is for GCC Neon armv8 64 bit.
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
-//this ifdef should be removed if TransposeWx8_NEON's aarch64 has
-//been done
-#ifdef HAS_TRANSPOSE_WX8_NEON
+
static uvec8 kVTbl4x4Transpose =
- { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
+ { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
void TransposeWx8_NEON(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width) {
+ uint8* dst, int dst_stride, int width) {
const uint8* src_temp = NULL;
+ int64 width64 = (int64) width; // Work around clang 3.4 warning.
asm volatile (
// loops are on blocks of 8. loop will stop when
// counter gets to or below 0. starting the counter
// at w-8 allow for this
- "sub %5, #8 \n"
+ "sub %3, %3, #8 \n"
// handle 8x8 blocks. this should be the majority of the plane
- ".p2align 2 \n"
- "1: \n"
- "mov %0, %1 \n"
+ "1: \n"
+ "mov %0, %1 \n"
MEMACCESS(0)
- "vld1.8 {d0}, [%0], %2 \n"
+ "ld1 {v0.8b}, [%0], %5 \n"
MEMACCESS(0)
- "vld1.8 {d1}, [%0], %2 \n"
+ "ld1 {v1.8b}, [%0], %5 \n"
MEMACCESS(0)
- "vld1.8 {d2}, [%0], %2 \n"
+ "ld1 {v2.8b}, [%0], %5 \n"
MEMACCESS(0)
- "vld1.8 {d3}, [%0], %2 \n"
+ "ld1 {v3.8b}, [%0], %5 \n"
MEMACCESS(0)
- "vld1.8 {d4}, [%0], %2 \n"
+ "ld1 {v4.8b}, [%0], %5 \n"
MEMACCESS(0)
- "vld1.8 {d5}, [%0], %2 \n"
+ "ld1 {v5.8b}, [%0], %5 \n"
MEMACCESS(0)
- "vld1.8 {d6}, [%0], %2 \n"
+ "ld1 {v6.8b}, [%0], %5 \n"
MEMACCESS(0)
- "vld1.8 {d7}, [%0] \n"
+ "ld1 {v7.8b}, [%0] \n"
- "vtrn.8 d1, d0 \n"
- "vtrn.8 d3, d2 \n"
- "vtrn.8 d5, d4 \n"
- "vtrn.8 d7, d6 \n"
+ "trn2 v16.8b, v0.8b, v1.8b \n"
+ "trn1 v17.8b, v0.8b, v1.8b \n"
+ "trn2 v18.8b, v2.8b, v3.8b \n"
+ "trn1 v19.8b, v2.8b, v3.8b \n"
+ "trn2 v20.8b, v4.8b, v5.8b \n"
+ "trn1 v21.8b, v4.8b, v5.8b \n"
+ "trn2 v22.8b, v6.8b, v7.8b \n"
+ "trn1 v23.8b, v6.8b, v7.8b \n"
- "vtrn.16 d1, d3 \n"
- "vtrn.16 d0, d2 \n"
- "vtrn.16 d5, d7 \n"
- "vtrn.16 d4, d6 \n"
+ "trn2 v3.4h, v17.4h, v19.4h \n"
+ "trn1 v1.4h, v17.4h, v19.4h \n"
+ "trn2 v2.4h, v16.4h, v18.4h \n"
+ "trn1 v0.4h, v16.4h, v18.4h \n"
+ "trn2 v7.4h, v21.4h, v23.4h \n"
+ "trn1 v5.4h, v21.4h, v23.4h \n"
+ "trn2 v6.4h, v20.4h, v22.4h \n"
+ "trn1 v4.4h, v20.4h, v22.4h \n"
- "vtrn.32 d1, d5 \n"
- "vtrn.32 d0, d4 \n"
- "vtrn.32 d3, d7 \n"
- "vtrn.32 d2, d6 \n"
+ "trn2 v21.2s, v1.2s, v5.2s \n"
+ "trn1 v17.2s, v1.2s, v5.2s \n"
+ "trn2 v20.2s, v0.2s, v4.2s \n"
+ "trn1 v16.2s, v0.2s, v4.2s \n"
+ "trn2 v23.2s, v3.2s, v7.2s \n"
+ "trn1 v19.2s, v3.2s, v7.2s \n"
+ "trn2 v22.2s, v2.2s, v6.2s \n"
+ "trn1 v18.2s, v2.2s, v6.2s \n"
- "vrev16.8 q0, q0 \n"
- "vrev16.8 q1, q1 \n"
- "vrev16.8 q2, q2 \n"
- "vrev16.8 q3, q3 \n"
-
- "mov %0, %3 \n"
+ "mov %0, %2 \n"
MEMACCESS(0)
- "vst1.8 {d1}, [%0], %4 \n"
+ "st1 {v17.8b}, [%0], %6 \n"
MEMACCESS(0)
- "vst1.8 {d0}, [%0], %4 \n"
+ "st1 {v16.8b}, [%0], %6 \n"
MEMACCESS(0)
- "vst1.8 {d3}, [%0], %4 \n"
+ "st1 {v19.8b}, [%0], %6 \n"
MEMACCESS(0)
- "vst1.8 {d2}, [%0], %4 \n"
+ "st1 {v18.8b}, [%0], %6 \n"
MEMACCESS(0)
- "vst1.8 {d5}, [%0], %4 \n"
+ "st1 {v21.8b}, [%0], %6 \n"
MEMACCESS(0)
- "vst1.8 {d4}, [%0], %4 \n"
+ "st1 {v20.8b}, [%0], %6 \n"
MEMACCESS(0)
- "vst1.8 {d7}, [%0], %4 \n"
+ "st1 {v23.8b}, [%0], %6 \n"
MEMACCESS(0)
- "vst1.8 {d6}, [%0] \n"
+ "st1 {v22.8b}, [%0] \n"
- "add %1, #8 \n" // src += 8
- "add %3, %3, %4, lsl #3 \n" // dst += 8 * dst_stride
- "subs %5, #8 \n" // w -= 8
- "bge 1b \n"
+ "add %1, %1, #8 \n" // src += 8
+ "add %2, %2, %6, lsl #3 \n" // dst += 8 * dst_stride
+ "subs %3, %3, #8 \n" // w -= 8
+ "b.ge 1b \n"
// add 8 back to counter. if the result is 0 there are
// no residuals.
- "adds %5, #8 \n"
- "beq 4f \n"
+ "adds %3, %3, #8 \n"
+ "b.eq 4f \n"
// some residual, so between 1 and 7 lines left to transpose
- "cmp %5, #2 \n"
- "blt 3f \n"
+ "cmp %3, #2 \n"
+ "b.lt 3f \n"
- "cmp %5, #4 \n"
- "blt 2f \n"
+ "cmp %3, #4 \n"
+ "b.lt 2f \n"
// 4x8 block
- "mov %0, %1 \n"
+ "mov %0, %1 \n"
MEMACCESS(0)
- "vld1.32 {d0[0]}, [%0], %2 \n"
+ "ld1 {v0.s}[0], [%0], %5 \n"
MEMACCESS(0)
- "vld1.32 {d0[1]}, [%0], %2 \n"
+ "ld1 {v0.s}[1], [%0], %5 \n"
MEMACCESS(0)
- "vld1.32 {d1[0]}, [%0], %2 \n"
+ "ld1 {v0.s}[2], [%0], %5 \n"
MEMACCESS(0)
- "vld1.32 {d1[1]}, [%0], %2 \n"
+ "ld1 {v0.s}[3], [%0], %5 \n"
MEMACCESS(0)
- "vld1.32 {d2[0]}, [%0], %2 \n"
+ "ld1 {v1.s}[0], [%0], %5 \n"
MEMACCESS(0)
- "vld1.32 {d2[1]}, [%0], %2 \n"
+ "ld1 {v1.s}[1], [%0], %5 \n"
MEMACCESS(0)
- "vld1.32 {d3[0]}, [%0], %2 \n"
+ "ld1 {v1.s}[2], [%0], %5 \n"
MEMACCESS(0)
- "vld1.32 {d3[1]}, [%0] \n"
+ "ld1 {v1.s}[3], [%0] \n"
- "mov %0, %3 \n"
+ "mov %0, %2 \n"
- MEMACCESS(6)
- "vld1.8 {q3}, [%6] \n"
+ MEMACCESS(4)
+ "ld1 {v2.16b}, [%4] \n"
- "vtbl.8 d4, {d0, d1}, d6 \n"
- "vtbl.8 d5, {d0, d1}, d7 \n"
- "vtbl.8 d0, {d2, d3}, d6 \n"
- "vtbl.8 d1, {d2, d3}, d7 \n"
+ "tbl v3.16b, {v0.16b}, v2.16b \n"
+ "tbl v0.16b, {v1.16b}, v2.16b \n"
// TODO(frkoenig): Rework shuffle above to
// write out with 4 instead of 8 writes.
MEMACCESS(0)
- "vst1.32 {d4[0]}, [%0], %4 \n"
+ "st1 {v3.s}[0], [%0], %6 \n"
MEMACCESS(0)
- "vst1.32 {d4[1]}, [%0], %4 \n"
+ "st1 {v3.s}[1], [%0], %6 \n"
MEMACCESS(0)
- "vst1.32 {d5[0]}, [%0], %4 \n"
+ "st1 {v3.s}[2], [%0], %6 \n"
MEMACCESS(0)
- "vst1.32 {d5[1]}, [%0] \n"
+ "st1 {v3.s}[3], [%0] \n"
- "add %0, %3, #4 \n"
+ "add %0, %2, #4 \n"
MEMACCESS(0)
- "vst1.32 {d0[0]}, [%0], %4 \n"
+ "st1 {v0.s}[0], [%0], %6 \n"
MEMACCESS(0)
- "vst1.32 {d0[1]}, [%0], %4 \n"
+ "st1 {v0.s}[1], [%0], %6 \n"
MEMACCESS(0)
- "vst1.32 {d1[0]}, [%0], %4 \n"
+ "st1 {v0.s}[2], [%0], %6 \n"
MEMACCESS(0)
- "vst1.32 {d1[1]}, [%0] \n"
+ "st1 {v0.s}[3], [%0] \n"
- "add %1, #4 \n" // src += 4
- "add %3, %3, %4, lsl #2 \n" // dst += 4 * dst_stride
- "subs %5, #4 \n" // w -= 4
- "beq 4f \n"
+ "add %1, %1, #4 \n" // src += 4
+ "add %2, %2, %6, lsl #2 \n" // dst += 4 * dst_stride
+ "subs %3, %3, #4 \n" // w -= 4
+ "b.eq 4f \n"
// some residual, check to see if it includes a 2x8 block,
// or less
- "cmp %5, #2 \n"
- "blt 3f \n"
+ "cmp %3, #2 \n"
+ "b.lt 3f \n"
// 2x8 block
- "2: \n"
- "mov %0, %1 \n"
+ "2: \n"
+ "mov %0, %1 \n"
MEMACCESS(0)
- "vld1.16 {d0[0]}, [%0], %2 \n"
+ "ld1 {v0.h}[0], [%0], %5 \n"
MEMACCESS(0)
- "vld1.16 {d1[0]}, [%0], %2 \n"
+ "ld1 {v1.h}[0], [%0], %5 \n"
MEMACCESS(0)
- "vld1.16 {d0[1]}, [%0], %2 \n"
+ "ld1 {v0.h}[1], [%0], %5 \n"
MEMACCESS(0)
- "vld1.16 {d1[1]}, [%0], %2 \n"
+ "ld1 {v1.h}[1], [%0], %5 \n"
MEMACCESS(0)
- "vld1.16 {d0[2]}, [%0], %2 \n"
+ "ld1 {v0.h}[2], [%0], %5 \n"
MEMACCESS(0)
- "vld1.16 {d1[2]}, [%0], %2 \n"
+ "ld1 {v1.h}[2], [%0], %5 \n"
MEMACCESS(0)
- "vld1.16 {d0[3]}, [%0], %2 \n"
+ "ld1 {v0.h}[3], [%0], %5 \n"
MEMACCESS(0)
- "vld1.16 {d1[3]}, [%0] \n"
+ "ld1 {v1.h}[3], [%0] \n"
- "vtrn.8 d0, d1 \n"
+ "trn2 v2.8b, v0.8b, v1.8b \n"
+ "trn1 v3.8b, v0.8b, v1.8b \n"
- "mov %0, %3 \n"
+ "mov %0, %2 \n"
MEMACCESS(0)
- "vst1.64 {d0}, [%0], %4 \n"
+ "st1 {v3.8b}, [%0], %6 \n"
MEMACCESS(0)
- "vst1.64 {d1}, [%0] \n"
+ "st1 {v2.8b}, [%0] \n"
- "add %1, #2 \n" // src += 2
- "add %3, %3, %4, lsl #1 \n" // dst += 2 * dst_stride
- "subs %5, #2 \n" // w -= 2
- "beq 4f \n"
+ "add %1, %1, #2 \n" // src += 2
+ "add %2, %2, %6, lsl #1 \n" // dst += 2 * dst_stride
+ "subs %3, %3, #2 \n" // w -= 2
+ "b.eq 4f \n"
// 1x8 block
- "3: \n"
+ "3: \n"
MEMACCESS(1)
- "vld1.8 {d0[0]}, [%1], %2 \n"
+ "ld1 {v0.b}[0], [%1], %5 \n"
MEMACCESS(1)
- "vld1.8 {d0[1]}, [%1], %2 \n"
+ "ld1 {v0.b}[1], [%1], %5 \n"
MEMACCESS(1)
- "vld1.8 {d0[2]}, [%1], %2 \n"
+ "ld1 {v0.b}[2], [%1], %5 \n"
MEMACCESS(1)
- "vld1.8 {d0[3]}, [%1], %2 \n"
+ "ld1 {v0.b}[3], [%1], %5 \n"
MEMACCESS(1)
- "vld1.8 {d0[4]}, [%1], %2 \n"
+ "ld1 {v0.b}[4], [%1], %5 \n"
MEMACCESS(1)
- "vld1.8 {d0[5]}, [%1], %2 \n"
+ "ld1 {v0.b}[5], [%1], %5 \n"
MEMACCESS(1)
- "vld1.8 {d0[6]}, [%1], %2 \n"
+ "ld1 {v0.b}[6], [%1], %5 \n"
MEMACCESS(1)
- "vld1.8 {d0[7]}, [%1] \n"
+ "ld1 {v0.b}[7], [%1] \n"
- MEMACCESS(3)
- "vst1.64 {d0}, [%3] \n"
+ MEMACCESS(2)
+ "st1 {v0.8b}, [%2] \n"
- "4: \n"
+ "4: \n"
- : "+r"(src_temp), // %0
- "+r"(src), // %1
- "+r"(src_stride), // %2
- "+r"(dst), // %3
- "+r"(dst_stride), // %4
- "+r"(width) // %5
- : "r"(&kVTbl4x4Transpose) // %6
- : "memory", "cc", "q0", "q1", "q2", "q3"
+ : "+r"(src_temp), // %0
+ "+r"(src), // %1
+ "+r"(dst), // %2
+ "+r"(width64) // %3
+ : "r"(&kVTbl4x4Transpose), // %4
+ "r"(static_cast(src_stride)), // %5
+ "r"(static_cast(dst_stride)) // %6
+ : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
+ "v17", "v18", "v19", "v20", "v21", "v22", "v23"
);
}
-#endif //HAS_TRANSPOSE_WX8_NEON
-//this ifdef should be removed if TransposeUVWx8_NEON's aarch64 has
-//been done
-#ifdef HAS_TRANSPOSE_UVWX8_NEON
-static uvec8 kVTbl4x4TransposeDi =
- { 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15 };
+static uint8 kVTbl4x4TransposeDi[32] =
+ { 0, 16, 32, 48, 2, 18, 34, 50, 4, 20, 36, 52, 6, 22, 38, 54,
+ 1, 17, 33, 49, 3, 19, 35, 51, 5, 21, 37, 53, 7, 23, 39, 55};
void TransposeUVWx8_NEON(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width) {
const uint8* src_temp = NULL;
+ int64 width64 = (int64) width; // Work around clang 3.4 warning.
asm volatile (
// loops are on blocks of 8. loop will stop when
// counter gets to or below 0. starting the counter
// at w-8 allow for this
- "sub %7, #8 \n"
+ "sub %4, %4, #8 \n"
// handle 8x8 blocks. this should be the majority of the plane
- ".p2align 2 \n"
"1: \n"
- "mov %0, %1 \n"
-
- MEMACCESS(0)
- "vld2.8 {d0, d1}, [%0], %2 \n"
- MEMACCESS(0)
- "vld2.8 {d2, d3}, [%0], %2 \n"
- MEMACCESS(0)
- "vld2.8 {d4, d5}, [%0], %2 \n"
- MEMACCESS(0)
- "vld2.8 {d6, d7}, [%0], %2 \n"
- MEMACCESS(0)
- "vld2.8 {d16, d17}, [%0], %2 \n"
- MEMACCESS(0)
- "vld2.8 {d18, d19}, [%0], %2 \n"
- MEMACCESS(0)
- "vld2.8 {d20, d21}, [%0], %2 \n"
- MEMACCESS(0)
- "vld2.8 {d22, d23}, [%0] \n"
-
- "vtrn.8 q1, q0 \n"
- "vtrn.8 q3, q2 \n"
- "vtrn.8 q9, q8 \n"
- "vtrn.8 q11, q10 \n"
-
- "vtrn.16 q1, q3 \n"
- "vtrn.16 q0, q2 \n"
- "vtrn.16 q9, q11 \n"
- "vtrn.16 q8, q10 \n"
-
- "vtrn.32 q1, q9 \n"
- "vtrn.32 q0, q8 \n"
- "vtrn.32 q3, q11 \n"
- "vtrn.32 q2, q10 \n"
-
- "vrev16.8 q0, q0 \n"
- "vrev16.8 q1, q1 \n"
- "vrev16.8 q2, q2 \n"
- "vrev16.8 q3, q3 \n"
- "vrev16.8 q8, q8 \n"
- "vrev16.8 q9, q9 \n"
- "vrev16.8 q10, q10 \n"
- "vrev16.8 q11, q11 \n"
-
- "mov %0, %3 \n"
+ "mov %0, %1 \n"
MEMACCESS(0)
- "vst1.8 {d2}, [%0], %4 \n"
+ "ld1 {v0.16b}, [%0], %5 \n"
MEMACCESS(0)
- "vst1.8 {d0}, [%0], %4 \n"
+ "ld1 {v1.16b}, [%0], %5 \n"
MEMACCESS(0)
- "vst1.8 {d6}, [%0], %4 \n"
+ "ld1 {v2.16b}, [%0], %5 \n"
MEMACCESS(0)
- "vst1.8 {d4}, [%0], %4 \n"
+ "ld1 {v3.16b}, [%0], %5 \n"
MEMACCESS(0)
- "vst1.8 {d18}, [%0], %4 \n"
+ "ld1 {v4.16b}, [%0], %5 \n"
MEMACCESS(0)
- "vst1.8 {d16}, [%0], %4 \n"
+ "ld1 {v5.16b}, [%0], %5 \n"
MEMACCESS(0)
- "vst1.8 {d22}, [%0], %4 \n"
+ "ld1 {v6.16b}, [%0], %5 \n"
MEMACCESS(0)
- "vst1.8 {d20}, [%0] \n"
+ "ld1 {v7.16b}, [%0] \n"
- "mov %0, %5 \n"
+ "trn1 v16.16b, v0.16b, v1.16b \n"
+ "trn2 v17.16b, v0.16b, v1.16b \n"
+ "trn1 v18.16b, v2.16b, v3.16b \n"
+ "trn2 v19.16b, v2.16b, v3.16b \n"
+ "trn1 v20.16b, v4.16b, v5.16b \n"
+ "trn2 v21.16b, v4.16b, v5.16b \n"
+ "trn1 v22.16b, v6.16b, v7.16b \n"
+ "trn2 v23.16b, v6.16b, v7.16b \n"
+
+ "trn1 v0.8h, v16.8h, v18.8h \n"
+ "trn2 v1.8h, v16.8h, v18.8h \n"
+ "trn1 v2.8h, v20.8h, v22.8h \n"
+ "trn2 v3.8h, v20.8h, v22.8h \n"
+ "trn1 v4.8h, v17.8h, v19.8h \n"
+ "trn2 v5.8h, v17.8h, v19.8h \n"
+ "trn1 v6.8h, v21.8h, v23.8h \n"
+ "trn2 v7.8h, v21.8h, v23.8h \n"
+
+ "trn1 v16.4s, v0.4s, v2.4s \n"
+ "trn2 v17.4s, v0.4s, v2.4s \n"
+ "trn1 v18.4s, v1.4s, v3.4s \n"
+ "trn2 v19.4s, v1.4s, v3.4s \n"
+ "trn1 v20.4s, v4.4s, v6.4s \n"
+ "trn2 v21.4s, v4.4s, v6.4s \n"
+ "trn1 v22.4s, v5.4s, v7.4s \n"
+ "trn2 v23.4s, v5.4s, v7.4s \n"
+
+ "mov %0, %2 \n"
MEMACCESS(0)
- "vst1.8 {d3}, [%0], %6 \n"
+ "st1 {v16.d}[0], [%0], %6 \n"
MEMACCESS(0)
- "vst1.8 {d1}, [%0], %6 \n"
+ "st1 {v18.d}[0], [%0], %6 \n"
MEMACCESS(0)
- "vst1.8 {d7}, [%0], %6 \n"
+ "st1 {v17.d}[0], [%0], %6 \n"
MEMACCESS(0)
- "vst1.8 {d5}, [%0], %6 \n"
+ "st1 {v19.d}[0], [%0], %6 \n"
MEMACCESS(0)
- "vst1.8 {d19}, [%0], %6 \n"
+ "st1 {v16.d}[1], [%0], %6 \n"
MEMACCESS(0)
- "vst1.8 {d17}, [%0], %6 \n"
+ "st1 {v18.d}[1], [%0], %6 \n"
MEMACCESS(0)
- "vst1.8 {d23}, [%0], %6 \n"
+ "st1 {v17.d}[1], [%0], %6 \n"
MEMACCESS(0)
- "vst1.8 {d21}, [%0] \n"
+ "st1 {v19.d}[1], [%0] \n"
- "add %1, #8*2 \n" // src += 8*2
- "add %3, %3, %4, lsl #3 \n" // dst_a += 8 * dst_stride_a
- "add %5, %5, %6, lsl #3 \n" // dst_b += 8 * dst_stride_b
- "subs %7, #8 \n" // w -= 8
- "bge 1b \n"
+ "mov %0, %3 \n"
+
+ MEMACCESS(0)
+ "st1 {v20.d}[0], [%0], %7 \n"
+ MEMACCESS(0)
+ "st1 {v22.d}[0], [%0], %7 \n"
+ MEMACCESS(0)
+ "st1 {v21.d}[0], [%0], %7 \n"
+ MEMACCESS(0)
+ "st1 {v23.d}[0], [%0], %7 \n"
+ MEMACCESS(0)
+ "st1 {v20.d}[1], [%0], %7 \n"
+ MEMACCESS(0)
+ "st1 {v22.d}[1], [%0], %7 \n"
+ MEMACCESS(0)
+ "st1 {v21.d}[1], [%0], %7 \n"
+ MEMACCESS(0)
+ "st1 {v23.d}[1], [%0] \n"
+
+ "add %1, %1, #16 \n" // src += 8*2
+ "add %2, %2, %6, lsl #3 \n" // dst_a += 8 * dst_stride_a
+ "add %3, %3, %7, lsl #3 \n" // dst_b += 8 * dst_stride_b
+ "subs %4, %4, #8 \n" // w -= 8
+ "b.ge 1b \n"
// add 8 back to counter. if the result is 0 there are
// no residuals.
- "adds %7, #8 \n"
- "beq 4f \n"
+ "adds %4, %4, #8 \n"
+ "b.eq 4f \n"
// some residual, so between 1 and 7 lines left to transpose
- "cmp %7, #2 \n"
- "blt 3f \n"
+ "cmp %4, #2 \n"
+ "b.lt 3f \n"
- "cmp %7, #4 \n"
- "blt 2f \n"
+ "cmp %4, #4 \n"
+ "b.lt 2f \n"
// TODO(frkoenig): Clean this up
// 4x8 block
- "mov %0, %1 \n"
+ "mov %0, %1 \n"
MEMACCESS(0)
- "vld1.64 {d0}, [%0], %2 \n"
+ "ld1 {v0.8b}, [%0], %5 \n"
MEMACCESS(0)
- "vld1.64 {d1}, [%0], %2 \n"
+ "ld1 {v1.8b}, [%0], %5 \n"
MEMACCESS(0)
- "vld1.64 {d2}, [%0], %2 \n"
+ "ld1 {v2.8b}, [%0], %5 \n"
MEMACCESS(0)
- "vld1.64 {d3}, [%0], %2 \n"
+ "ld1 {v3.8b}, [%0], %5 \n"
MEMACCESS(0)
- "vld1.64 {d4}, [%0], %2 \n"
+ "ld1 {v4.8b}, [%0], %5 \n"
MEMACCESS(0)
- "vld1.64 {d5}, [%0], %2 \n"
+ "ld1 {v5.8b}, [%0], %5 \n"
MEMACCESS(0)
- "vld1.64 {d6}, [%0], %2 \n"
+ "ld1 {v6.8b}, [%0], %5 \n"
MEMACCESS(0)
- "vld1.64 {d7}, [%0] \n"
+ "ld1 {v7.8b}, [%0] \n"
MEMACCESS(8)
- "vld1.8 {q15}, [%8] \n"
+ "ld1 {v30.16b}, [%8], #16 \n"
+ "ld1 {v31.16b}, [%8] \n"
- "vtrn.8 q0, q1 \n"
- "vtrn.8 q2, q3 \n"
+ "tbl v16.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v30.16b \n"
+ "tbl v17.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v31.16b \n"
+ "tbl v18.16b, {v4.16b, v5.16b, v6.16b, v7.16b}, v30.16b \n"
+ "tbl v19.16b, {v4.16b, v5.16b, v6.16b, v7.16b}, v31.16b \n"
- "vtbl.8 d16, {d0, d1}, d30 \n"
- "vtbl.8 d17, {d0, d1}, d31 \n"
- "vtbl.8 d18, {d2, d3}, d30 \n"
- "vtbl.8 d19, {d2, d3}, d31 \n"
- "vtbl.8 d20, {d4, d5}, d30 \n"
- "vtbl.8 d21, {d4, d5}, d31 \n"
- "vtbl.8 d22, {d6, d7}, d30 \n"
- "vtbl.8 d23, {d6, d7}, d31 \n"
-
- "mov %0, %3 \n"
+ "mov %0, %2 \n"
MEMACCESS(0)
- "vst1.32 {d16[0]}, [%0], %4 \n"
+ "st1 {v16.s}[0], [%0], %6 \n"
MEMACCESS(0)
- "vst1.32 {d16[1]}, [%0], %4 \n"
+ "st1 {v16.s}[1], [%0], %6 \n"
MEMACCESS(0)
- "vst1.32 {d17[0]}, [%0], %4 \n"
+ "st1 {v16.s}[2], [%0], %6 \n"
MEMACCESS(0)
- "vst1.32 {d17[1]}, [%0], %4 \n"
+ "st1 {v16.s}[3], [%0], %6 \n"
- "add %0, %3, #4 \n"
+ "add %0, %2, #4 \n"
MEMACCESS(0)
- "vst1.32 {d20[0]}, [%0], %4 \n"
+ "st1 {v18.s}[0], [%0], %6 \n"
MEMACCESS(0)
- "vst1.32 {d20[1]}, [%0], %4 \n"
+ "st1 {v18.s}[1], [%0], %6 \n"
MEMACCESS(0)
- "vst1.32 {d21[0]}, [%0], %4 \n"
+ "st1 {v18.s}[2], [%0], %6 \n"
MEMACCESS(0)
- "vst1.32 {d21[1]}, [%0] \n"
+ "st1 {v18.s}[3], [%0] \n"
- "mov %0, %5 \n"
+ "mov %0, %3 \n"
MEMACCESS(0)
- "vst1.32 {d18[0]}, [%0], %6 \n"
+ "st1 {v17.s}[0], [%0], %7 \n"
MEMACCESS(0)
- "vst1.32 {d18[1]}, [%0], %6 \n"
+ "st1 {v17.s}[1], [%0], %7 \n"
MEMACCESS(0)
- "vst1.32 {d19[0]}, [%0], %6 \n"
+ "st1 {v17.s}[2], [%0], %7 \n"
MEMACCESS(0)
- "vst1.32 {d19[1]}, [%0], %6 \n"
+ "st1 {v17.s}[3], [%0], %7 \n"
- "add %0, %5, #4 \n"
+ "add %0, %3, #4 \n"
MEMACCESS(0)
- "vst1.32 {d22[0]}, [%0], %6 \n"
+ "st1 {v19.s}[0], [%0], %7 \n"
MEMACCESS(0)
- "vst1.32 {d22[1]}, [%0], %6 \n"
+ "st1 {v19.s}[1], [%0], %7 \n"
MEMACCESS(0)
- "vst1.32 {d23[0]}, [%0], %6 \n"
+ "st1 {v19.s}[2], [%0], %7 \n"
MEMACCESS(0)
- "vst1.32 {d23[1]}, [%0] \n"
+ "st1 {v19.s}[3], [%0] \n"
- "add %1, #4*2 \n" // src += 4 * 2
- "add %3, %3, %4, lsl #2 \n" // dst_a += 4 * dst_stride_a
- "add %5, %5, %6, lsl #2 \n" // dst_b += 4 * dst_stride_b
- "subs %7, #4 \n" // w -= 4
- "beq 4f \n"
+ "add %1, %1, #8 \n" // src += 4 * 2
+ "add %2, %2, %6, lsl #2 \n" // dst_a += 4 * dst_stride_a
+ "add %3, %3, %7, lsl #2 \n" // dst_b += 4 * dst_stride_b
+ "subs %4, %4, #4 \n" // w -= 4
+ "b.eq 4f \n"
// some residual, check to see if it includes a 2x8 block,
// or less
- "cmp %7, #2 \n"
- "blt 3f \n"
+ "cmp %4, #2 \n"
+ "b.lt 3f \n"
// 2x8 block
"2: \n"
- "mov %0, %1 \n"
+ "mov %0, %1 \n"
MEMACCESS(0)
- "vld2.16 {d0[0], d2[0]}, [%0], %2 \n"
+ "ld2 {v0.h, v1.h}[0], [%0], %5 \n"
MEMACCESS(0)
- "vld2.16 {d1[0], d3[0]}, [%0], %2 \n"
+ "ld2 {v2.h, v3.h}[0], [%0], %5 \n"
MEMACCESS(0)
- "vld2.16 {d0[1], d2[1]}, [%0], %2 \n"
+ "ld2 {v0.h, v1.h}[1], [%0], %5 \n"
MEMACCESS(0)
- "vld2.16 {d1[1], d3[1]}, [%0], %2 \n"
+ "ld2 {v2.h, v3.h}[1], [%0], %5 \n"
MEMACCESS(0)
- "vld2.16 {d0[2], d2[2]}, [%0], %2 \n"
+ "ld2 {v0.h, v1.h}[2], [%0], %5 \n"
MEMACCESS(0)
- "vld2.16 {d1[2], d3[2]}, [%0], %2 \n"
+ "ld2 {v2.h, v3.h}[2], [%0], %5 \n"
MEMACCESS(0)
- "vld2.16 {d0[3], d2[3]}, [%0], %2 \n"
+ "ld2 {v0.h, v1.h}[3], [%0], %5 \n"
MEMACCESS(0)
- "vld2.16 {d1[3], d3[3]}, [%0] \n"
+ "ld2 {v2.h, v3.h}[3], [%0] \n"
- "vtrn.8 d0, d1 \n"
- "vtrn.8 d2, d3 \n"
+ "trn1 v4.8b, v0.8b, v2.8b \n"
+ "trn2 v5.8b, v0.8b, v2.8b \n"
+ "trn1 v6.8b, v1.8b, v3.8b \n"
+ "trn2 v7.8b, v1.8b, v3.8b \n"
- "mov %0, %3 \n"
+ "mov %0, %2 \n"
MEMACCESS(0)
- "vst1.64 {d0}, [%0], %4 \n"
+ "st1 {v4.d}[0], [%0], %6 \n"
MEMACCESS(0)
- "vst1.64 {d2}, [%0] \n"
+ "st1 {v6.d}[0], [%0] \n"
- "mov %0, %5 \n"
+ "mov %0, %3 \n"
MEMACCESS(0)
- "vst1.64 {d1}, [%0], %6 \n"
+ "st1 {v5.d}[0], [%0], %7 \n"
MEMACCESS(0)
- "vst1.64 {d3}, [%0] \n"
+ "st1 {v7.d}[0], [%0] \n"
- "add %1, #2*2 \n" // src += 2 * 2
- "add %3, %3, %4, lsl #1 \n" // dst_a += 2 * dst_stride_a
- "add %5, %5, %6, lsl #1 \n" // dst_b += 2 * dst_stride_b
- "subs %7, #2 \n" // w -= 2
- "beq 4f \n"
+ "add %1, %1, #4 \n" // src += 2 * 2
+ "add %2, %2, %6, lsl #1 \n" // dst_a += 2 * dst_stride_a
+ "add %3, %3, %7, lsl #1 \n" // dst_b += 2 * dst_stride_b
+ "subs %4, %4, #2 \n" // w -= 2
+ "b.eq 4f \n"
// 1x8 block
"3: \n"
MEMACCESS(1)
- "vld2.8 {d0[0], d1[0]}, [%1], %2 \n"
+ "ld2 {v0.b, v1.b}[0], [%1], %5 \n"
MEMACCESS(1)
- "vld2.8 {d0[1], d1[1]}, [%1], %2 \n"
+ "ld2 {v0.b, v1.b}[1], [%1], %5 \n"
MEMACCESS(1)
- "vld2.8 {d0[2], d1[2]}, [%1], %2 \n"
+ "ld2 {v0.b, v1.b}[2], [%1], %5 \n"
MEMACCESS(1)
- "vld2.8 {d0[3], d1[3]}, [%1], %2 \n"
+ "ld2 {v0.b, v1.b}[3], [%1], %5 \n"
MEMACCESS(1)
- "vld2.8 {d0[4], d1[4]}, [%1], %2 \n"
+ "ld2 {v0.b, v1.b}[4], [%1], %5 \n"
MEMACCESS(1)
- "vld2.8 {d0[5], d1[5]}, [%1], %2 \n"
+ "ld2 {v0.b, v1.b}[5], [%1], %5 \n"
MEMACCESS(1)
- "vld2.8 {d0[6], d1[6]}, [%1], %2 \n"
+ "ld2 {v0.b, v1.b}[6], [%1], %5 \n"
MEMACCESS(1)
- "vld2.8 {d0[7], d1[7]}, [%1] \n"
+ "ld2 {v0.b, v1.b}[7], [%1] \n"
+ MEMACCESS(2)
+ "st1 {v0.d}[0], [%2] \n"
MEMACCESS(3)
- "vst1.64 {d0}, [%3] \n"
- MEMACCESS(5)
- "vst1.64 {d1}, [%5] \n"
+ "st1 {v1.d}[0], [%3] \n"
"4: \n"
- : "+r"(src_temp), // %0
- "+r"(src), // %1
- "+r"(src_stride), // %2
- "+r"(dst_a), // %3
- "+r"(dst_stride_a), // %4
- "+r"(dst_b), // %5
- "+r"(dst_stride_b), // %6
- "+r"(width) // %7
- : "r"(&kVTbl4x4TransposeDi) // %8
+ : "+r"(src_temp), // %0
+ "+r"(src), // %1
+ "+r"(dst_a), // %2
+ "+r"(dst_b), // %3
+ "+r"(width64) // %4
+ : "r"(static_cast(src_stride)), // %5
+ "r"(static_cast(dst_stride_a)), // %6
+ "r"(static_cast(dst_stride_b)), // %7
+ "r"(&kVTbl4x4TransposeDi) // %8
: "memory", "cc",
- "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
+ "v30", "v31"
);
}
-#endif // HAS_TRANSPOSE_UVWX8_NEON
-#endif // __aarch64__
+#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#ifdef __cplusplus
} // extern "C"
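
Throughout the rewrite above, each A32 vtrn.N, which permuted a register pair in place, becomes a trn1/trn2 pair, because AArch64 transpose instructions each produce a single result register. A hedged ACLE-intrinsics sketch of one byte-granularity step (the helper name is illustrative; vtrn1_u8/vtrn2_u8 are standard arm_neon.h intrinsics):

#include <arm_neon.h>

// trn1 gathers the even-indexed lane pairs, trn2 the odd-indexed ones;
// together they compute what a single in-place vtrn.8 did on A32.
static inline void TrnBytes(uint8x8_t a, uint8x8_t b,
                            uint8x8_t* even, uint8x8_t* odd) {
  *even = vtrn1_u8(a, b);  // a0,b0,a2,b2,a4,b4,a6,b6
  *odd  = vtrn2_u8(a, b);  // a1,b1,a3,b3,a5,b5,a7,b7
}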
diff --git a/TMessagesProj/jni/libyuv/source/rotate_win.cc b/TMessagesProj/jni/libyuv/source/rotate_win.cc
new file mode 100644
index 000000000..1300fc0fe
--- /dev/null
+++ b/TMessagesProj/jni/libyuv/source/rotate_win.cc
@@ -0,0 +1,247 @@
+/*
+ * Copyright 2013 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/row.h"
+#include "libyuv/rotate_row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// This module is for 32 bit Visual C x86 and clangcl
+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
+
+__declspec(naked)
+void TransposeWx8_SSSE3(const uint8* src, int src_stride,
+ uint8* dst, int dst_stride, int width) {
+ __asm {
+ push edi
+ push esi
+ push ebp
+ mov eax, [esp + 12 + 4] // src
+ mov edi, [esp + 12 + 8] // src_stride
+ mov edx, [esp + 12 + 12] // dst
+ mov esi, [esp + 12 + 16] // dst_stride
+ mov ecx, [esp + 12 + 20] // width
+
+ // Read in the data from the source pointer.
+ // First round of bit swap.
+ align 4
+ convertloop:
+ movq xmm0, qword ptr [eax]
+ lea ebp, [eax + 8]
+ movq xmm1, qword ptr [eax + edi]
+ lea eax, [eax + 2 * edi]
+ punpcklbw xmm0, xmm1
+ movq xmm2, qword ptr [eax]
+ movdqa xmm1, xmm0
+ palignr xmm1, xmm1, 8
+ movq xmm3, qword ptr [eax + edi]
+ lea eax, [eax + 2 * edi]
+ punpcklbw xmm2, xmm3
+ movdqa xmm3, xmm2
+ movq xmm4, qword ptr [eax]
+ palignr xmm3, xmm3, 8
+ movq xmm5, qword ptr [eax + edi]
+ punpcklbw xmm4, xmm5
+ lea eax, [eax + 2 * edi]
+ movdqa xmm5, xmm4
+ movq xmm6, qword ptr [eax]
+ palignr xmm5, xmm5, 8
+ movq xmm7, qword ptr [eax + edi]
+ punpcklbw xmm6, xmm7
+ mov eax, ebp
+ movdqa xmm7, xmm6
+ palignr xmm7, xmm7, 8
+ // Second round of bit swap.
+ punpcklwd xmm0, xmm2
+ punpcklwd xmm1, xmm3
+ movdqa xmm2, xmm0
+ movdqa xmm3, xmm1
+ palignr xmm2, xmm2, 8
+ palignr xmm3, xmm3, 8
+ punpcklwd xmm4, xmm6
+ punpcklwd xmm5, xmm7
+ movdqa xmm6, xmm4
+ movdqa xmm7, xmm5
+ palignr xmm6, xmm6, 8
+ palignr xmm7, xmm7, 8
+ // Third round of bit swap.
+ // Write to the destination pointer.
+ punpckldq xmm0, xmm4
+ movq qword ptr [edx], xmm0
+ movdqa xmm4, xmm0
+ palignr xmm4, xmm4, 8
+ movq qword ptr [edx + esi], xmm4
+ lea edx, [edx + 2 * esi]
+ punpckldq xmm2, xmm6
+ movdqa xmm6, xmm2
+ palignr xmm6, xmm6, 8
+ movq qword ptr [edx], xmm2
+ punpckldq xmm1, xmm5
+ movq qword ptr [edx + esi], xmm6
+ lea edx, [edx + 2 * esi]
+ movdqa xmm5, xmm1
+ movq qword ptr [edx], xmm1
+ palignr xmm5, xmm5, 8
+ punpckldq xmm3, xmm7
+ movq qword ptr [edx + esi], xmm5
+ lea edx, [edx + 2 * esi]
+ movq qword ptr [edx], xmm3
+ movdqa xmm7, xmm3
+ palignr xmm7, xmm7, 8
+ sub ecx, 8
+ movq qword ptr [edx + esi], xmm7
+ lea edx, [edx + 2 * esi]
+ jg convertloop
+
+ pop ebp
+ pop esi
+ pop edi
+ ret
+ }
+}
+
+__declspec(naked)
+void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
+ uint8* dst_a, int dst_stride_a,
+ uint8* dst_b, int dst_stride_b,
+ int w) {
+ __asm {
+ push ebx
+ push esi
+ push edi
+ push ebp
+ mov eax, [esp + 16 + 4] // src
+ mov edi, [esp + 16 + 8] // src_stride
+ mov edx, [esp + 16 + 12] // dst_a
+ mov esi, [esp + 16 + 16] // dst_stride_a
+ mov ebx, [esp + 16 + 20] // dst_b
+ mov ebp, [esp + 16 + 24] // dst_stride_b
+ mov ecx, esp
+ sub esp, 4 + 16
+ and esp, ~15
+ mov [esp + 16], ecx
+ mov ecx, [ecx + 16 + 28] // w
+
+ align 4
+ convertloop:
+ // Read in the data from the source pointer.
+ // First round of bit swap.
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + edi]
+ lea eax, [eax + 2 * edi]
+ movdqa xmm7, xmm0 // use xmm7 as temp register.
+ punpcklbw xmm0, xmm1
+ punpckhbw xmm7, xmm1
+ movdqa xmm1, xmm7
+ movdqu xmm2, [eax]
+ movdqu xmm3, [eax + edi]
+ lea eax, [eax + 2 * edi]
+ movdqa xmm7, xmm2
+ punpcklbw xmm2, xmm3
+ punpckhbw xmm7, xmm3
+ movdqa xmm3, xmm7
+ movdqu xmm4, [eax]
+ movdqu xmm5, [eax + edi]
+ lea eax, [eax + 2 * edi]
+ movdqa xmm7, xmm4
+ punpcklbw xmm4, xmm5
+ punpckhbw xmm7, xmm5
+ movdqa xmm5, xmm7
+ movdqu xmm6, [eax]
+ movdqu xmm7, [eax + edi]
+ lea eax, [eax + 2 * edi]
+ movdqu [esp], xmm5 // backup xmm5
+ neg edi
+ movdqa xmm5, xmm6 // use xmm5 as temp register.
+ punpcklbw xmm6, xmm7
+ punpckhbw xmm5, xmm7
+ movdqa xmm7, xmm5
+ lea eax, [eax + 8 * edi + 16]
+ neg edi
+ // Second round of bit swap.
+ movdqa xmm5, xmm0
+ punpcklwd xmm0, xmm2
+ punpckhwd xmm5, xmm2
+ movdqa xmm2, xmm5
+ movdqa xmm5, xmm1
+ punpcklwd xmm1, xmm3
+ punpckhwd xmm5, xmm3
+ movdqa xmm3, xmm5
+ movdqa xmm5, xmm4
+ punpcklwd xmm4, xmm6
+ punpckhwd xmm5, xmm6
+ movdqa xmm6, xmm5
+ movdqu xmm5, [esp] // restore xmm5
+ movdqu [esp], xmm6 // backup xmm6
+ movdqa xmm6, xmm5 // use xmm6 as temp register.
+ punpcklwd xmm5, xmm7
+ punpckhwd xmm6, xmm7
+ movdqa xmm7, xmm6
+ // Third round of bit swap.
+ // Write to the destination pointer.
+ movdqa xmm6, xmm0
+ punpckldq xmm0, xmm4
+ punpckhdq xmm6, xmm4
+ movdqa xmm4, xmm6
+ movdqu xmm6, [esp] // restore xmm6
+ movlpd qword ptr [edx], xmm0
+ movhpd qword ptr [ebx], xmm0
+ movlpd qword ptr [edx + esi], xmm4
+ lea edx, [edx + 2 * esi]
+ movhpd qword ptr [ebx + ebp], xmm4
+ lea ebx, [ebx + 2 * ebp]
+ movdqa xmm0, xmm2 // use xmm0 as the temp register.
+ punpckldq xmm2, xmm6
+ movlpd qword ptr [edx], xmm2
+ movhpd qword ptr [ebx], xmm2
+ punpckhdq xmm0, xmm6
+ movlpd qword ptr [edx + esi], xmm0
+ lea edx, [edx + 2 * esi]
+ movhpd qword ptr [ebx + ebp], xmm0
+ lea ebx, [ebx + 2 * ebp]
+ movdqa xmm0, xmm1 // use xmm0 as the temp register.
+ punpckldq xmm1, xmm5
+ movlpd qword ptr [edx], xmm1
+ movhpd qword ptr [ebx], xmm1
+ punpckhdq xmm0, xmm5
+ movlpd qword ptr [edx + esi], xmm0
+ lea edx, [edx + 2 * esi]
+ movhpd qword ptr [ebx + ebp], xmm0
+ lea ebx, [ebx + 2 * ebp]
+ movdqa xmm0, xmm3 // use xmm0 as the temp register.
+ punpckldq xmm3, xmm7
+ movlpd qword ptr [edx], xmm3
+ movhpd qword ptr [ebx], xmm3
+ punpckhdq xmm0, xmm7
+ sub ecx, 8
+ movlpd qword ptr [edx + esi], xmm0
+ lea edx, [edx + 2 * esi]
+ movhpd qword ptr [ebx + ebp], xmm0
+ lea ebx, [ebx + 2 * ebp]
+ jg convertloop
+
+ mov esp, [esp + 16]
+ pop ebp
+ pop edi
+ pop esi
+ pop ebx
+ ret
+ }
+}
+
+#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/TMessagesProj/jni/libyuv/source/row_any.cc b/TMessagesProj/jni/libyuv/source/row_any.cc
index aaa0378d7..5e5f435a6 100644
--- a/TMessagesProj/jni/libyuv/source/row_any.cc
+++ b/TMessagesProj/jni/libyuv/source/row_any.cc
@@ -10,6 +10,8 @@
#include "libyuv/row.h"
+#include <string.h>  // For memset.
+
#include "libyuv/basic_types.h"
#ifdef __cplusplus
@@ -17,584 +19,799 @@ namespace libyuv {
extern "C" {
#endif
-// TODO(fbarchard): Consider 'any' functions handling any quantity of pixels.
-// TODO(fbarchard): Consider 'any' functions handling odd alignment.
-// YUV to RGB does multiple of 8 with SIMD and remainder with C.
-#define YANY(NAMEANY, I420TORGB_SIMD, I420TORGB_C, UV_SHIFT, BPP, MASK) \
- void NAMEANY(const uint8* y_buf, \
- const uint8* u_buf, \
- const uint8* v_buf, \
- uint8* rgb_buf, \
- int width) { \
+// Subsampled source needs to be increased by 1 if not even.
+#define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift))
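
SS() is ceiling division by 2^shift; the remainder copies below use it to size the chroma portion of a ragged tail. Worked values:

// SS(15, 1) == (15 + 1) >> 1 == 8   // 15 Y pixels need 8 U/V samples (4:2:2)
// SS(15, 2) == (15 + 3) >> 2 == 4   // 4:1:1 needs ceil(15 / 4) == 4
// SS(16, 1) == (16 + 1) >> 1 == 8   // even widths divide exactly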
+
+// Any 4 planes to 1 with yuvconstants. The shared tail-handling pattern
+// is sketched after this macro definition.
+#define ANY41C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
+ void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \
+ const uint8* a_buf, uint8* dst_ptr, \
+ const struct YuvConstants* yuvconstants, int width) { \
+ SIMD_ALIGNED(uint8 temp[64 * 5]); \
+ memset(temp, 0, 64 * 4); /* for msan */ \
+ int r = width & MASK; \
int n = width & ~MASK; \
- I420TORGB_SIMD(y_buf, u_buf, v_buf, rgb_buf, n); \
- I420TORGB_C(y_buf + n, \
- u_buf + (n >> UV_SHIFT), \
- v_buf + (n >> UV_SHIFT), \
- rgb_buf + n * BPP, width & MASK); \
+ if (n > 0) { \
+ ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n); \
+ } \
+ memcpy(temp, y_buf + n, r); \
+ memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ memcpy(temp + 192, a_buf + n, r); \
+ ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, temp + 256, \
+ yuvconstants, MASK + 1); \
+ memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 256, \
+ SS(r, DUVSHIFT) * BPP); \
+ }
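
ANY41C and the macros that follow all share one tail strategy, replacing the old SIMD-prefix-plus-C-remainder split: run the SIMD kernel over the multiple-of-(MASK+1) prefix, stage the ragged tail in zeroed 64-byte scratch lanes, run the same kernel once over the padded tail, and copy back only the valid bytes. A hedged single-plane reduction of the pattern (AnyRow and its parameters are illustrative; assumes mask < 64 to match the 64-byte lanes):

// One plane in, one plane out, 1 byte per pixel, via a padded tail pass.
static void AnyRow(const uint8* src, uint8* dst, int width,
                   void (*simd)(const uint8*, uint8*, int), int mask) {
  SIMD_ALIGNED(uint8 temp[64 * 2]);
  memset(temp, 0, 64);              // defined padding (also keeps msan quiet)
  int r = width & mask;             // ragged tail length
  int n = width & ~mask;            // SIMD-sized prefix
  if (n > 0) {
    simd(src, dst, n);              // bulk of the row at full speed
  }
  memcpy(temp, src + n, r);         // stage the tail over zero padding
  simd(temp, temp + 64, mask + 1);  // one full SIMD block, reads the pad
  memcpy(dst + n, temp + 64, r);    // keep only the valid output pixels
}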
+
+#ifdef HAS_I422ALPHATOARGBROW_SSSE3
+ANY41C(I422AlphaToARGBRow_Any_SSSE3, I422AlphaToARGBRow_SSSE3, 1, 0, 4, 7)
+#endif
+#ifdef HAS_I422ALPHATOARGBROW_AVX2
+ANY41C(I422AlphaToARGBRow_Any_AVX2, I422AlphaToARGBRow_AVX2, 1, 0, 4, 15)
+#endif
+#ifdef HAS_I422ALPHATOARGBROW_NEON
+ANY41C(I422AlphaToARGBRow_Any_NEON, I422AlphaToARGBRow_NEON, 1, 0, 4, 7)
+#endif
+#undef ANY41C
+
+// Any 3 planes to 1.
+#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
+ void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \
+ uint8* dst_ptr, int width) { \
+ SIMD_ALIGNED(uint8 temp[64 * 4]); \
+ memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, n); \
+ } \
+ memcpy(temp, y_buf + n, r); \
+ memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, MASK + 1); \
+ memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \
+ SS(r, DUVSHIFT) * BPP); \
+ }
+#ifdef HAS_I422TOYUY2ROW_SSE2
+ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15)
+ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15)
+#endif
+#ifdef HAS_I422TOYUY2ROW_NEON
+ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15)
+#endif
+#ifdef HAS_I422TOUYVYROW_NEON
+ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15)
+#endif
+#ifdef HAS_BLENDPLANEROW_AVX2
+ANY31(BlendPlaneRow_Any_AVX2, BlendPlaneRow_AVX2, 0, 0, 1, 31)
+#endif
+#ifdef HAS_BLENDPLANEROW_SSSE3
+ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7)
+#endif
+#undef ANY31
+
+// Note that odd-width replication also applies to 444, because the ARM
+// implementation subsamples 444 to 422 internally (a worked example
+// follows the ANY31C macro below).
+// Any 3 planes to 1 with yuvconstants
+#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
+ void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \
+ uint8* dst_ptr, const struct YuvConstants* yuvconstants, \
+ int width) { \
+ SIMD_ALIGNED(uint8 temp[64 * 4]); \
+ memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \
+ } \
+ memcpy(temp, y_buf + n, r); \
+ memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ if (width & 1) { \
+ temp[64 + SS(r, UVSHIFT)] = temp[64 + SS(r, UVSHIFT) - 1]; \
+ temp[128 + SS(r, UVSHIFT)] = temp[128 + SS(r, UVSHIFT) - 1]; \
+ } \
+ ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, \
+ yuvconstants, MASK + 1); \
+ memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \
+ SS(r, DUVSHIFT) * BPP); \
}
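
A worked instance of the odd-width replication in ANY31C: width == 9 with 4:2:2 input (UVSHIFT == 1, MASK == 7) gives n == 8 and r == 1, so SS(r, UVSHIFT) == 1 and only one U and one V byte are staged. Because the kernel consumes chroma in 2-pixel steps, the final pass would read one sample past the copy, so that sample is replicated first:

// With r == 1 and UVSHIFT == 1 the two replication stores become:
//   temp[64 + 1]  = temp[64 + 0];    // duplicate the last U sample
//   temp[128 + 1] = temp[128 + 0];   // duplicate the last V sample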
#ifdef HAS_I422TOARGBROW_SSSE3
-YANY(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_Unaligned_SSSE3, I422ToARGBRow_C,
- 1, 4, 7)
-#endif // HAS_I422TOARGBROW_SSSE3
+ANY31C(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7)
+#endif
+#ifdef HAS_I411TOARGBROW_SSSE3
+ANY31C(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_SSSE3, 2, 0, 4, 7)
+#endif
#ifdef HAS_I444TOARGBROW_SSSE3
-YANY(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_Unaligned_SSSE3, I444ToARGBRow_C,
- 0, 4, 7)
-YANY(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_Unaligned_SSSE3, I411ToARGBRow_C,
- 2, 4, 7)
-YANY(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_Unaligned_SSSE3, I422ToBGRARow_C,
- 1, 4, 7)
-YANY(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_Unaligned_SSSE3, I422ToABGRRow_C,
- 1, 4, 7)
-YANY(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_Unaligned_SSSE3, I422ToRGBARow_C,
- 1, 4, 7)
-// I422ToRGB565Row_SSSE3 is unaligned.
-YANY(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, I422ToARGB4444Row_C,
- 1, 2, 7)
-YANY(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, I422ToARGB1555Row_C,
- 1, 2, 7)
-YANY(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, I422ToRGB565Row_C,
- 1, 2, 7)
-// I422ToRGB24Row_SSSE3 is unaligned.
-YANY(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, I422ToRGB24Row_C, 1, 3, 7)
-YANY(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, I422ToRAWRow_C, 1, 3, 7)
-YANY(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, I422ToYUY2Row_C, 1, 2, 15)
-YANY(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, I422ToUYVYRow_C, 1, 2, 15)
+ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7)
+ANY31C(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7)
+ANY31C(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7)
+ANY31C(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7)
+ANY31C(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7)
+ANY31C(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 7)
#endif // HAS_I444TOARGBROW_SSSE3
+#ifdef HAS_I422TORGB24ROW_AVX2
+ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 15)
+#endif
#ifdef HAS_I422TOARGBROW_AVX2
-YANY(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, I422ToARGBRow_C, 1, 4, 15)
-#endif // HAS_I422TOARGBROW_AVX2
+ANY31C(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15)
+#endif
+#ifdef HAS_I422TORGBAROW_AVX2
+ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15)
+#endif
+#ifdef HAS_I444TOARGBROW_AVX2
+ANY31C(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15)
+#endif
+#ifdef HAS_I411TOARGBROW_AVX2
+ANY31C(I411ToARGBRow_Any_AVX2, I411ToARGBRow_AVX2, 2, 0, 4, 15)
+#endif
+#ifdef HAS_I422TOARGB4444ROW_AVX2
+ANY31C(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 7)
+#endif
+#ifdef HAS_I422TOARGB1555ROW_AVX2
+ANY31C(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 7)
+#endif
+#ifdef HAS_I422TORGB565ROW_AVX2
+ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 7)
+#endif
#ifdef HAS_I422TOARGBROW_NEON
-YANY(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, I444ToARGBRow_C, 0, 4, 7)
-YANY(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, I422ToARGBRow_C, 1, 4, 7)
-YANY(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, I411ToARGBRow_C, 2, 4, 7)
-YANY(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, I422ToBGRARow_C, 1, 4, 7)
-YANY(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, I422ToABGRRow_C, 1, 4, 7)
-YANY(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, I422ToRGBARow_C, 1, 4, 7)
-YANY(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, I422ToRGB24Row_C, 1, 3, 7)
-YANY(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, I422ToRAWRow_C, 1, 3, 7)
-YANY(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, I422ToARGB4444Row_C,
- 1, 2, 7)
-YANY(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, I422ToARGB1555Row_C,
- 1, 2, 7)
-YANY(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, I422ToRGB565Row_C, 1, 2, 7)
-#endif // HAS_I422TOARGBROW_NEON
-#ifdef HAS_I422TOYUY2ROW_NEON
-YANY(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, I422ToYUY2Row_C, 1, 2, 15)
-#endif // HAS_I422TOYUY2ROW_NEON
-#ifdef HAS_I422TOUYVYROW_NEON
-YANY(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, I422ToUYVYRow_C, 1, 2, 15)
-#endif // HAS_I422TOUYVYROW_NEON
-#undef YANY
+ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7)
+ANY31C(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7)
+ANY31C(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, 2, 0, 4, 7)
+ANY31C(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7)
+ANY31C(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7)
+ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7)
+ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7)
+ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7)
+#endif
+#undef ANY31C
-// Wrappers to handle odd width
-#define NV2NY(NAMEANY, NV12TORGB_SIMD, NV12TORGB_C, UV_SHIFT, BPP) \
- void NAMEANY(const uint8* y_buf, \
- const uint8* uv_buf, \
- uint8* rgb_buf, \
- int width) { \
- int n = width & ~7; \
- NV12TORGB_SIMD(y_buf, uv_buf, rgb_buf, n); \
- NV12TORGB_C(y_buf + n, \
- uv_buf + (n >> UV_SHIFT), \
- rgb_buf + n * BPP, width & 7); \
- }
-
-#ifdef HAS_NV12TOARGBROW_SSSE3
-NV2NY(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_Unaligned_SSSE3, NV12ToARGBRow_C,
- 0, 4)
-NV2NY(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_Unaligned_SSSE3, NV21ToARGBRow_C,
- 0, 4)
-#endif // HAS_NV12TOARGBROW_SSSE3
-#ifdef HAS_NV12TOARGBROW_NEON
-NV2NY(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, NV12ToARGBRow_C, 0, 4)
-NV2NY(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, NV21ToARGBRow_C, 0, 4)
-#endif // HAS_NV12TOARGBROW_NEON
-#ifdef HAS_NV12TORGB565ROW_SSSE3
-NV2NY(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, NV12ToRGB565Row_C,
- 0, 2)
-NV2NY(NV21ToRGB565Row_Any_SSSE3, NV21ToRGB565Row_SSSE3, NV21ToRGB565Row_C,
- 0, 2)
-#endif // HAS_NV12TORGB565ROW_SSSE3
-#ifdef HAS_NV12TORGB565ROW_NEON
-NV2NY(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, NV12ToRGB565Row_C, 0, 2)
-NV2NY(NV21ToRGB565Row_Any_NEON, NV21ToRGB565Row_NEON, NV21ToRGB565Row_C, 0, 2)
-#endif // HAS_NV12TORGB565ROW_NEON
-#undef NVANY
-
-#define RGBANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, MASK, SBPP, BPP) \
- void NAMEANY(const uint8* src, \
- uint8* dst, \
- int width) { \
+// Any 2 planes to 1.
+#define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
+ void NAMEANY(const uint8* y_buf, const uint8* uv_buf, \
+ uint8* dst_ptr, int width) { \
+ SIMD_ALIGNED(uint8 temp[64 * 3]); \
+ memset(temp, 0, 64 * 2); /* for msan */ \
+ int r = width & MASK; \
int n = width & ~MASK; \
- ARGBTORGB_SIMD(src, dst, n); \
- ARGBTORGB_C(src + n * SBPP, dst + n * BPP, width & MASK); \
- }
-
-#if defined(HAS_ARGBTORGB24ROW_SSSE3)
-RGBANY(ARGBToRGB24Row_Any_SSSE3, ARGBToRGB24Row_SSSE3, ARGBToRGB24Row_C,
- 15, 4, 3)
-RGBANY(ARGBToRAWRow_Any_SSSE3, ARGBToRAWRow_SSSE3, ARGBToRAWRow_C,
- 15, 4, 3)
-RGBANY(ARGBToRGB565Row_Any_SSE2, ARGBToRGB565Row_SSE2, ARGBToRGB565Row_C,
- 3, 4, 2)
-RGBANY(ARGBToARGB1555Row_Any_SSE2, ARGBToARGB1555Row_SSE2, ARGBToARGB1555Row_C,
- 3, 4, 2)
-RGBANY(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, ARGBToARGB4444Row_C,
- 3, 4, 2)
-#endif
-#if defined(HAS_I400TOARGBROW_SSE2)
-RGBANY(I400ToARGBRow_Any_SSE2, I400ToARGBRow_Unaligned_SSE2, I400ToARGBRow_C,
- 7, 1, 4)
-#endif
-#if defined(HAS_YTOARGBROW_SSE2)
-RGBANY(YToARGBRow_Any_SSE2, YToARGBRow_SSE2, YToARGBRow_C,
- 7, 1, 4)
-RGBANY(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_Unaligned_SSSE3, YUY2ToARGBRow_C,
- 15, 2, 4)
-RGBANY(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_Unaligned_SSSE3, UYVYToARGBRow_C,
- 15, 2, 4)
-// These require alignment on ARGB, so C is used for remainder.
-RGBANY(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, RGB24ToARGBRow_C,
- 15, 3, 4)
-RGBANY(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, RAWToARGBRow_C,
- 15, 3, 4)
-RGBANY(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, RGB565ToARGBRow_C,
- 7, 2, 4)
-RGBANY(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, ARGB1555ToARGBRow_C,
- 7, 2, 4)
-RGBANY(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, ARGB4444ToARGBRow_C,
- 7, 2, 4)
-#endif
-#if defined(HAS_ARGBTORGB24ROW_NEON)
-RGBANY(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, ARGBToRGB24Row_C, 7, 4, 3)
-RGBANY(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, ARGBToRAWRow_C, 7, 4, 3)
-RGBANY(ARGBToRGB565Row_Any_NEON, ARGBToRGB565Row_NEON, ARGBToRGB565Row_C,
- 7, 4, 2)
-RGBANY(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, ARGBToARGB1555Row_C,
- 7, 4, 2)
-RGBANY(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, ARGBToARGB4444Row_C,
- 7, 4, 2)
-RGBANY(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, I400ToARGBRow_C,
- 7, 1, 4)
-RGBANY(YToARGBRow_Any_NEON, YToARGBRow_NEON, YToARGBRow_C,
- 7, 1, 4)
-RGBANY(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, YUY2ToARGBRow_C,
- 7, 2, 4)
-RGBANY(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, UYVYToARGBRow_C,
- 7, 2, 4)
-#endif
-#undef RGBANY
-
-// ARGB to Bayer does multiple of 4 pixels, SSSE3 aligned src, unaligned dst.
-#define BAYERANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, MASK, SBPP, BPP) \
- void NAMEANY(const uint8* src, \
- uint8* dst, uint32 selector, \
- int width) { \
- int n = width & ~MASK; \
- ARGBTORGB_SIMD(src, dst, selector, n); \
- ARGBTORGB_C(src + n * SBPP, dst + n * BPP, selector, width & MASK); \
- }
-
-#if defined(HAS_ARGBTOBAYERROW_SSSE3)
-BAYERANY(ARGBToBayerRow_Any_SSSE3, ARGBToBayerRow_SSSE3, ARGBToBayerRow_C,
- 7, 4, 1)
-#endif
-#if defined(HAS_ARGBTOBAYERROW_NEON)
-BAYERANY(ARGBToBayerRow_Any_NEON, ARGBToBayerRow_NEON, ARGBToBayerRow_C,
- 7, 4, 1)
-#endif
-#if defined(HAS_ARGBTOBAYERGGROW_SSE2)
-BAYERANY(ARGBToBayerGGRow_Any_SSE2, ARGBToBayerGGRow_SSE2, ARGBToBayerGGRow_C,
- 7, 4, 1)
-#endif
-#if defined(HAS_ARGBTOBAYERGGROW_NEON)
-BAYERANY(ARGBToBayerGGRow_Any_NEON, ARGBToBayerGGRow_NEON, ARGBToBayerGGRow_C,
- 7, 4, 1)
-#endif
-
-#undef BAYERANY
-
-// RGB/YUV to Y does multiple of 16 with SIMD and last 16 with SIMD.
-#define YANY(NAMEANY, ARGBTOY_SIMD, SBPP, BPP, NUM) \
- void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) { \
- ARGBTOY_SIMD(src_argb, dst_y, width - NUM); \
- ARGBTOY_SIMD(src_argb + (width - NUM) * SBPP, \
- dst_y + (width - NUM) * BPP, NUM); \
- }
-
-#ifdef HAS_ARGBTOYROW_AVX2
-YANY(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 4, 1, 32)
-YANY(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, 4, 1, 32)
-YANY(YUY2ToYRow_Any_AVX2, YUY2ToYRow_AVX2, 2, 1, 32)
-YANY(UYVYToYRow_Any_AVX2, UYVYToYRow_AVX2, 2, 1, 32)
-#endif
-#ifdef HAS_ARGBTOYROW_SSSE3
-YANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_Unaligned_SSSE3, 4, 1, 16)
-#endif
-#ifdef HAS_BGRATOYROW_SSSE3
-YANY(BGRAToYRow_Any_SSSE3, BGRAToYRow_Unaligned_SSSE3, 4, 1, 16)
-YANY(ABGRToYRow_Any_SSSE3, ABGRToYRow_Unaligned_SSSE3, 4, 1, 16)
-YANY(RGBAToYRow_Any_SSSE3, RGBAToYRow_Unaligned_SSSE3, 4, 1, 16)
-YANY(YUY2ToYRow_Any_SSE2, YUY2ToYRow_Unaligned_SSE2, 2, 1, 16)
-YANY(UYVYToYRow_Any_SSE2, UYVYToYRow_Unaligned_SSE2, 2, 1, 16)
-#endif
-#ifdef HAS_ARGBTOYJROW_SSSE3
-YANY(ARGBToYJRow_Any_SSSE3, ARGBToYJRow_Unaligned_SSSE3, 4, 1, 16)
-#endif
-#ifdef HAS_ARGBTOYROW_NEON
-YANY(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 4, 1, 8)
-#endif
-#ifdef HAS_ARGBTOYJROW_NEON
-YANY(ARGBToYJRow_Any_NEON, ARGBToYJRow_NEON, 4, 1, 8)
-#endif
-#ifdef HAS_BGRATOYROW_NEON
-YANY(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 4, 1, 8)
-#endif
-#ifdef HAS_ABGRTOYROW_NEON
-YANY(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 4, 1, 8)
-#endif
-#ifdef HAS_RGBATOYROW_NEON
-YANY(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 4, 1, 8)
-#endif
-#ifdef HAS_RGB24TOYROW_NEON
-YANY(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 3, 1, 8)
-#endif
-#ifdef HAS_RAWTOYROW_NEON
-YANY(RAWToYRow_Any_NEON, RAWToYRow_NEON, 3, 1, 8)
-#endif
-#ifdef HAS_RGB565TOYROW_NEON
-YANY(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 2, 1, 8)
-#endif
-#ifdef HAS_ARGB1555TOYROW_NEON
-YANY(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 2, 1, 8)
-#endif
-#ifdef HAS_ARGB4444TOYROW_NEON
-YANY(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 2, 1, 8)
-#endif
-#ifdef HAS_YUY2TOYROW_NEON
-YANY(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 2, 1, 16)
-#endif
-#ifdef HAS_UYVYTOYROW_NEON
-YANY(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 2, 1, 16)
-#endif
-#ifdef HAS_RGB24TOARGBROW_NEON
-YANY(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 3, 4, 8)
-#endif
-#ifdef HAS_RAWTOARGBROW_NEON
-YANY(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 3, 4, 8)
-#endif
-#ifdef HAS_RGB565TOARGBROW_NEON
-YANY(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 2, 4, 8)
-#endif
-#ifdef HAS_ARGB1555TOARGBROW_NEON
-YANY(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 2, 4, 8)
-#endif
-#ifdef HAS_ARGB4444TOARGBROW_NEON
-YANY(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 2, 4, 8)
-#endif
-#undef YANY
-
-#define YANY(NAMEANY, ARGBTOY_SIMD, ARGBTOY_C, SBPP, BPP, MASK) \
- void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) { \
- int n = width & ~MASK; \
- ARGBTOY_SIMD(src_argb, dst_y, n); \
- ARGBTOY_C(src_argb + n * SBPP, \
- dst_y + n * BPP, width & MASK); \
- }
-
-// Attenuate is destructive so last16 method can not be used due to overlap.
-#ifdef HAS_ARGBATTENUATEROW_SSSE3
-YANY(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, ARGBAttenuateRow_C,
- 4, 4, 3)
-#endif
-#ifdef HAS_ARGBATTENUATEROW_SSE2
-YANY(ARGBAttenuateRow_Any_SSE2, ARGBAttenuateRow_SSE2, ARGBAttenuateRow_C,
- 4, 4, 3)
-#endif
-#ifdef HAS_ARGBUNATTENUATEROW_SSE2
-YANY(ARGBUnattenuateRow_Any_SSE2, ARGBUnattenuateRow_SSE2, ARGBUnattenuateRow_C,
- 4, 4, 3)
-#endif
-#ifdef HAS_ARGBATTENUATEROW_AVX2
-YANY(ARGBAttenuateRow_Any_AVX2, ARGBAttenuateRow_AVX2, ARGBAttenuateRow_C,
- 4, 4, 7)
-#endif
-#ifdef HAS_ARGBUNATTENUATEROW_AVX2
-YANY(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, ARGBUnattenuateRow_C,
- 4, 4, 7)
-#endif
-#ifdef HAS_ARGBATTENUATEROW_NEON
-YANY(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, ARGBAttenuateRow_C,
- 4, 4, 7)
-#endif
-#undef YANY
-
-// RGB/YUV to UV does multiple of 16 with SIMD and remainder with C.
-#define UVANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, BPP, MASK) \
- void NAMEANY(const uint8* src_argb, int src_stride_argb, \
- uint8* dst_u, uint8* dst_v, int width) { \
- int n = width & ~MASK; \
- ANYTOUV_SIMD(src_argb, src_stride_argb, dst_u, dst_v, n); \
- ANYTOUV_C(src_argb + n * BPP, src_stride_argb, \
- dst_u + (n >> 1), \
- dst_v + (n >> 1), \
- width & MASK); \
- }
-
-#ifdef HAS_ARGBTOUVROW_AVX2
-UVANY(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, ARGBToUVRow_C, 4, 31)
-UVANY(YUY2ToUVRow_Any_AVX2, YUY2ToUVRow_AVX2, YUY2ToUVRow_C, 2, 31)
-UVANY(UYVYToUVRow_Any_AVX2, UYVYToUVRow_AVX2, UYVYToUVRow_C, 2, 31)
-#endif
-#ifdef HAS_ARGBTOUVROW_SSSE3
-UVANY(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_Unaligned_SSSE3, ARGBToUVRow_C, 4, 15)
-UVANY(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_Unaligned_SSSE3, ARGBToUVJRow_C,
- 4, 15)
-UVANY(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_Unaligned_SSSE3, BGRAToUVRow_C, 4, 15)
-UVANY(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_Unaligned_SSSE3, ABGRToUVRow_C, 4, 15)
-UVANY(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_Unaligned_SSSE3, RGBAToUVRow_C, 4, 15)
-UVANY(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_Unaligned_SSE2, YUY2ToUVRow_C, 2, 15)
-UVANY(UYVYToUVRow_Any_SSE2, UYVYToUVRow_Unaligned_SSE2, UYVYToUVRow_C, 2, 15)
-#endif
-#ifdef HAS_ARGBTOUVROW_NEON
-UVANY(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, ARGBToUVRow_C, 4, 15)
-#endif
-#ifdef HAS_ARGBTOUVJROW_NEON
-UVANY(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, ARGBToUVJRow_C, 4, 15)
-#endif
-#ifdef HAS_BGRATOUVROW_NEON
-UVANY(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, BGRAToUVRow_C, 4, 15)
-#endif
-#ifdef HAS_ABGRTOUVROW_NEON
-UVANY(ABGRToUVRow_Any_NEON, ABGRToUVRow_NEON, ABGRToUVRow_C, 4, 15)
-#endif
-#ifdef HAS_RGBATOUVROW_NEON
-UVANY(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, RGBAToUVRow_C, 4, 15)
-#endif
-#ifdef HAS_RGB24TOUVROW_NEON
-UVANY(RGB24ToUVRow_Any_NEON, RGB24ToUVRow_NEON, RGB24ToUVRow_C, 3, 15)
-#endif
-#ifdef HAS_RAWTOUVROW_NEON
-UVANY(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, RAWToUVRow_C, 3, 15)
-#endif
-#ifdef HAS_RGB565TOUVROW_NEON
-UVANY(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, RGB565ToUVRow_C, 2, 15)
-#endif
-#ifdef HAS_ARGB1555TOUVROW_NEON
-UVANY(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, ARGB1555ToUVRow_C, 2, 15)
-#endif
-#ifdef HAS_ARGB4444TOUVROW_NEON
-UVANY(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, ARGB4444ToUVRow_C, 2, 15)
-#endif
-#ifdef HAS_YUY2TOUVROW_NEON
-UVANY(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, YUY2ToUVRow_C, 2, 15)
-#endif
-#ifdef HAS_UYVYTOUVROW_NEON
-UVANY(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, UYVYToUVRow_C, 2, 15)
-#endif
-#undef UVANY
-
-#define UV422ANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, BPP, MASK, SHIFT) \
- void NAMEANY(const uint8* src_uv, \
- uint8* dst_u, uint8* dst_v, int width) { \
- int n = width & ~MASK; \
- ANYTOUV_SIMD(src_uv, dst_u, dst_v, n); \
- ANYTOUV_C(src_uv + n * BPP, \
- dst_u + (n >> SHIFT), \
- dst_v + (n >> SHIFT), \
- width & MASK); \
- }
-
-#ifdef HAS_ARGBTOUV444ROW_SSSE3
-UV422ANY(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_Unaligned_SSSE3,
- ARGBToUV444Row_C, 4, 15, 0)
-#endif
-#ifdef HAS_YUY2TOUV422ROW_AVX2
-UV422ANY(YUY2ToUV422Row_Any_AVX2, YUY2ToUV422Row_AVX2,
- YUY2ToUV422Row_C, 2, 31, 1)
-UV422ANY(UYVYToUV422Row_Any_AVX2, UYVYToUV422Row_AVX2,
- UYVYToUV422Row_C, 2, 31, 1)
-#endif
-#ifdef HAS_ARGBTOUVROW_SSSE3
-UV422ANY(ARGBToUV422Row_Any_SSSE3, ARGBToUV422Row_Unaligned_SSSE3,
- ARGBToUV422Row_C, 4, 15, 1)
-UV422ANY(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_Unaligned_SSE2,
- YUY2ToUV422Row_C, 2, 15, 1)
-UV422ANY(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_Unaligned_SSE2,
- UYVYToUV422Row_C, 2, 15, 1)
-#endif
-#ifdef HAS_YUY2TOUV422ROW_NEON
-UV422ANY(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON,
- ARGBToUV444Row_C, 4, 7, 0)
-UV422ANY(ARGBToUV422Row_Any_NEON, ARGBToUV422Row_NEON,
- ARGBToUV422Row_C, 4, 15, 1)
-UV422ANY(ARGBToUV411Row_Any_NEON, ARGBToUV411Row_NEON,
- ARGBToUV411Row_C, 4, 31, 2)
-UV422ANY(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON,
- YUY2ToUV422Row_C, 2, 15, 1)
-UV422ANY(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON,
- UYVYToUV422Row_C, 2, 15, 1)
-#endif
-#undef UV422ANY
-
-#define SPLITUVROWANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, MASK) \
- void NAMEANY(const uint8* src_uv, \
- uint8* dst_u, uint8* dst_v, int width) { \
- int n = width & ~MASK; \
- ANYTOUV_SIMD(src_uv, dst_u, dst_v, n); \
- ANYTOUV_C(src_uv + n * 2, \
- dst_u + n, \
- dst_v + n, \
- width & MASK); \
- }
-
-#ifdef HAS_SPLITUVROW_SSE2
-SPLITUVROWANY(SplitUVRow_Any_SSE2, SplitUVRow_Unaligned_SSE2, SplitUVRow_C, 15)
-#endif
-#ifdef HAS_SPLITUVROW_AVX2
-SPLITUVROWANY(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, SplitUVRow_C, 31)
-#endif
-#ifdef HAS_SPLITUVROW_NEON
-SPLITUVROWANY(SplitUVRow_Any_NEON, SplitUVRow_NEON, SplitUVRow_C, 15)
-#endif
-#ifdef HAS_SPLITUVROW_MIPS_DSPR2
-SPLITUVROWANY(SplitUVRow_Any_MIPS_DSPR2, SplitUVRow_Unaligned_MIPS_DSPR2,
- SplitUVRow_C, 15)
-#endif
-#undef SPLITUVROWANY
-
-#define MERGEUVROW_ANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, MASK) \
- void NAMEANY(const uint8* src_u, const uint8* src_v, \
- uint8* dst_uv, int width) { \
- int n = width & ~MASK; \
- ANYTOUV_SIMD(src_u, src_v, dst_uv, n); \
- ANYTOUV_C(src_u + n, \
- src_v + n, \
- dst_uv + n * 2, \
- width & MASK); \
+ if (n > 0) { \
+ ANY_SIMD(y_buf, uv_buf, dst_ptr, n); \
+ } \
+ memcpy(temp, y_buf + n * SBPP, r * SBPP); \
+ memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \
+ SS(r, UVSHIFT) * SBPP2); \
+ ANY_SIMD(temp, temp + 64, temp + 128, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
}
+// Merge functions.
#ifdef HAS_MERGEUVROW_SSE2
-MERGEUVROW_ANY(MergeUVRow_Any_SSE2, MergeUVRow_Unaligned_SSE2, MergeUVRow_C, 15)
+ANY21(MergeUVRow_Any_SSE2, MergeUVRow_SSE2, 0, 1, 1, 2, 15)
#endif
#ifdef HAS_MERGEUVROW_AVX2
-MERGEUVROW_ANY(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, MergeUVRow_C, 31)
+ANY21(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, 0, 1, 1, 2, 31)
#endif
#ifdef HAS_MERGEUVROW_NEON
-MERGEUVROW_ANY(MergeUVRow_Any_NEON, MergeUVRow_NEON, MergeUVRow_C, 15)
+ANY21(MergeUVRow_Any_NEON, MergeUVRow_NEON, 0, 1, 1, 2, 15)
#endif
-#undef MERGEUVROW_ANY
-
-#define MATHROW_ANY(NAMEANY, ARGBMATH_SIMD, ARGBMATH_C, MASK) \
- void NAMEANY(const uint8* src_argb0, const uint8* src_argb1, \
- uint8* dst_argb, int width) { \
- int n = width & ~MASK; \
- ARGBMATH_SIMD(src_argb0, src_argb1, dst_argb, n); \
- ARGBMATH_C(src_argb0 + n * 4, \
- src_argb1 + n * 4, \
- dst_argb + n * 4, \
- width & MASK); \
- }
+// Math functions.
#ifdef HAS_ARGBMULTIPLYROW_SSE2
-MATHROW_ANY(ARGBMultiplyRow_Any_SSE2, ARGBMultiplyRow_SSE2, ARGBMultiplyRow_C,
- 3)
+ANY21(ARGBMultiplyRow_Any_SSE2, ARGBMultiplyRow_SSE2, 0, 4, 4, 4, 3)
#endif
#ifdef HAS_ARGBADDROW_SSE2
-MATHROW_ANY(ARGBAddRow_Any_SSE2, ARGBAddRow_SSE2, ARGBAddRow_C, 3)
+ANY21(ARGBAddRow_Any_SSE2, ARGBAddRow_SSE2, 0, 4, 4, 4, 3)
#endif
#ifdef HAS_ARGBSUBTRACTROW_SSE2
-MATHROW_ANY(ARGBSubtractRow_Any_SSE2, ARGBSubtractRow_SSE2, ARGBSubtractRow_C,
- 3)
+ANY21(ARGBSubtractRow_Any_SSE2, ARGBSubtractRow_SSE2, 0, 4, 4, 4, 3)
#endif
#ifdef HAS_ARGBMULTIPLYROW_AVX2
-MATHROW_ANY(ARGBMultiplyRow_Any_AVX2, ARGBMultiplyRow_AVX2, ARGBMultiplyRow_C,
- 7)
+ANY21(ARGBMultiplyRow_Any_AVX2, ARGBMultiplyRow_AVX2, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBADDROW_AVX2
-MATHROW_ANY(ARGBAddRow_Any_AVX2, ARGBAddRow_AVX2, ARGBAddRow_C, 7)
+ANY21(ARGBAddRow_Any_AVX2, ARGBAddRow_AVX2, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBSUBTRACTROW_AVX2
-MATHROW_ANY(ARGBSubtractRow_Any_AVX2, ARGBSubtractRow_AVX2, ARGBSubtractRow_C,
- 7)
+ANY21(ARGBSubtractRow_Any_AVX2, ARGBSubtractRow_AVX2, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBMULTIPLYROW_NEON
-MATHROW_ANY(ARGBMultiplyRow_Any_NEON, ARGBMultiplyRow_NEON, ARGBMultiplyRow_C,
- 7)
+ANY21(ARGBMultiplyRow_Any_NEON, ARGBMultiplyRow_NEON, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBADDROW_NEON
-MATHROW_ANY(ARGBAddRow_Any_NEON, ARGBAddRow_NEON, ARGBAddRow_C, 7)
+ANY21(ARGBAddRow_Any_NEON, ARGBAddRow_NEON, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBSUBTRACTROW_NEON
-MATHROW_ANY(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON, ARGBSubtractRow_C,
- 7)
+ANY21(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON, 0, 4, 4, 4, 7)
#endif
-#undef MATHROW_ANY
+#ifdef HAS_SOBELROW_SSE2
+ANY21(SobelRow_Any_SSE2, SobelRow_SSE2, 0, 1, 1, 4, 15)
+#endif
+#ifdef HAS_SOBELROW_NEON
+ANY21(SobelRow_Any_NEON, SobelRow_NEON, 0, 1, 1, 4, 7)
+#endif
+#ifdef HAS_SOBELTOPLANEROW_SSE2
+ANY21(SobelToPlaneRow_Any_SSE2, SobelToPlaneRow_SSE2, 0, 1, 1, 1, 15)
+#endif
+#ifdef HAS_SOBELTOPLANEROW_NEON
+ANY21(SobelToPlaneRow_Any_NEON, SobelToPlaneRow_NEON, 0, 1, 1, 1, 15)
+#endif
+#ifdef HAS_SOBELXYROW_SSE2
+ANY21(SobelXYRow_Any_SSE2, SobelXYRow_SSE2, 0, 1, 1, 4, 15)
+#endif
+#ifdef HAS_SOBELXYROW_NEON
+ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7)
+#endif
+#undef ANY21
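All of the ANY21 wrappers above share one remainder scheme: the SIMD kernel processes the largest multiple of (MASK + 1) pixels directly, and any leftover pixels are staged through a zeroed, aligned scratch buffer so the kernel never touches memory past the end of the row. A minimal sketch of the width arithmetic, assuming MergeUVRow_Any_SSE2 (MASK = 15) and a hypothetical width of 25:

#include <stdio.h>

int main(void) {
  int width = 25;
  int MASK = 15;          /* the SSE2 kernel handles 16 pixels per loop */
  int n = width & ~MASK;  /* 16: pixels passed straight to the kernel   */
  int r = width & MASK;   /*  9: remainder staged through temp[]        */
  printf("n = %d, r = %d, staged block = %d\n", n, r, MASK + 1);
  /* The wrapper memcpys r pixels of each source plane into the scratch
     buffer, runs the kernel on one full 16-pixel block there, and then
     memcpys only r output pixels back to dst_ptr + n * BPP. */
  return 0;
}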
-// Shuffle may want to work in place, so last16 method can not be used.
-#define YANY(NAMEANY, ARGBTOY_SIMD, ARGBTOY_C, SBPP, BPP, MASK) \
- void NAMEANY(const uint8* src_argb, uint8* dst_argb, \
- const uint8* shuffler, int width) { \
+// Any 2 planes to 1 with yuvconstants
+#define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
+ void NAMEANY(const uint8* y_buf, const uint8* uv_buf, \
+ uint8* dst_ptr, const struct YuvConstants* yuvconstants, \
+ int width) { \
+ SIMD_ALIGNED(uint8 temp[64 * 3]); \
+ memset(temp, 0, 64 * 2); /* for msan */ \
+ int r = width & MASK; \
int n = width & ~MASK; \
- ARGBTOY_SIMD(src_argb, dst_argb, shuffler, n); \
- ARGBTOY_C(src_argb + n * SBPP, \
- dst_argb + n * BPP, shuffler, width & MASK); \
+ if (n > 0) { \
+ ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \
+ } \
+ memcpy(temp, y_buf + n * SBPP, r * SBPP); \
+ memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \
+ SS(r, UVSHIFT) * SBPP2); \
+ ANY_SIMD(temp, temp + 64, temp + 128, yuvconstants, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
}
+// Biplanar to RGB.
+#ifdef HAS_NV12TOARGBROW_SSSE3
+ANY21C(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
+#endif
+#ifdef HAS_NV12TOARGBROW_AVX2
+ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15)
+#endif
+#ifdef HAS_NV12TOARGBROW_NEON
+ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7)
+#endif
+#ifdef HAS_NV21TOARGBROW_SSSE3
+ANY21C(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
+#endif
+#ifdef HAS_NV21TOARGBROW_AVX2
+ANY21C(NV21ToARGBRow_Any_AVX2, NV21ToARGBRow_AVX2, 1, 1, 2, 4, 15)
+#endif
+#ifdef HAS_NV21TOARGBROW_NEON
+ANY21C(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7)
+#endif
+#ifdef HAS_NV12TORGB565ROW_SSSE3
+ANY21C(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7)
+#endif
+#ifdef HAS_NV12TORGB565ROW_AVX2
+ANY21C(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15)
+#endif
+#ifdef HAS_NV12TORGB565ROW_NEON
+ANY21C(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7)
+#endif
+#undef ANY21C
+
+// Any 1 to 1.
+#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
+ void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \
+ SIMD_ALIGNED(uint8 temp[128 * 2]); \
+ memset(temp, 0, 128); /* for YUY2 and msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr, dst_ptr, n); \
+ } \
+ memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
+ ANY_SIMD(temp, temp + 128, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
+ }
+
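ANY11's source addressing hinges on two pieces: UVSHIFT, which halves the pixel index for packed formats such as YUY2 where two Y samples share one 4-byte macropixel, and the SS helper, which rounds a pixel count up to whole subsample units. A sketch of the helper's behavior, assuming it matches the definition near the top of row_any.cc:

/* Assumed to mirror the upstream SS macro in row_any.cc. */
#define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift))

/* For YUY2ToYRow_Any_SSE2 (UVSHIFT = 1, SBPP = 4): 9 leftover Y pixels
   span SS(9, 1) = 5 macropixels, so 5 * 4 = 20 source bytes are copied
   into temp before the final fixed-width SIMD pass. */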
+#ifdef HAS_COPYROW_AVX
+ANY11(CopyRow_Any_AVX, CopyRow_AVX, 0, 1, 1, 63)
+#endif
+#ifdef HAS_COPYROW_SSE2
+ANY11(CopyRow_Any_SSE2, CopyRow_SSE2, 0, 1, 1, 31)
+#endif
+#ifdef HAS_COPYROW_NEON
+ANY11(CopyRow_Any_NEON, CopyRow_NEON, 0, 1, 1, 31)
+#endif
+#if defined(HAS_ARGBTORGB24ROW_SSSE3)
+ANY11(ARGBToRGB24Row_Any_SSSE3, ARGBToRGB24Row_SSSE3, 0, 4, 3, 15)
+ANY11(ARGBToRAWRow_Any_SSSE3, ARGBToRAWRow_SSSE3, 0, 4, 3, 15)
+ANY11(ARGBToRGB565Row_Any_SSE2, ARGBToRGB565Row_SSE2, 0, 4, 2, 3)
+ANY11(ARGBToARGB1555Row_Any_SSE2, ARGBToARGB1555Row_SSE2, 0, 4, 2, 3)
+ANY11(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, 0, 4, 2, 3)
+#endif
+#if defined(HAS_ARGBTORGB565ROW_AVX2)
+ANY11(ARGBToRGB565Row_Any_AVX2, ARGBToRGB565Row_AVX2, 0, 4, 2, 7)
+#endif
+#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
+ANY11(ARGBToARGB1555Row_Any_AVX2, ARGBToARGB1555Row_AVX2, 0, 4, 2, 7)
+ANY11(ARGBToARGB4444Row_Any_AVX2, ARGBToARGB4444Row_AVX2, 0, 4, 2, 7)
+#endif
+#if defined(HAS_J400TOARGBROW_SSE2)
+ANY11(J400ToARGBRow_Any_SSE2, J400ToARGBRow_SSE2, 0, 1, 4, 7)
+#endif
+#if defined(HAS_J400TOARGBROW_AVX2)
+ANY11(J400ToARGBRow_Any_AVX2, J400ToARGBRow_AVX2, 0, 1, 4, 15)
+#endif
+#if defined(HAS_I400TOARGBROW_SSE2)
+ANY11(I400ToARGBRow_Any_SSE2, I400ToARGBRow_SSE2, 0, 1, 4, 7)
+#endif
+#if defined(HAS_I400TOARGBROW_AVX2)
+ANY11(I400ToARGBRow_Any_AVX2, I400ToARGBRow_AVX2, 0, 1, 4, 15)
+#endif
+#if defined(HAS_RGB24TOARGBROW_SSSE3)
+ANY11(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 0, 3, 4, 15)
+ANY11(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 0, 3, 4, 15)
+ANY11(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 0, 2, 4, 7)
+ANY11(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, 0, 2, 4, 7)
+ANY11(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, 0, 2, 4, 7)
+#endif
+#if defined(HAS_RAWTORGB24ROW_SSSE3)
+ANY11(RAWToRGB24Row_Any_SSSE3, RAWToRGB24Row_SSSE3, 0, 3, 3, 7)
+#endif
+#if defined(HAS_RGB565TOARGBROW_AVX2)
+ANY11(RGB565ToARGBRow_Any_AVX2, RGB565ToARGBRow_AVX2, 0, 2, 4, 15)
+#endif
+#if defined(HAS_ARGB1555TOARGBROW_AVX2)
+ANY11(ARGB1555ToARGBRow_Any_AVX2, ARGB1555ToARGBRow_AVX2, 0, 2, 4, 15)
+#endif
+#if defined(HAS_ARGB4444TOARGBROW_AVX2)
+ANY11(ARGB4444ToARGBRow_Any_AVX2, ARGB4444ToARGBRow_AVX2, 0, 2, 4, 15)
+#endif
+#if defined(HAS_ARGBTORGB24ROW_NEON)
+ANY11(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, 0, 4, 3, 7)
+ANY11(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, 0, 4, 3, 7)
+ANY11(ARGBToRGB565Row_Any_NEON, ARGBToRGB565Row_NEON, 0, 4, 2, 7)
+ANY11(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, 0, 4, 2, 7)
+ANY11(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, 0, 4, 2, 7)
+ANY11(J400ToARGBRow_Any_NEON, J400ToARGBRow_NEON, 0, 1, 4, 7)
+ANY11(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, 0, 1, 4, 7)
+#endif
+#if defined(HAS_RAWTORGB24ROW_NEON)
+ANY11(RAWToRGB24Row_Any_NEON, RAWToRGB24Row_NEON, 0, 3, 3, 7)
+#endif
+#ifdef HAS_ARGBTOYROW_AVX2
+ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 4, 1, 31)
+#endif
+#ifdef HAS_ARGBTOYJROW_AVX2
+ANY11(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, 0, 4, 1, 31)
+#endif
+#ifdef HAS_UYVYTOYROW_AVX2
+ANY11(UYVYToYRow_Any_AVX2, UYVYToYRow_AVX2, 0, 2, 1, 31)
+#endif
+#ifdef HAS_YUY2TOYROW_AVX2
+ANY11(YUY2ToYRow_Any_AVX2, YUY2ToYRow_AVX2, 1, 4, 1, 31)
+#endif
+#ifdef HAS_ARGBTOYROW_SSSE3
+ANY11(ARGBToYRow_Any_SSSE3, ARGBToYRow_SSSE3, 0, 4, 1, 15)
+#endif
+#ifdef HAS_BGRATOYROW_SSSE3
+ANY11(BGRAToYRow_Any_SSSE3, BGRAToYRow_SSSE3, 0, 4, 1, 15)
+ANY11(ABGRToYRow_Any_SSSE3, ABGRToYRow_SSSE3, 0, 4, 1, 15)
+ANY11(RGBAToYRow_Any_SSSE3, RGBAToYRow_SSSE3, 0, 4, 1, 15)
+ANY11(YUY2ToYRow_Any_SSE2, YUY2ToYRow_SSE2, 1, 4, 1, 15)
+ANY11(UYVYToYRow_Any_SSE2, UYVYToYRow_SSE2, 1, 4, 1, 15)
+#endif
+#ifdef HAS_ARGBTOYJROW_SSSE3
+ANY11(ARGBToYJRow_Any_SSSE3, ARGBToYJRow_SSSE3, 0, 4, 1, 15)
+#endif
+#ifdef HAS_ARGBTOYROW_NEON
+ANY11(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 0, 4, 1, 7)
+#endif
+#ifdef HAS_ARGBTOYJROW_NEON
+ANY11(ARGBToYJRow_Any_NEON, ARGBToYJRow_NEON, 0, 4, 1, 7)
+#endif
+#ifdef HAS_BGRATOYROW_NEON
+ANY11(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 0, 4, 1, 7)
+#endif
+#ifdef HAS_ABGRTOYROW_NEON
+ANY11(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 0, 4, 1, 7)
+#endif
+#ifdef HAS_RGBATOYROW_NEON
+ANY11(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 0, 4, 1, 7)
+#endif
+#ifdef HAS_RGB24TOYROW_NEON
+ANY11(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 0, 3, 1, 7)
+#endif
+#ifdef HAS_RAWTOYROW_NEON
+ANY11(RAWToYRow_Any_NEON, RAWToYRow_NEON, 0, 3, 1, 7)
+#endif
+#ifdef HAS_RGB565TOYROW_NEON
+ANY11(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 0, 2, 1, 7)
+#endif
+#ifdef HAS_ARGB1555TOYROW_NEON
+ANY11(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 0, 2, 1, 7)
+#endif
+#ifdef HAS_ARGB4444TOYROW_NEON
+ANY11(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 0, 2, 1, 7)
+#endif
+#ifdef HAS_YUY2TOYROW_NEON
+ANY11(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 1, 4, 1, 15)
+#endif
+#ifdef HAS_UYVYTOYROW_NEON
+ANY11(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 0, 2, 1, 15)
+#endif
+#ifdef HAS_RGB24TOARGBROW_NEON
+ANY11(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 0, 3, 4, 7)
+#endif
+#ifdef HAS_RAWTOARGBROW_NEON
+ANY11(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 0, 3, 4, 7)
+#endif
+#ifdef HAS_RGB565TOARGBROW_NEON
+ANY11(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 0, 2, 4, 7)
+#endif
+#ifdef HAS_ARGB1555TOARGBROW_NEON
+ANY11(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 0, 2, 4, 7)
+#endif
+#ifdef HAS_ARGB4444TOARGBROW_NEON
+ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7)
+#endif
+#ifdef HAS_ARGBATTENUATEROW_SSSE3
+ANY11(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, 0, 4, 4, 3)
+#endif
+#ifdef HAS_ARGBUNATTENUATEROW_SSE2
+ANY11(ARGBUnattenuateRow_Any_SSE2, ARGBUnattenuateRow_SSE2, 0, 4, 4, 3)
+#endif
+#ifdef HAS_ARGBATTENUATEROW_AVX2
+ANY11(ARGBAttenuateRow_Any_AVX2, ARGBAttenuateRow_AVX2, 0, 4, 4, 7)
+#endif
+#ifdef HAS_ARGBUNATTENUATEROW_AVX2
+ANY11(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, 0, 4, 4, 7)
+#endif
+#ifdef HAS_ARGBATTENUATEROW_NEON
+ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7)
+#endif
+#undef ANY11
+
+// Any 1 to 1 with yuvconstants
+#define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
+ void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, \
+ const struct YuvConstants* yuvconstants, int width) { \
+ SIMD_ALIGNED(uint8 temp[128 * 2]); \
+ memset(temp, 0, 128); /* for YUY2 and msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr, dst_ptr, yuvconstants, n); \
+ } \
+ memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
+ ANY_SIMD(temp, temp + 128, yuvconstants, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
+ }
+#if defined(HAS_YUY2TOARGBROW_SSSE3)
+ANY11C(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15)
+ANY11C(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15)
+#endif
+#if defined(HAS_YUY2TOARGBROW_AVX2)
+ANY11C(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31)
+ANY11C(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31)
+#endif
+#if defined(HAS_YUY2TOARGBROW_NEON)
+ANY11C(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7)
+ANY11C(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7)
+#endif
+#undef ANY11C
+
+// Any 1 to 1 blended.
+#define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
+ void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \
+ SIMD_ALIGNED(uint8 temp[128 * 2]); \
+ memset(temp, 0, 128 * 2); /* for YUY2 and msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr, dst_ptr, n); \
+ } \
+ memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
+ memcpy(temp + 128, dst_ptr + n * BPP, r * BPP); \
+ ANY_SIMD(temp, temp + 128, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
+ }
+
+#ifdef HAS_ARGBCOPYALPHAROW_AVX2
+ANY11B(ARGBCopyAlphaRow_Any_AVX2, ARGBCopyAlphaRow_AVX2, 0, 4, 4, 15)
+#endif
+#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
+ANY11B(ARGBCopyAlphaRow_Any_SSE2, ARGBCopyAlphaRow_SSE2, 0, 4, 4, 7)
+#endif
+#ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2
+ANY11B(ARGBCopyYToAlphaRow_Any_AVX2, ARGBCopyYToAlphaRow_AVX2, 0, 1, 4, 15)
+#endif
+#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
+ANY11B(ARGBCopyYToAlphaRow_Any_SSE2, ARGBCopyYToAlphaRow_SSE2, 0, 1, 4, 7)
+#endif
+#undef ANY11B
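The blended variant adds one step over plain ANY11: the current destination pixels are staged at temp + 128 before the kernel runs, because kernels like ARGBCopyAlphaRow read the destination as well as write it. A toy model of that read-modify-write staging, using a hypothetical alpha-copy kernel in place of the real SIMD row:

#include <stdio.h>
#include <string.h>

typedef unsigned char uint8;

/* Hypothetical stand-in for ARGBCopyAlphaRow_C: copy alpha, keep RGB. */
static void CopyAlphaRow(const uint8* src, uint8* dst, int width) {
  int x;
  for (x = 0; x < width; ++x) dst[x * 4 + 3] = src[x * 4 + 3];
}

int main(void) {
  uint8 temp[128 * 2] = {0};
  uint8 src[8 * 4] = {0}, dst[8 * 4] = {0};
  int n = 0, r = 3, BPP = 4, MASK = 7;        /* width 3: remainder only */
  memcpy(temp, src + n * BPP, r * BPP);       /* stage source pixels     */
  memcpy(temp + 128, dst + n * BPP, r * BPP); /* stage current dst       */
  CopyAlphaRow(temp, temp + 128, MASK + 1);   /* full-width kernel pass  */
  memcpy(dst + n * BPP, temp + 128, r * BPP); /* commit only r pixels    */
  printf("staged %d of %d pixels\n", r, MASK + 1);
  return 0;
}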
+
+// Any 1 to 1 with parameter.
+#define ANY11P(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \
+ void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, \
+ T shuffler, int width) { \
+ SIMD_ALIGNED(uint8 temp[64 * 2]); \
+ memset(temp, 0, 64); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr, dst_ptr, shuffler, n); \
+ } \
+ memcpy(temp, src_ptr + n * SBPP, r * SBPP); \
+ ANY_SIMD(temp, temp + 64, shuffler, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \
+ }
+
+#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
+ANY11P(ARGBToRGB565DitherRow_Any_SSE2, ARGBToRGB565DitherRow_SSE2,
+ const uint32, 4, 2, 3)
+#endif
+#if defined(HAS_ARGBTORGB565DITHERROW_AVX2)
+ANY11P(ARGBToRGB565DitherRow_Any_AVX2, ARGBToRGB565DitherRow_AVX2,
+ const uint32, 4, 2, 7)
+#endif
+#if defined(HAS_ARGBTORGB565DITHERROW_NEON)
+ANY11P(ARGBToRGB565DitherRow_Any_NEON, ARGBToRGB565DitherRow_NEON,
+ const uint32, 4, 2, 7)
+#endif
#ifdef HAS_ARGBSHUFFLEROW_SSE2
-YANY(ARGBShuffleRow_Any_SSE2, ARGBShuffleRow_SSE2,
- ARGBShuffleRow_C, 4, 4, 3)
+ANY11P(ARGBShuffleRow_Any_SSE2, ARGBShuffleRow_SSE2, const uint8*, 4, 4, 3)
#endif
#ifdef HAS_ARGBSHUFFLEROW_SSSE3
-YANY(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_Unaligned_SSSE3,
- ARGBShuffleRow_C, 4, 4, 7)
+ANY11P(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_SSSE3, const uint8*, 4, 4, 7)
#endif
#ifdef HAS_ARGBSHUFFLEROW_AVX2
-YANY(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2,
- ARGBShuffleRow_C, 4, 4, 15)
+ANY11P(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2, const uint8*, 4, 4, 15)
#endif
#ifdef HAS_ARGBSHUFFLEROW_NEON
-YANY(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON,
- ARGBShuffleRow_C, 4, 4, 3)
+ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8*, 4, 4, 3)
#endif
-#undef YANY
+#undef ANY11P
-// Interpolate may want to work in place, so last16 method can not be used.
-#define NANY(NAMEANY, TERP_SIMD, TERP_C, SBPP, BPP, MASK) \
+// Any 1 to 1 interpolate. Takes 2 rows of source via stride.
+#define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \
void NAMEANY(uint8* dst_ptr, const uint8* src_ptr, \
ptrdiff_t src_stride_ptr, int width, \
int source_y_fraction) { \
+ SIMD_ALIGNED(uint8 temp[64 * 3]); \
+ memset(temp, 0, 64 * 2); /* for msan */ \
+ int r = width & MASK; \
int n = width & ~MASK; \
- TERP_SIMD(dst_ptr, src_ptr, src_stride_ptr, \
- n, source_y_fraction); \
- TERP_C(dst_ptr + n * BPP, \
- src_ptr + n * SBPP, src_stride_ptr, \
- width & MASK, source_y_fraction); \
+ if (n > 0) { \
+ ANY_SIMD(dst_ptr, src_ptr, src_stride_ptr, n, source_y_fraction); \
+ } \
+ memcpy(temp, src_ptr + n * SBPP, r * SBPP); \
+ memcpy(temp + 64, src_ptr + src_stride_ptr + n * SBPP, r * SBPP); \
+ ANY_SIMD(temp + 128, temp, 64, MASK + 1, source_y_fraction); \
+ memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
}
#ifdef HAS_INTERPOLATEROW_AVX2
-NANY(InterpolateRow_Any_AVX2, InterpolateRow_AVX2,
- InterpolateRow_C, 1, 1, 32)
+ANY11T(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, 1, 1, 31)
#endif
#ifdef HAS_INTERPOLATEROW_SSSE3
-NANY(InterpolateRow_Any_SSSE3, InterpolateRow_Unaligned_SSSE3,
- InterpolateRow_C, 1, 1, 15)
-#endif
-#ifdef HAS_INTERPOLATEROW_SSE2
-NANY(InterpolateRow_Any_SSE2, InterpolateRow_Unaligned_SSE2,
- InterpolateRow_C, 1, 1, 15)
+ANY11T(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, 1, 1, 15)
#endif
#ifdef HAS_INTERPOLATEROW_NEON
-NANY(InterpolateRow_Any_NEON, InterpolateRow_NEON,
- InterpolateRow_C, 1, 1, 15)
+ANY11T(InterpolateRow_Any_NEON, InterpolateRow_NEON, 1, 1, 15)
#endif
#ifdef HAS_INTERPOLATEROW_MIPS_DSPR2
-NANY(InterpolateRow_Any_MIPS_DSPR2, InterpolateRow_MIPS_DSPR2,
- InterpolateRow_C, 1, 1, 3)
+ANY11T(InterpolateRow_Any_MIPS_DSPR2, InterpolateRow_MIPS_DSPR2, 1, 1, 3)
#endif
-#undef NANY
+#undef ANY11T
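For the leftover pixels, ANY11T copies both source rows into the scratch buffer (at temp and temp + 64) and passes 64 as the row stride, so the kernel sees a well-formed two-row input that lives entirely in aligned memory. The operation being wrapped is, in the C reference, a fixed-point lerp between the two rows; a hedged sketch assuming a 0..256 fraction:

typedef unsigned char uint8;

/* Sketch of the InterpolateRow_C blend: fraction 0 returns row0 and
   larger fractions weight toward row1 (the row one stride below). */
static uint8 InterpolatePixel(uint8 row0, uint8 row1, int source_y_fraction) {
  return (uint8)((row0 * (256 - source_y_fraction) +
                  row1 * source_y_fraction) >> 8);
}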
+
+// Any 1 to 1 mirror.
+#define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK) \
+ void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \
+ SIMD_ALIGNED(uint8 temp[64 * 2]); \
+ memset(temp, 0, 64); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr + r * BPP, dst_ptr, n); \
+ } \
+ memcpy(temp, src_ptr, r * BPP); \
+ ANY_SIMD(temp, temp + 64, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, temp + 64 + (MASK + 1 - r) * BPP, r * BPP); \
+ }
+
+#ifdef HAS_MIRRORROW_AVX2
+ANY11M(MirrorRow_Any_AVX2, MirrorRow_AVX2, 1, 31)
+#endif
+#ifdef HAS_MIRRORROW_SSSE3
+ANY11M(MirrorRow_Any_SSSE3, MirrorRow_SSSE3, 1, 15)
+#endif
+#ifdef HAS_MIRRORROW_NEON
+ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 15)
+#endif
+#ifdef HAS_ARGBMIRRORROW_AVX2
+ANY11M(ARGBMirrorRow_Any_AVX2, ARGBMirrorRow_AVX2, 4, 7)
+#endif
+#ifdef HAS_ARGBMIRRORROW_SSE2
+ANY11M(ARGBMirrorRow_Any_SSE2, ARGBMirrorRow_SSE2, 4, 3)
+#endif
+#ifdef HAS_ARGBMIRRORROW_NEON
+ANY11M(ARGBMirrorRow_Any_NEON, ARGBMirrorRow_NEON, 4, 3)
+#endif
+#undef ANY11M
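Mirroring reverses pixel order, so the remainder lives at the start of the output rather than the end: the kernel is pointed at src_ptr + r * BPP so its n mirrored pixels fill dst_ptr[0..n), while the first r source pixels go through temp and only the tail of the staged block is copied out. A worked check of the offsets, assuming MirrorRow_Any_SSSE3 (BPP = 1, MASK = 15) and a width of 20:

#include <stdio.h>

int main(void) {
  int width = 20, MASK = 15, BPP = 1;
  int n = width & ~MASK;  /* 16: dst[0..15] = src[19..4], done by SIMD  */
  int r = width & MASK;   /*  4: dst[16..19] = src[3..0], via temp      */
  printf("kernel reads from src + %d\n", r * BPP);
  /* The mirrored remainder sits at the end of the staged 16-px block: */
  printf("remainder copied from temp + %d\n", 64 + (MASK + 1 - r) * BPP);
  return 0;
}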
+
+// Any 1 plane (memset).
+#define ANY1(NAMEANY, ANY_SIMD, T, BPP, MASK) \
+ void NAMEANY(uint8* dst_ptr, T v32, int width) { \
+ SIMD_ALIGNED(uint8 temp[64]); \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(dst_ptr, v32, n); \
+ } \
+ ANY_SIMD(temp, v32, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, temp, r * BPP); \
+ }
+
+#ifdef HAS_SETROW_X86
+ANY1(SetRow_Any_X86, SetRow_X86, uint8, 1, 3)
+#endif
+#ifdef HAS_SETROW_NEON
+ANY1(SetRow_Any_NEON, SetRow_NEON, uint8, 1, 15)
+#endif
+#ifdef HAS_ARGBSETROW_NEON
+ANY1(ARGBSetRow_Any_NEON, ARGBSetRow_NEON, uint32, 4, 3)
+#endif
+#undef ANY1
+
+// Any 1 to 2. Outputs UV planes.
+#define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK) \
+ void NAMEANY(const uint8* src_ptr, uint8* dst_u, uint8* dst_v, int width) {\
+ SIMD_ALIGNED(uint8 temp[128 * 3]); \
+ memset(temp, 0, 128); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr, dst_u, dst_v, n); \
+ } \
+ memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
+ /* repeat last 4 bytes for 422 subsampler */ \
+ if ((width & 1) && BPP == 4 && DUVSHIFT == 1) { \
+ memcpy(temp + SS(r, UVSHIFT) * BPP, \
+ temp + SS(r, UVSHIFT) * BPP - BPP, BPP); \
+ } \
+ /* repeat last 4 - 12 bytes for 411 subsampler */ \
+ if (((width & 3) == 1) && BPP == 4 && DUVSHIFT == 2) { \
+ memcpy(temp + SS(r, UVSHIFT) * BPP, \
+ temp + SS(r, UVSHIFT) * BPP - BPP, BPP); \
+ memcpy(temp + SS(r, UVSHIFT) * BPP + BPP, \
+ temp + SS(r, UVSHIFT) * BPP - BPP, BPP * 2); \
+ } \
+ if (((width & 3) == 2) && BPP == 4 && DUVSHIFT == 2) { \
+ memcpy(temp + SS(r, UVSHIFT) * BPP, \
+ temp + SS(r, UVSHIFT) * BPP - BPP * 2, BPP * 2); \
+ } \
+ if (((width & 3) == 3) && BPP == 4 && DUVSHIFT == 2) { \
+ memcpy(temp + SS(r, UVSHIFT) * BPP, \
+ temp + SS(r, UVSHIFT) * BPP - BPP, BPP); \
+ } \
+ ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1); \
+ memcpy(dst_u + (n >> DUVSHIFT), temp + 128, SS(r, DUVSHIFT)); \
+ memcpy(dst_v + (n >> DUVSHIFT), temp + 256, SS(r, DUVSHIFT)); \
+ }
+
+#ifdef HAS_SPLITUVROW_SSE2
+ANY12(SplitUVRow_Any_SSE2, SplitUVRow_SSE2, 0, 2, 0, 15)
+#endif
+#ifdef HAS_SPLITUVROW_AVX2
+ANY12(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, 0, 2, 0, 31)
+#endif
+#ifdef HAS_SPLITUVROW_NEON
+ANY12(SplitUVRow_Any_NEON, SplitUVRow_NEON, 0, 2, 0, 15)
+#endif
+#ifdef HAS_SPLITUVROW_MIPS_DSPR2
+ANY12(SplitUVRow_Any_MIPS_DSPR2, SplitUVRow_MIPS_DSPR2, 0, 2, 0, 15)
+#endif
+#ifdef HAS_ARGBTOUV444ROW_SSSE3
+ANY12(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_SSSE3, 0, 4, 0, 15)
+#endif
+#ifdef HAS_YUY2TOUV422ROW_AVX2
+ANY12(YUY2ToUV422Row_Any_AVX2, YUY2ToUV422Row_AVX2, 1, 4, 1, 31)
+ANY12(UYVYToUV422Row_Any_AVX2, UYVYToUV422Row_AVX2, 1, 4, 1, 31)
+#endif
+#ifdef HAS_ARGBTOUV422ROW_SSSE3
+ANY12(ARGBToUV422Row_Any_SSSE3, ARGBToUV422Row_SSSE3, 0, 4, 1, 15)
+#endif
+#ifdef HAS_YUY2TOUV422ROW_SSE2
+ANY12(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_SSE2, 1, 4, 1, 15)
+ANY12(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_SSE2, 1, 4, 1, 15)
+#endif
+#ifdef HAS_YUY2TOUV422ROW_NEON
+ANY12(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON, 0, 4, 0, 7)
+ANY12(ARGBToUV422Row_Any_NEON, ARGBToUV422Row_NEON, 0, 4, 1, 15)
+ANY12(ARGBToUV411Row_Any_NEON, ARGBToUV411Row_NEON, 0, 4, 2, 31)
+ANY12(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON, 1, 4, 1, 15)
+ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15)
+#endif
+#undef ANY12
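The replication memcpys in ANY12 exist because the 422/411 C fallbacks average groups of two or four source pixels: when width is not a multiple of the group size, the wrapper pads the staged block by repeating the last real pixel so the kernel always sees complete groups. A small demo of the (width & 3) == 1 case on the 411 path (BPP = 4, UVSHIFT = 0), with a hypothetical 13-pixel row:

#include <stdio.h>
#include <string.h>

int main(void) {
  unsigned char temp[4 * 16];
  int BPP = 4, r = 13;  /* width 13, MASK 31, so n = 0 and r = 13 */
  int x;
  for (x = 0; x < r * BPP; ++x) temp[x] = (unsigned char)(x / BPP);
  /* Pad the last 4-pixel group: one copy, then a double copy that also
     re-reads the fresh duplicate, yielding three repeats in total.     */
  memcpy(temp + r * BPP, temp + r * BPP - BPP, BPP);
  memcpy(temp + r * BPP + BPP, temp + r * BPP - BPP, BPP * 2);
  for (x = 12; x < 16; ++x) printf("pixel %d = %d\n", x, temp[x * 4]);
  return 0;  /* prints 12 12 12 12: the final group is complete */
}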
+
+// Any 1 to 2 with source stride (2 rows of source). Outputs UV planes.
+// A 128-byte temp row allows for 32 AVX ARGB pixels.
+#define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \
+ void NAMEANY(const uint8* src_ptr, int src_stride_ptr, \
+ uint8* dst_u, uint8* dst_v, int width) { \
+ SIMD_ALIGNED(uint8 temp[128 * 4]); \
+ memset(temp, 0, 128 * 2); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr, src_stride_ptr, dst_u, dst_v, n); \
+ } \
+ memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
+ memcpy(temp + 128, src_ptr + src_stride_ptr + (n >> UVSHIFT) * BPP, \
+ SS(r, UVSHIFT) * BPP); \
+ if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */\
+ memcpy(temp + SS(r, UVSHIFT) * BPP, \
+ temp + SS(r, UVSHIFT) * BPP - BPP, BPP); \
+ memcpy(temp + 128 + SS(r, UVSHIFT) * BPP, \
+ temp + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP); \
+ } \
+ ANY_SIMD(temp, 128, temp + 256, temp + 384, MASK + 1); \
+ memcpy(dst_u + (n >> 1), temp + 256, SS(r, 1)); \
+ memcpy(dst_v + (n >> 1), temp + 384, SS(r, 1)); \
+ }
+
+#ifdef HAS_ARGBTOUVROW_AVX2
+ANY12S(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, 0, 4, 31)
+#endif
+#ifdef HAS_ARGBTOUVROW_SSSE3
+ANY12S(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_SSSE3, 0, 4, 15)
+ANY12S(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_SSSE3, 0, 4, 15)
+ANY12S(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_SSSE3, 0, 4, 15)
+ANY12S(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_SSSE3, 0, 4, 15)
+ANY12S(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_SSSE3, 0, 4, 15)
+#endif
+#ifdef HAS_YUY2TOUVROW_AVX2
+ANY12S(YUY2ToUVRow_Any_AVX2, YUY2ToUVRow_AVX2, 1, 4, 31)
+ANY12S(UYVYToUVRow_Any_AVX2, UYVYToUVRow_AVX2, 1, 4, 31)
+#endif
+#ifdef HAS_YUY2TOUVROW_SSE2
+ANY12S(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_SSE2, 1, 4, 15)
+ANY12S(UYVYToUVRow_Any_SSE2, UYVYToUVRow_SSE2, 1, 4, 15)
+#endif
+#ifdef HAS_ARGBTOUVROW_NEON
+ANY12S(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, 0, 4, 15)
+#endif
+#ifdef HAS_ARGBTOUVJROW_NEON
+ANY12S(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, 0, 4, 15)
+#endif
+#ifdef HAS_BGRATOUVROW_NEON
+ANY12S(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, 0, 4, 15)
+#endif
+#ifdef HAS_ABGRTOUVROW_NEON
+ANY12S(ABGRToUVRow_Any_NEON, ABGRToUVRow_NEON, 0, 4, 15)
+#endif
+#ifdef HAS_RGBATOUVROW_NEON
+ANY12S(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, 0, 4, 15)
+#endif
+#ifdef HAS_RGB24TOUVROW_NEON
+ANY12S(RGB24ToUVRow_Any_NEON, RGB24ToUVRow_NEON, 0, 3, 15)
+#endif
+#ifdef HAS_RAWTOUVROW_NEON
+ANY12S(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, 0, 3, 15)
+#endif
+#ifdef HAS_RGB565TOUVROW_NEON
+ANY12S(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, 0, 2, 15)
+#endif
+#ifdef HAS_ARGB1555TOUVROW_NEON
+ANY12S(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, 0, 2, 15)
+#endif
+#ifdef HAS_ARGB4444TOUVROW_NEON
+ANY12S(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, 0, 2, 15)
+#endif
+#ifdef HAS_YUY2TOUVROW_NEON
+ANY12S(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, 1, 4, 15)
+#endif
+#ifdef HAS_UYVYTOUVROW_NEON
+ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15)
+#endif
+#undef ANY12S
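A detail worth noting in ANY12S: the second source row is staged at temp + 128, so the wrapper hands the kernel a literal stride of 128 (the ANY_SIMD(temp, 128, ...) call above), and the two-row subsampling kernel runs entirely inside the 512-byte scratch area. A sketch of how any two-row kernel addresses that layout, with AverageTwoRows as a made-up illustration:

typedef unsigned char uint8;

/* Row 1 is found at src_ptr + src_stride; for the staged remainder
   pass that resolves to temp + 128. */
static int AverageTwoRows(const uint8* src_ptr, int src_stride, int x) {
  return (src_ptr[x] + src_ptr[x + src_stride] + 1) >> 1;
}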
#ifdef __cplusplus
} // extern "C"
diff --git a/TMessagesProj/jni/libyuv/source/row_common.cc b/TMessagesProj/jni/libyuv/source/row_common.cc
index fa2b752a2..c820cdf1f 100644
--- a/TMessagesProj/jni/libyuv/source/row_common.cc
+++ b/TMessagesProj/jni/libyuv/source/row_common.cc
@@ -100,6 +100,20 @@ void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) {
}
}
+void RAWToRGB24Row_C(const uint8* src_raw, uint8* dst_rgb24, int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ uint8 r = src_raw[0];
+ uint8 g = src_raw[1];
+ uint8 b = src_raw[2];
+ dst_rgb24[0] = b;
+ dst_rgb24[1] = g;
+ dst_rgb24[2] = r;
+ dst_rgb24 += 3;
+ src_raw += 3;
+ }
+}
+
void RGB565ToARGBRow_C(const uint8* src_rgb565, uint8* dst_argb, int width) {
int x;
for (x = 0; x < width; ++x) {
@@ -199,6 +213,40 @@ void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
}
}
+// dither4 is a row of 4 values from a 4x4 dither matrix.
+// The 4x4 matrix contains values to add to RGB. When converting to
+// fewer bits (565) this provides an ordered dither.
+// The first byte of dither4 corresponds to the upper-left entry of the
+// matrix row. The 4 values are passed as an int and referenced as a byte
+// array, so endianness does not affect the order of the original matrix;
+// dither4 holds the first pixel's value in the low byte on little-endian
+// machines and in the high byte on big-endian machines.
+void ARGBToRGB565DitherRow_C(const uint8* src_argb, uint8* dst_rgb,
+ const uint32 dither4, int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ int dither0 = ((const unsigned char*)(&dither4))[x & 3];
+ int dither1 = ((const unsigned char*)(&dither4))[(x + 1) & 3];
+ uint8 b0 = clamp255(src_argb[0] + dither0) >> 3;
+ uint8 g0 = clamp255(src_argb[1] + dither0) >> 2;
+ uint8 r0 = clamp255(src_argb[2] + dither0) >> 3;
+ uint8 b1 = clamp255(src_argb[4] + dither1) >> 3;
+ uint8 g1 = clamp255(src_argb[5] + dither1) >> 2;
+ uint8 r1 = clamp255(src_argb[6] + dither1) >> 3;
+ WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) |
+ (b1 << 16) | (g1 << 21) | (r1 << 27));
+ dst_rgb += 4;
+ src_argb += 8;
+ }
+ if (width & 1) {
+ int dither0 = ((const unsigned char*)(&dither4))[(width - 1) & 3];
+ uint8 b0 = clamp255(src_argb[0] + dither0) >> 3;
+ uint8 g0 = clamp255(src_argb[1] + dither0) >> 2;
+ uint8 r0 = clamp255(src_argb[2] + dither0) >> 3;
+ *(uint16*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
+ }
+}
+
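A sketch of how a caller might pack one matrix row into the dither4 argument; the values are illustrative, not taken from libyuv:

typedef unsigned int uint32;

/* Pack one row of a 4x4 ordered-dither matrix, e.g. {0, 4, 1, 5}. */
static uint32 PackDither4(int d0, int d1, int d2, int d3) {
  return (uint32)d0 | ((uint32)d1 << 8) |
         ((uint32)d2 << 16) | ((uint32)d3 << 24);
}
/* ARGBToRGB565DitherRow_C reads byte (x & 3) of this word, so on a
   little-endian machine pixel 0 receives d0, pixel 1 receives d1, etc. */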
void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
@@ -385,6 +433,28 @@ void NAME ## ToUVJRow_C(const uint8* src_rgb0, int src_stride_rgb, \
MAKEROWYJ(ARGB, 2, 1, 0, 4)
#undef MAKEROWYJ
+void ARGBToUVJ422Row_C(const uint8* src_argb,
+ uint8* dst_u, uint8* dst_v, int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ uint8 ab = (src_argb[0] + src_argb[4]) >> 1;
+ uint8 ag = (src_argb[1] + src_argb[5]) >> 1;
+ uint8 ar = (src_argb[2] + src_argb[6]) >> 1;
+ dst_u[0] = RGBToUJ(ar, ag, ab);
+ dst_v[0] = RGBToVJ(ar, ag, ab);
+ src_argb += 8;
+ dst_u += 1;
+ dst_v += 1;
+ }
+ if (width & 1) {
+ uint8 ab = src_argb[0];
+ uint8 ag = src_argb[1];
+ uint8 ar = src_argb[2];
+ dst_u[0] = RGBToUJ(ar, ag, ab);
+ dst_v[0] = RGBToVJ(ar, ag, ab);
+ }
+}
+
void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) {
int x;
for (x = 0; x < width; ++x) {
@@ -623,10 +693,11 @@ void ARGBToUV411Row_C(const uint8* src_argb,
dst_u += 1;
dst_v += 1;
}
+ // Odd width handling mimics 'any' function which replicates last pixel.
if ((width & 3) == 3) {
- uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8]) / 3;
- uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9]) / 3;
- uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10]) / 3;
+ uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8] + src_argb[8]) >> 2;
+ uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9] + src_argb[9]) >> 2;
+ uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10] + src_argb[10]) >> 2;
dst_u[0] = RGBToU(ar, ag, ab);
dst_v[0] = RGBToV(ar, ag, ab);
} else if ((width & 3) == 2) {
@@ -926,7 +997,7 @@ void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely,
}
}
-void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
+void J400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
// Copy a Y to RGB.
int x;
for (x = 0; x < width; ++x) {
@@ -938,47 +1009,344 @@ void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
}
}
+// TODO(fbarchard): Unify these structures to be platform independent.
+// TODO(fbarchard): Generate SIMD structures from float matrix.
+
+// BT.601 YUV to RGB reference
+// R = (Y - 16) * 1.164 - V * -1.596
+// G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813
+// B = (Y - 16) * 1.164 - U * -2.018
+
+// Y contribution to R,G,B. Scale and bias.
+#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
+#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
+
+// U and V contributions to R,G,B.
+#define UB -128 /* max(-128, round(-2.018 * 64)) */
+#define UG 25 /* round(0.391 * 64) */
+#define VG 52 /* round(0.813 * 64) */
+#define VR -102 /* round(-1.596 * 64) */
+
+// Bias values to subtract 16 from Y and 128 from U and V.
+#define BB (UB * 128 + YGB)
+#define BG (UG * 128 + VG * 128 + YGB)
+#define BR (VR * 128 + YGB)
+
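The bracketed derivations above can be spot-checked directly; a small C99 program verifying the BT.601 fixed-point values:

#include <math.h>
#include <stdio.h>

int main(void) {
  printf("YG  = %d\n", (int)round(1.164 * 64 * 256 * 256 / 257)); /* 18997 */
  printf("YGB = %d\n", (int)round(1.164 * 64 * -16 + 64 / 2));    /* -1160 */
  printf("UG  = %d\n", (int)round(0.391 * 64));                   /*    25 */
  printf("VG  = %d\n", (int)round(0.813 * 64));                   /*    52 */
  printf("VR  = %d\n", -(int)round(1.596 * 64));                  /*  -102 */
  /* UB is the one saturated value: round(-2.018 * 64) = -129, clamped
     to -128 so it fits the signed 8-bit SIMD vectors. */
  return 0;
}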
+#if defined(__aarch64__)
+const YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
+ { -UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR },
+ { -UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR },
+ { UG, VG, UG, VG, UG, VG, UG, VG },
+ { UG, VG, UG, VG, UG, VG, UG, VG },
+ { BB, BG, BR, 0, 0, 0, 0, 0 },
+ { 0x0101 * YG, 0, 0, 0 }
+};
+const YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
+ { -VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB },
+ { -VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB },
+ { VG, UG, VG, UG, VG, UG, VG, UG },
+ { VG, UG, VG, UG, VG, UG, VG, UG },
+ { BR, BG, BB, 0, 0, 0, 0, 0 },
+ { 0x0101 * YG, 0, 0, 0 }
+};
+#elif defined(__arm__)
+const YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
+ { -UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { BB, BG, BR, 0, 0, 0, 0, 0 },
+ { 0x0101 * YG, 0, 0, 0 }
+};
+const YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
+ { -VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { BR, BG, BB, 0, 0, 0, 0, 0 },
+ { 0x0101 * YG, 0, 0, 0 }
+};
+#else
+const YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
+ { UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
+ UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0 },
+ { UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
+ UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG },
+ { 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
+ 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR },
+ { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB },
+ { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG },
+ { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
+ { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG }
+};
+const YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
+ { VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
+ VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0 },
+ { VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
+ VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG },
+ { 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
+ 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB },
+ { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
+ { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG },
+ { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB },
+ { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG }
+};
+#endif
+
+#undef BB
+#undef BG
+#undef BR
+#undef YGB
+#undef UB
+#undef UG
+#undef VG
+#undef VR
+#undef YG
+
+// JPEG YUV to RGB reference
+// * R = Y - V * -1.40200
+// * G = Y - U * 0.34414 - V * 0.71414
+// * B = Y - U * -1.77200
+
+// Y contribution to R,G,B. Scale and bias.
+#define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
+#define YGB 32 /* 64 / 2 */
+
+// U and V contributions to R,G,B.
+#define UB -113 /* round(-1.77200 * 64) */
+#define UG 22 /* round(0.34414 * 64) */
+#define VG 46 /* round(0.71414 * 64) */
+#define VR -90 /* round(-1.40200 * 64) */
+
+// Bias values to round, and subtract 128 from U and V.
+#define BB (UB * 128 + YGB)
+#define BG (UG * 128 + VG * 128 + YGB)
+#define BR (VR * 128 + YGB)
+
+#if defined(__aarch64__)
+const YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
+ { -UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR },
+ { -UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR },
+ { UG, VG, UG, VG, UG, VG, UG, VG },
+ { UG, VG, UG, VG, UG, VG, UG, VG },
+ { BB, BG, BR, 0, 0, 0, 0, 0 },
+ { 0x0101 * YG, 0, 0, 0 }
+};
+const YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
+ { -VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB },
+ { -VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB },
+ { VG, UG, VG, UG, VG, UG, VG, UG },
+ { VG, UG, VG, UG, VG, UG, VG, UG },
+ { BR, BG, BB, 0, 0, 0, 0, 0 },
+ { 0x0101 * YG, 0, 0, 0 }
+};
+#elif defined(__arm__)
+const YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
+ { -UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { BB, BG, BR, 0, 0, 0, 0, 0 },
+ { 0x0101 * YG, 0, 0, 0 }
+};
+const YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
+ { -VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { BR, BG, BB, 0, 0, 0, 0, 0 },
+ { 0x0101 * YG, 0, 0, 0 }
+};
+#else
+const YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
+ { UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
+ UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0 },
+ { UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
+ UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG },
+ { 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
+ 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR },
+ { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB },
+ { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG },
+ { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
+ { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG }
+};
+const YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
+ { VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
+ VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0 },
+ { VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
+ VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG },
+ { 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
+ 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB },
+ { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
+ { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG },
+ { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB },
+ { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG }
+};
+#endif
+
+#undef BB
+#undef BG
+#undef BR
+#undef YGB
+#undef UB
+#undef UG
+#undef VG
+#undef VR
+#undef YG
+
+// BT.709 YUV to RGB reference
+// * R = Y - V * -1.28033
+// * G = Y - U * 0.21482 - V * 0.38059
+// * B = Y - U * -2.12798
+
+// Y contribution to R,G,B. Scale and bias.
+#define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
+#define YGB 32 /* 64 / 2 */
+
+// TODO(fbarchard): Find way to express 2.12 instead of 2.0.
+// U and V contributions to R,G,B.
+#define UB -128 /* max(-128, round(-2.12798 * 64)) */
+#define UG 14 /* round(0.21482 * 64) */
+#define VG 24 /* round(0.38059 * 64) */
+#define VR -82 /* round(-1.28033 * 64) */
+
+// Bias values to round, and subtract 128 from U and V.
+#define BB (UB * 128 + YGB)
+#define BG (UG * 128 + VG * 128 + YGB)
+#define BR (VR * 128 + YGB)
+
+#if defined(__aarch64__)
+const YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
+ { -UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR },
+ { -UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR },
+ { UG, VG, UG, VG, UG, VG, UG, VG },
+ { UG, VG, UG, VG, UG, VG, UG, VG },
+ { BB, BG, BR, 0, 0, 0, 0, 0 },
+ { 0x0101 * YG, 0, 0, 0 }
+};
+const YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
+ { -VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB },
+ { -VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB },
+ { VG, UG, VG, UG, VG, UG, VG, UG },
+ { VG, UG, VG, UG, VG, UG, VG, UG },
+ { BR, BG, BB, 0, 0, 0, 0, 0 },
+ { 0x0101 * YG, 0, 0, 0 }
+};
+#elif defined(__arm__)
+const YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
+ { -UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { BB, BG, BR, 0, 0, 0, 0, 0 },
+ { 0x0101 * YG, 0, 0, 0 }
+};
+const YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
+ { -VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { BR, BG, BB, 0, 0, 0, 0, 0 },
+ { 0x0101 * YG, 0, 0, 0 }
+};
+#else
+const YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
+ { UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
+ UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0 },
+ { UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
+ UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG },
+ { 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
+ 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR },
+ { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB },
+ { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG },
+ { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
+ { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG }
+};
+const YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
+ { VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
+ VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0 },
+ { VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
+ VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG },
+ { 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
+ 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB },
+ { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
+ { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG },
+ { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB },
+ { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG }
+};
+#endif
+
+#undef BB
+#undef BG
+#undef BR
+#undef YGB
+#undef UB
+#undef UG
+#undef VG
+#undef VR
+#undef YG
+
// C reference code that mimics the YUV assembly.
-
-#define YG 74 /* (int8)(1.164 * 64 + 0.5) */
-
-#define UB 127 /* min(63,(int8)(2.018 * 64)) */
-#define UG -25 /* (int8)(-0.391 * 64 - 0.5) */
-#define UR 0
-
-#define VB 0
-#define VG -52 /* (int8)(-0.813 * 64 - 0.5) */
-#define VR 102 /* (int8)(1.596 * 64 + 0.5) */
-
-// Bias
-#define BB UB * 128 + VB * 128
-#define BG UG * 128 + VG * 128
-#define BR UR * 128 + VR * 128
-
static __inline void YuvPixel(uint8 y, uint8 u, uint8 v,
- uint8* b, uint8* g, uint8* r) {
- int32 y1 = ((int32)(y) - 16) * YG;
- *b = Clamp((int32)((u * UB + v * VB) - (BB) + y1) >> 6);
- *g = Clamp((int32)((u * UG + v * VG) - (BG) + y1) >> 6);
- *r = Clamp((int32)((u * UR + v * VR) - (BR) + y1) >> 6);
+ uint8* b, uint8* g, uint8* r,
+ const struct YuvConstants* yuvconstants) {
+#if defined(__aarch64__)
+ int ub = -yuvconstants->kUVToRB[0];
+ int ug = yuvconstants->kUVToG[0];
+ int vg = yuvconstants->kUVToG[1];
+ int vr = -yuvconstants->kUVToRB[1];
+ int bb = yuvconstants->kUVBiasBGR[0];
+ int bg = yuvconstants->kUVBiasBGR[1];
+ int br = yuvconstants->kUVBiasBGR[2];
+ int yg = yuvconstants->kYToRgb[0] / 0x0101;
+#elif defined(__arm__)
+ int ub = -yuvconstants->kUVToRB[0];
+ int ug = yuvconstants->kUVToG[0];
+ int vg = yuvconstants->kUVToG[4];
+ int vr = -yuvconstants->kUVToRB[4];
+ int bb = yuvconstants->kUVBiasBGR[0];
+ int bg = yuvconstants->kUVBiasBGR[1];
+ int br = yuvconstants->kUVBiasBGR[2];
+ int yg = yuvconstants->kYToRgb[0] / 0x0101;
+#else
+ int ub = yuvconstants->kUVToB[0];
+ int ug = yuvconstants->kUVToG[0];
+ int vg = yuvconstants->kUVToG[1];
+ int vr = yuvconstants->kUVToR[1];
+ int bb = yuvconstants->kUVBiasB[0];
+ int bg = yuvconstants->kUVBiasG[0];
+ int br = yuvconstants->kUVBiasR[0];
+ int yg = yuvconstants->kYToRgb[0];
+#endif
+
+ uint32 y1 = (uint32)(y * 0x0101 * yg) >> 16;
+ *b = Clamp((int32)(-(u * ub ) + y1 + bb) >> 6);
+ *g = Clamp((int32)(-(u * ug + v * vg) + y1 + bg) >> 6);
+ *r = Clamp((int32)(-( v * vr) + y1 + br) >> 6);
}
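Tracing the new fixed-point path through the BT.601 numbers confirms it reproduces the reference black point: a standalone sketch of the x86 branch, with the constants inlined rather than read from kYuvI601Constants:

#include <stdio.h>

static int Clamp(int v) { return v < 0 ? 0 : (v > 255 ? 255 : v); }

int main(void) {
  int ub = -128, ug = 25, vg = 52, vr = -102;   /* BT.601, from above */
  int yg = 18997, ygb = -1160;
  int bb = ub * 128 + ygb;                      /* -17544 */
  int bg = ug * 128 + vg * 128 + ygb;           /*   8696 */
  int br = vr * 128 + ygb;                      /* -14216 */
  int y = 16, u = 128, v = 128;                 /* video black */
  int y1 = (int)((unsigned int)(y * 0x0101 * yg) >> 16);  /* 1191 */
  printf("B = %d\n", Clamp((-(u * ub) + y1 + bb) >> 6));          /* 0 */
  printf("G = %d\n", Clamp((-(u * ug + v * vg) + y1 + bg) >> 6)); /* 0 */
  printf("R = %d\n", Clamp((-(v * vr) + y1 + br) >> 6));          /* 0 */
  /* y = 235 yields 255,255,255: the video white point. */
  return 0;
}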
+// Y contribution to R,G,B. Scale and bias.
+#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
+#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
+
+// C reference code that mimics the YUV assembly.
+static __inline void YPixel(uint8 y, uint8* b, uint8* g, uint8* r) {
+ uint32 y1 = (uint32)(y * 0x0101 * YG) >> 16;
+ *b = Clamp((int32)(y1 + YGB) >> 6);
+ *g = Clamp((int32)(y1 + YGB) >> 6);
+ *r = Clamp((int32)(y1 + YGB) >> 6);
+}
+
+#undef YG
+#undef YGB
+
#if !defined(LIBYUV_DISABLE_NEON) && \
- (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
+ (defined(__ARM_NEON__) || defined(__aarch64__) || defined(LIBYUV_NEON))
// C mimics the assembly.
// TODO(fbarchard): Remove subsampling from Neon.
void I444ToARGBRow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* rgb_buf,
+ const struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
uint8 u = (src_u[0] + src_u[1] + 1) >> 1;
uint8 v = (src_v[0] + src_v[1] + 1) >> 1;
- YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
+ yuvconstants);
rgb_buf[3] = 255;
- YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
+ YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6,
+ yuvconstants);
rgb_buf[7] = 255;
src_y += 2;
src_u += 2;
@@ -987,7 +1355,8 @@ void I444ToARGBRow_C(const uint8* src_y,
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
+ rgb_buf[3] = 255;
}
}
#else
@@ -995,11 +1364,12 @@ void I444ToARGBRow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* rgb_buf,
+ const struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width; ++x) {
YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
src_y += 1;
src_u += 1;
@@ -1008,19 +1378,21 @@ void I444ToARGBRow_C(const uint8* src_y,
}
}
#endif
+
// Also used for 420
void I422ToARGBRow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* rgb_buf,
+ const struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
YuvPixel(src_y[1], src_u[0], src_v[0],
- rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
+ rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255;
src_y += 2;
src_u += 1;
@@ -1029,22 +1401,51 @@ void I422ToARGBRow_C(const uint8* src_y,
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
}
}
+void I422AlphaToARGBRow_C(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ const uint8* src_a,
+ uint8* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ YuvPixel(src_y[0], src_u[0], src_v[0],
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
+ rgb_buf[3] = src_a[0];
+ YuvPixel(src_y[1], src_u[0], src_v[0],
+ rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
+ rgb_buf[7] = src_a[1];
+ src_y += 2;
+ src_u += 1;
+ src_v += 1;
+ src_a += 2;
+ rgb_buf += 8; // Advance 2 pixels.
+ }
+ if (width & 1) {
+ YuvPixel(src_y[0], src_u[0], src_v[0],
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
+ rgb_buf[3] = src_a[0];
+ }
+}
+
void I422ToRGB24Row_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* rgb_buf,
+ const struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
YuvPixel(src_y[1], src_u[0], src_v[0],
- rgb_buf + 3, rgb_buf + 4, rgb_buf + 5);
+ rgb_buf + 3, rgb_buf + 4, rgb_buf + 5, yuvconstants);
src_y += 2;
src_u += 1;
src_v += 1;
@@ -1052,29 +1453,7 @@ void I422ToRGB24Row_C(const uint8* src_y,
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- }
-}
-
-void I422ToRAWRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
- int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
- YuvPixel(src_y[1], src_u[0], src_v[0],
- rgb_buf + 5, rgb_buf + 4, rgb_buf + 3);
- src_y += 2;
- src_u += 1;
- src_v += 1;
- rgb_buf += 6; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
}
}
@@ -1082,6 +1461,7 @@ void I422ToARGB4444Row_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb4444,
+ const struct YuvConstants* yuvconstants,
int width) {
uint8 b0;
uint8 g0;
@@ -1091,8 +1471,8 @@ void I422ToARGB4444Row_C(const uint8* src_y,
uint8 r1;
int x;
for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
- YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
+ YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
+ YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
b0 = b0 >> 4;
g0 = g0 >> 4;
r0 = r0 >> 4;
@@ -1107,7 +1487,7 @@ void I422ToARGB4444Row_C(const uint8* src_y,
dst_argb4444 += 4; // Advance 2 pixels.
}
if (width & 1) {
- YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
+ YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
b0 = b0 >> 4;
g0 = g0 >> 4;
r0 = r0 >> 4;
@@ -1120,6 +1500,7 @@ void I422ToARGB1555Row_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb1555,
+ const struct YuvConstants* yuvconstants,
int width) {
uint8 b0;
uint8 g0;
@@ -1129,8 +1510,8 @@ void I422ToARGB1555Row_C(const uint8* src_y,
uint8 r1;
int x;
for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
- YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
+ YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
+ YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
b0 = b0 >> 3;
g0 = g0 >> 3;
r0 = r0 >> 3;
@@ -1145,7 +1526,7 @@ void I422ToARGB1555Row_C(const uint8* src_y,
dst_argb1555 += 4; // Advance 2 pixels.
}
if (width & 1) {
- YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
+ YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
b0 = b0 >> 3;
g0 = g0 >> 3;
r0 = r0 >> 3;
@@ -1158,6 +1539,7 @@ void I422ToRGB565Row_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
int width) {
uint8 b0;
uint8 g0;
@@ -1167,8 +1549,8 @@ void I422ToRGB565Row_C(const uint8* src_y,
uint8 r1;
int x;
for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
- YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
+ YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
+ YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
b0 = b0 >> 3;
g0 = g0 >> 2;
r0 = r0 >> 3;
@@ -1183,7 +1565,7 @@ void I422ToRGB565Row_C(const uint8* src_y,
dst_rgb565 += 4; // Advance 2 pixels.
}
if (width & 1) {
- YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
+ YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
b0 = b0 >> 3;
g0 = g0 >> 2;
r0 = r0 >> 3;
@@ -1195,20 +1577,21 @@ void I411ToARGBRow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* rgb_buf,
+ const struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width - 3; x += 4) {
YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
YuvPixel(src_y[1], src_u[0], src_v[0],
- rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
+ rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255;
YuvPixel(src_y[2], src_u[0], src_v[0],
- rgb_buf + 8, rgb_buf + 9, rgb_buf + 10);
+ rgb_buf + 8, rgb_buf + 9, rgb_buf + 10, yuvconstants);
rgb_buf[11] = 255;
YuvPixel(src_y[3], src_u[0], src_v[0],
- rgb_buf + 12, rgb_buf + 13, rgb_buf + 14);
+ rgb_buf + 12, rgb_buf + 13, rgb_buf + 14, yuvconstants);
rgb_buf[15] = 255;
src_y += 4;
src_u += 1;
@@ -1217,40 +1600,41 @@ void I411ToARGBRow_C(const uint8* src_y,
}
if (width & 2) {
YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
YuvPixel(src_y[1], src_u[0], src_v[0],
- rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
+ rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255;
src_y += 2;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
}
}
void NV12ToARGBRow_C(const uint8* src_y,
- const uint8* usrc_v,
+ const uint8* src_uv,
uint8* rgb_buf,
+ const struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], usrc_v[0], usrc_v[1],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ YuvPixel(src_y[0], src_uv[0], src_uv[1],
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
- YuvPixel(src_y[1], usrc_v[0], usrc_v[1],
- rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
+ YuvPixel(src_y[1], src_uv[0], src_uv[1],
+ rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255;
src_y += 2;
- usrc_v += 2;
+ src_uv += 2;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
- YuvPixel(src_y[0], usrc_v[0], usrc_v[1],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ YuvPixel(src_y[0], src_uv[0], src_uv[1],
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
}
}
@@ -1258,31 +1642,31 @@ void NV12ToARGBRow_C(const uint8* src_y,
void NV21ToARGBRow_C(const uint8* src_y,
const uint8* src_vu,
uint8* rgb_buf,
+ const struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_vu[1], src_vu[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
-
YuvPixel(src_y[1], src_vu[1], src_vu[0],
- rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
+ rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255;
-
src_y += 2;
src_vu += 2;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_vu[1], src_vu[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
}
}
void NV12ToRGB565Row_C(const uint8* src_y,
- const uint8* usrc_v,
+ const uint8* src_uv,
uint8* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
int width) {
uint8 b0;
uint8 g0;
@@ -1292,8 +1676,8 @@ void NV12ToRGB565Row_C(const uint8* src_y,
uint8 r1;
int x;
for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0);
- YuvPixel(src_y[1], usrc_v[0], usrc_v[1], &b1, &g1, &r1);
+ YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
+ YuvPixel(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1, yuvconstants);
b0 = b0 >> 3;
g0 = g0 >> 2;
r0 = r0 >> 3;
@@ -1303,46 +1687,11 @@ void NV12ToRGB565Row_C(const uint8* src_y,
*(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
(b1 << 16) | (g1 << 21) | (r1 << 27);
src_y += 2;
- usrc_v += 2;
+ src_uv += 2;
dst_rgb565 += 4; // Advance 2 pixels.
}
if (width & 1) {
- YuvPixel(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0);
- b0 = b0 >> 3;
- g0 = g0 >> 2;
- r0 = r0 >> 3;
- *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
- }
-}
-
-void NV21ToRGB565Row_C(const uint8* src_y,
- const uint8* vsrc_u,
- uint8* dst_rgb565,
- int width) {
- uint8 b0;
- uint8 g0;
- uint8 r0;
- uint8 b1;
- uint8 g1;
- uint8 r1;
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);
- YuvPixel(src_y[1], vsrc_u[1], vsrc_u[0], &b1, &g1, &r1);
- b0 = b0 >> 3;
- g0 = g0 >> 2;
- r0 = r0 >> 3;
- b1 = b1 >> 3;
- g1 = g1 >> 2;
- r1 = r1 >> 3;
- *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
- (b1 << 16) | (g1 << 21) | (r1 << 27);
- src_y += 2;
- vsrc_u += 2;
- dst_rgb565 += 4; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);
+ YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
b0 = b0 >> 3;
g0 = g0 >> 2;
r0 = r0 >> 3;
@@ -1352,92 +1701,44 @@ void NV21ToRGB565Row_C(const uint8* src_y,
void YUY2ToARGBRow_C(const uint8* src_yuy2,
uint8* rgb_buf,
+ const struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3],
- rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
+ rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255;
src_yuy2 += 4;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
}
}
void UYVYToARGBRow_C(const uint8* src_uyvy,
uint8* rgb_buf,
+ const struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2],
- rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
+ rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255;
src_uyvy += 4;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- rgb_buf[3] = 255;
- }
-}
-
-void I422ToBGRARow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
- int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 3, rgb_buf + 2, rgb_buf + 1);
- rgb_buf[0] = 255;
- YuvPixel(src_y[1], src_u[0], src_v[0],
- rgb_buf + 7, rgb_buf + 6, rgb_buf + 5);
- rgb_buf[4] = 255;
- src_y += 2;
- src_u += 1;
- src_v += 1;
- rgb_buf += 8; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 3, rgb_buf + 2, rgb_buf + 1);
- rgb_buf[0] = 255;
- }
-}
-
-void I422ToABGRRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
- int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
- rgb_buf[3] = 255;
- YuvPixel(src_y[1], src_u[0], src_v[0],
- rgb_buf + 6, rgb_buf + 5, rgb_buf + 4);
- rgb_buf[7] = 255;
- src_y += 2;
- src_u += 1;
- src_v += 1;
- rgb_buf += 8; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
}
}
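
YUY2 and UYVY are both packed 4:2:2: every 4 bytes carry two luma samples plus one shared U/V pair, and the two loops above differ only in which byte offsets they read. A sketch of the unpacking (helper name illustrative):

// Decode one 4-byte 4:2:2 macropixel. YUY2 stores Y0 U Y1 V; UYVY
// stores U Y0 V Y1, so is_uyvy just swaps the offsets.
static void Unpack422(const uint8* s, int is_uyvy,
                      uint8* y0, uint8* y1, uint8* u, uint8* v) {
  *y0 = s[is_uyvy ? 1 : 0];
  *u  = s[is_uyvy ? 0 : 1];
  *y1 = s[is_uyvy ? 3 : 2];
  *v  = s[is_uyvy ? 2 : 3];
}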
@@ -1446,14 +1747,15 @@ void I422ToRGBARow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* rgb_buf,
+ const struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 1, rgb_buf + 2, rgb_buf + 3);
+ rgb_buf + 1, rgb_buf + 2, rgb_buf + 3, yuvconstants);
rgb_buf[0] = 255;
YuvPixel(src_y[1], src_u[0], src_v[0],
- rgb_buf + 5, rgb_buf + 6, rgb_buf + 7);
+ rgb_buf + 5, rgb_buf + 6, rgb_buf + 7, yuvconstants);
rgb_buf[4] = 255;
src_y += 2;
src_u += 1;
@@ -1462,26 +1764,23 @@ void I422ToRGBARow_C(const uint8* src_y,
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 1, rgb_buf + 2, rgb_buf + 3);
+ rgb_buf + 1, rgb_buf + 2, rgb_buf + 3, yuvconstants);
rgb_buf[0] = 255;
}
}
-void YToARGBRow_C(const uint8* src_y, uint8* rgb_buf, int width) {
+void I400ToARGBRow_C(const uint8* src_y, uint8* rgb_buf, int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], 128, 128,
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
rgb_buf[3] = 255;
- YuvPixel(src_y[1], 128, 128,
- rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
+ YPixel(src_y[1], rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
rgb_buf[7] = 255;
src_y += 2;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
- YuvPixel(src_y[0], 128, 128,
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
rgb_buf[3] = 255;
}
}
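
YToARGBRow_C becomes I400ToARGBRow_C and uses a dedicated YPixel() instead of YuvPixel(y, 128, 128, ...): with U and V pinned to 128 the chroma terms vanish, leaving only the studio-range luma expansion. Conceptually it reduces to the following (a hedged sketch of the BT.601 expansion, not libyuv's exact fixed-point form):

// Studio-range Y (16..235) to full-range gray: g = (y - 16) * 255 / 219.
// 298 is round(256 * 255 / 219); +128 rounds the >> 8.
static uint8 GrayExpand(uint8 y) {
  int g = ((y - 16) * 298 + 128) >> 8;
  return (uint8)(g < 0 ? 0 : (g > 255 ? 255 : g));
}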
@@ -1569,28 +1868,15 @@ void CopyRow_16_C(const uint16* src, uint16* dst, int count) {
memcpy(dst, src, count * 2);
}
-void SetRow_C(uint8* dst, uint32 v8, int count) {
-#ifdef _MSC_VER
- // VC will generate rep stosb.
- int x;
- for (x = 0; x < count; ++x) {
- dst[x] = v8;
- }
-#else
- memset(dst, v8, count);
-#endif
+void SetRow_C(uint8* dst, uint8 v8, int width) {
+ memset(dst, v8, width);
}
-void ARGBSetRows_C(uint8* dst, uint32 v32, int width,
- int dst_stride, int height) {
- int y;
- for (y = 0; y < height; ++y) {
- uint32* d = (uint32*)(dst);
- int x;
- for (x = 0; x < width; ++x) {
- d[x] = v32;
- }
- dst += dst_stride;
+void ARGBSetRow_C(uint8* dst_argb, uint32 v32, int width) {
+ uint32* d = (uint32*)(dst_argb);
+ int x;
+ for (x = 0; x < width; ++x) {
+ d[x] = v32;
}
}
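
ARGBSetRows_C (a width x height fill with a stride) is reduced to the single-row ARGBSetRow_C; callers are expected to supply the row loop. A sketch of the replacement pattern at a call site (function name illustrative):

// Fill a width x height ARGB rectangle one row at a time.
static void SetArgbRect(uint8* dst, int dst_stride, int width, int height,
                        uint32 v32) {
  int y;
  for (y = 0; y < height; ++y) {
    ARGBSetRow_C(dst, v32, width);  // width is in pixels, not bytes.
    dst += dst_stride;
  }
}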
@@ -1730,6 +2016,25 @@ void ARGBBlendRow_C(const uint8* src_argb0, const uint8* src_argb1,
}
}
#undef BLEND
+
+#define UBLEND(f, b, a) (((a) * f) + ((255 - a) * b) + 255) >> 8
+void BlendPlaneRow_C(const uint8* src0, const uint8* src1,
+ const uint8* alpha, uint8* dst, int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ dst[0] = UBLEND(src0[0], src1[0], alpha[0]);
+ dst[1] = UBLEND(src0[1], src1[1], alpha[1]);
+ src0 += 2;
+ src1 += 2;
+ alpha += 2;
+ dst += 2;
+ }
+ if (width & 1) {
+ dst[0] = UBLEND(src0[0], src1[0], alpha[0]);
+ }
+}
+#undef UBLEND
+
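UBLEND computes (a*f + (255-a)*b + 255) >> 8: an alpha blend whose +255 bias rounds up, so a == 255 returns f exactly and a == 0 returns b exactly. Note the body is only partially parenthesized (f, b and the final >> 8 are bare), which is fine for the simple lvalue arguments above but unsafe for arbitrary expressions. A small self-check of the endpoint behavior (illustrative, not part of the patch):

#include <assert.h>
// Full and zero alpha must reproduce the inputs exactly for all bytes.
static void UBlendSelfCheck(void) {
  int f, b;
  for (f = 0; f < 256; ++f) {
    for (b = 0; b < 256; ++b) {
      assert(((255 * f + 0 * b + 255) >> 8) == f);  /* alpha == 255 */
      assert(((0 * f + 255 * b + 255) >> 8) == b);  /* alpha == 0   */
    }
  }
}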
#define ATTENUATE(f, a) (a | (a << 8)) * (f | (f << 8)) >> 24
// Multiply source RGB by alpha and store to destination.
@@ -1885,19 +2190,19 @@ void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
}
}
-// Blend 2 rows into 1 for conversions such as I422ToI420.
-void HalfRow_C(const uint8* src_uv, int src_uv_stride,
- uint8* dst_uv, int pix) {
+// Blend 2 rows into 1.
+static void HalfRow_C(const uint8* src_uv, int src_uv_stride,
+ uint8* dst_uv, int width) {
int x;
- for (x = 0; x < pix; ++x) {
+ for (x = 0; x < width; ++x) {
dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
}
}
-void HalfRow_16_C(const uint16* src_uv, int src_uv_stride,
- uint16* dst_uv, int pix) {
+static void HalfRow_16_C(const uint16* src_uv, int src_uv_stride,
+ uint16* dst_uv, int width) {
int x;
- for (x = 0; x < pix; ++x) {
+ for (x = 0; x < width; ++x) {
dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
}
}
@@ -1906,27 +2211,30 @@ void HalfRow_16_C(const uint16* src_uv, int src_uv_stride,
void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride,
int width, int source_y_fraction) {
- int y1_fraction = source_y_fraction;
+ int y1_fraction = source_y_fraction ;
int y0_fraction = 256 - y1_fraction;
const uint8* src_ptr1 = src_ptr + src_stride;
int x;
- if (source_y_fraction == 0) {
+ if (y1_fraction == 0) {
memcpy(dst_ptr, src_ptr, width);
return;
}
- if (source_y_fraction == 128) {
+ if (y1_fraction == 128) {
HalfRow_C(src_ptr, (int)(src_stride), dst_ptr, width);
return;
}
for (x = 0; x < width - 1; x += 2) {
- dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
- dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8;
+ dst_ptr[0] =
+ (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8;
+ dst_ptr[1] =
+ (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction + 128) >> 8;
src_ptr += 2;
src_ptr1 += 2;
dst_ptr += 2;
}
if (width & 1) {
- dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
+ dst_ptr[0] =
+ (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8;
}
}
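
source_y_fraction is a /256 fixed-point weight: each output byte is (s0 * (256 - f) + s1 * f) >> 8, and the change above adds +128 (half the divisor) so the shift rounds to nearest instead of truncating. Per-byte form (helper name illustrative):

// Weighted average of two bytes with fraction f in [0, 256];
// +128 makes the >> 8 round to nearest.
static uint8 LerpU8(uint8 s0, uint8 s1, int f) {
  return (uint8)((s0 * (256 - f) + s1 * f + 128) >> 8);
}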
@@ -1957,50 +2265,16 @@ void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr,
}
}
-// Select 2 channels from ARGB on alternating pixels. e.g. BGBGBGBG
-void ARGBToBayerRow_C(const uint8* src_argb,
- uint8* dst_bayer, uint32 selector, int pix) {
- int index0 = selector & 0xff;
- int index1 = (selector >> 8) & 0xff;
- // Copy a row of Bayer.
- int x;
- for (x = 0; x < pix - 1; x += 2) {
- dst_bayer[0] = src_argb[index0];
- dst_bayer[1] = src_argb[index1];
- src_argb += 8;
- dst_bayer += 2;
- }
- if (pix & 1) {
- dst_bayer[0] = src_argb[index0];
- }
-}
-
-// Select G channel from ARGB. e.g. GGGGGGGG
-void ARGBToBayerGGRow_C(const uint8* src_argb,
- uint8* dst_bayer, uint32 selector, int pix) {
- // Copy a row of G.
- int x;
- for (x = 0; x < pix - 1; x += 2) {
- dst_bayer[0] = src_argb[1];
- dst_bayer[1] = src_argb[5];
- src_argb += 8;
- dst_bayer += 2;
- }
- if (pix & 1) {
- dst_bayer[0] = src_argb[1];
- }
-}
-
// Use first 4 shuffler values to reorder ARGB channels.
void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix) {
+ const uint8* shuffler, int width) {
int index0 = shuffler[0];
int index1 = shuffler[1];
int index2 = shuffler[2];
int index3 = shuffler[3];
// Shuffle a row of ARGB.
int x;
- for (x = 0; x < pix; ++x) {
+ for (x = 0; x < width; ++x) {
// To support in-place conversion.
uint8 b = src_argb[index0];
uint8 g = src_argb[index1];
@@ -2033,7 +2307,7 @@ void I422ToYUY2Row_C(const uint8* src_y,
if (width & 1) {
dst_frame[0] = src_y[0];
dst_frame[1] = src_u[0];
- dst_frame[2] = src_y[0]; // duplicate last y
+ dst_frame[2] = 0;
dst_frame[3] = src_v[0];
}
}
@@ -2057,130 +2331,14 @@ void I422ToUYVYRow_C(const uint8* src_y,
dst_frame[0] = src_u[0];
dst_frame[1] = src_y[0];
dst_frame[2] = src_v[0];
- dst_frame[3] = src_y[0]; // duplicate last y
+ dst_frame[3] = 0;
}
}
-#if !defined(LIBYUV_DISABLE_X86) && defined(HAS_I422TOARGBROW_SSSE3)
-// row_win.cc has asm version, but GCC uses 2 step wrapper.
-#if !defined(_MSC_VER) && (defined(__x86_64__) || defined(__i386__))
-void I422ToRGB565Row_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
- int width) {
- // Allocate a row of ARGB.
- align_buffer_64(row, width * 4);
- I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
- ARGBToRGB565Row_SSE2(row, rgb_buf, width);
- free_aligned_buffer_64(row);
-}
-#endif // !defined(_MSC_VER) && (defined(__x86_64__) || defined(__i386__))
-
-#if defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
-void I422ToARGB1555Row_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
- int width) {
- // Allocate a row of ARGB.
- align_buffer_64(row, width * 4);
- I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
- ARGBToARGB1555Row_SSE2(row, rgb_buf, width);
- free_aligned_buffer_64(row);
-}
-
-void I422ToARGB4444Row_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
- int width) {
- // Allocate a row of ARGB.
- align_buffer_64(row, width * 4);
- I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
- ARGBToARGB4444Row_SSE2(row, rgb_buf, width);
- free_aligned_buffer_64(row);
-}
-
-void NV12ToRGB565Row_SSSE3(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_rgb565,
- int width) {
- // Allocate a row of ARGB.
- align_buffer_64(row, width * 4);
- NV12ToARGBRow_SSSE3(src_y, src_uv, row, width);
- ARGBToRGB565Row_SSE2(row, dst_rgb565, width);
- free_aligned_buffer_64(row);
-}
-
-void NV21ToRGB565Row_SSSE3(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_rgb565,
- int width) {
- // Allocate a row of ARGB.
- align_buffer_64(row, width * 4);
- NV21ToARGBRow_SSSE3(src_y, src_vu, row, width);
- ARGBToRGB565Row_SSE2(row, dst_rgb565, width);
- free_aligned_buffer_64(row);
-}
-
-void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
- uint8* dst_argb,
- int width) {
- // Allocate a rows of yuv.
- align_buffer_64(row_y, ((width + 63) & ~63) * 2);
- uint8* row_u = row_y + ((width + 63) & ~63);
- uint8* row_v = row_u + ((width + 63) & ~63) / 2;
- YUY2ToUV422Row_SSE2(src_yuy2, row_u, row_v, width);
- YUY2ToYRow_SSE2(src_yuy2, row_y, width);
- I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width);
- free_aligned_buffer_64(row_y);
-}
-
-void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* src_yuy2,
- uint8* dst_argb,
- int width) {
- // Allocate a rows of yuv.
- align_buffer_64(row_y, ((width + 63) & ~63) * 2);
- uint8* row_u = row_y + ((width + 63) & ~63);
- uint8* row_v = row_u + ((width + 63) & ~63) / 2;
- YUY2ToUV422Row_Unaligned_SSE2(src_yuy2, row_u, row_v, width);
- YUY2ToYRow_Unaligned_SSE2(src_yuy2, row_y, width);
- I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width);
- free_aligned_buffer_64(row_y);
-}
-
-void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
- uint8* dst_argb,
- int width) {
- // Allocate a rows of yuv.
- align_buffer_64(row_y, ((width + 63) & ~63) * 2);
- uint8* row_u = row_y + ((width + 63) & ~63);
- uint8* row_v = row_u + ((width + 63) & ~63) / 2;
- UYVYToUV422Row_SSE2(src_uyvy, row_u, row_v, width);
- UYVYToYRow_SSE2(src_uyvy, row_y, width);
- I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width);
- free_aligned_buffer_64(row_y);
-}
-
-void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy,
- uint8* dst_argb,
- int width) {
- // Allocate a rows of yuv.
- align_buffer_64(row_y, ((width + 63) & ~63) * 2);
- uint8* row_u = row_y + ((width + 63) & ~63);
- uint8* row_v = row_u + ((width + 63) & ~63) / 2;
- UYVYToUV422Row_Unaligned_SSE2(src_uyvy, row_u, row_v, width);
- UYVYToYRow_Unaligned_SSE2(src_uyvy, row_y, width);
- I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width);
- free_aligned_buffer_64(row_y);
-}
-
-#endif // defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
-#endif // !defined(LIBYUV_DISABLE_X86)
void ARGBPolynomialRow_C(const uint8* src_argb,
- uint8* dst_argb, const float* poly,
+ uint8* dst_argb,
+ const float* poly,
int width) {
int i;
for (i = 0; i < width; ++i) {
@@ -2280,6 +2438,204 @@ void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) {
}
}
+// Maximum temporary width for wrappers to process at a time, in pixels.
+#define MAXTWIDTH 2048
+
+#if !(defined(_MSC_VER) && defined(_M_IX86)) && \
+ defined(HAS_I422TORGB565ROW_SSSE3)
+// row_win.cc has asm version, but GCC uses 2 step wrapper.
+void I422ToRGB565Row_SSSE3(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
+ ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
+ src_y += twidth;
+ src_u += twidth / 2;
+ src_v += twidth / 2;
+ dst_rgb565 += twidth * 2;
+ width -= twidth;
+ }
+}
+#endif
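
All wrappers below share one shape: convert up to MAXTWIDTH pixels into an aligned intermediate ARGB row, run the ARGB-to-target kernel on it, then advance (chroma pointers move by half the chunk for 4:2:2 input). This replaces the old per-call heap buffers (align_buffer_64) with a fixed stack buffer. A sketch of the skeleton with the two kernels abstracted (yuvconstants plumbing omitted for brevity; typedef and function names illustrative):

// Generic two-step row wrapper: ToArgb fills 'row' with twidth ARGB
// pixels; FromArgb packs them at dst_bpp bytes per pixel.
typedef void (*ToArgbFn)(const uint8* y, const uint8* u, const uint8* v,
                         uint8* argb, int width);
typedef void (*FromArgbFn)(const uint8* argb, uint8* dst, int width);
static void TwoStepRow(ToArgbFn to_argb, FromArgbFn from_argb, int dst_bpp,
                       const uint8* y, const uint8* u, const uint8* v,
                       uint8* dst, int width) {
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);  // reuses the MAXTWIDTH above
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    to_argb(y, u, v, row, twidth);
    from_argb(row, dst, twidth);
    y += twidth;
    u += twidth / 2;  // 4:2:2 chroma advances at half the luma rate.
    v += twidth / 2;
    dst += twidth * dst_bpp;
    width -= twidth;
  }
}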
+
+#if defined(HAS_I422TOARGB1555ROW_SSSE3)
+void I422ToARGB1555Row_SSSE3(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_argb1555,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ // Row buffer for intermediate ARGB pixels.
+ SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
+ ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth);
+ src_y += twidth;
+ src_u += twidth / 2;
+ src_v += twidth / 2;
+ dst_argb1555 += twidth * 2;
+ width -= twidth;
+ }
+}
+#endif
+
+#if defined(HAS_I422TOARGB4444ROW_SSSE3)
+void I422ToARGB4444Row_SSSE3(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_argb4444,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ // Row buffer for intermediate ARGB pixels.
+ SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
+ ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth);
+ src_y += twidth;
+ src_u += twidth / 2;
+ src_v += twidth / 2;
+ dst_argb4444 += twidth * 2;
+ width -= twidth;
+ }
+}
+#endif
+
+#if defined(HAS_NV12TORGB565ROW_SSSE3)
+void NV12ToRGB565Row_SSSE3(const uint8* src_y,
+ const uint8* src_uv,
+ uint8* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ // Row buffer for intermediate ARGB pixels.
+ SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, twidth);
+ ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
+ src_y += twidth;
+ src_uv += twidth;
+ dst_rgb565 += twidth * 2;
+ width -= twidth;
+ }
+}
+#endif
+
+#if defined(HAS_I422TORGB565ROW_AVX2)
+void I422ToRGB565Row_AVX2(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
+ ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
+ src_y += twidth;
+ src_u += twidth / 2;
+ src_v += twidth / 2;
+ dst_rgb565 += twidth * 2;
+ width -= twidth;
+ }
+}
+#endif
+
+#if defined(HAS_I422TOARGB1555ROW_AVX2)
+void I422ToARGB1555Row_AVX2(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_argb1555,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ // Row buffer for intermediate ARGB pixels.
+ SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
+ ARGBToARGB1555Row_AVX2(row, dst_argb1555, twidth);
+ src_y += twidth;
+ src_u += twidth / 2;
+ src_v += twidth / 2;
+ dst_argb1555 += twidth * 2;
+ width -= twidth;
+ }
+}
+#endif
+
+#if defined(HAS_I422TOARGB4444ROW_AVX2)
+void I422ToARGB4444Row_AVX2(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_argb4444,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ // Row buffer for intermediate ARGB pixels.
+ SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
+ ARGBToARGB4444Row_AVX2(row, dst_argb4444, twidth);
+ src_y += twidth;
+ src_u += twidth / 2;
+ src_v += twidth / 2;
+ dst_argb4444 += twidth * 2;
+ width -= twidth;
+ }
+}
+#endif
+
+#if defined(HAS_I422TORGB24ROW_AVX2)
+void I422ToRGB24Row_AVX2(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ // Row buffer for intermediate ARGB pixels.
+ SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
+ // TODO(fbarchard): ARGBToRGB24Row_AVX2
+ ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
+ src_y += twidth;
+ src_u += twidth / 2;
+ src_v += twidth / 2;
+ dst_rgb24 += twidth * 3;
+ width -= twidth;
+ }
+}
+#endif
+
+#if defined(HAS_NV12TORGB565ROW_AVX2)
+void NV12ToRGB565Row_AVX2(const uint8* src_y,
+ const uint8* src_uv,
+ uint8* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ // Row buffer for intermediate ARGB pixels.
+ SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth);
+ ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
+ src_y += twidth;
+ src_uv += twidth;
+ dst_rgb565 += twidth * 2;
+ width -= twidth;
+ }
+}
+#endif
+
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
diff --git a/TMessagesProj/jni/libyuv/source/row_posix.cc b/TMessagesProj/jni/libyuv/source/row_gcc.cc
similarity index 57%
rename from TMessagesProj/jni/libyuv/source/row_posix.cc
rename to TMessagesProj/jni/libyuv/source/row_gcc.cc
index 106fda568..80b2a95aa 100644
--- a/TMessagesProj/jni/libyuv/source/row_posix.cc
+++ b/TMessagesProj/jni/libyuv/source/row_gcc.cc
@@ -1,3 +1,4 @@
+// VERSION 2
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
@@ -16,7 +17,8 @@ extern "C" {
#endif
// This module is for GCC x86 and x64.
-#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
+#if !defined(LIBYUV_DISABLE_X86) && \
+ (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
#if defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_ARGBGRAYROW_SSSE3)
@@ -92,6 +94,7 @@ static uvec8 kAddY16 = {
16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u
};
+// 7 bit fixed point 0.5.
static vec16 kAddYJ64 = {
64, 64, 64, 64, 64, 64, 64, 64
};
@@ -118,6 +121,24 @@ static uvec8 kShuffleMaskRAWToARGB = {
2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u
};
+// Shuffle table for converting RAW to RGB24. First 8.
+static const uvec8 kShuffleMaskRAWToRGB24_0 = {
+ 2u, 1u, 0u, 5u, 4u, 3u, 8u, 7u,
+ 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u
+};
+
+// Shuffle table for converting RAW to RGB24. Middle 8.
+static const uvec8 kShuffleMaskRAWToRGB24_1 = {
+ 2u, 7u, 6u, 5u, 10u, 9u, 8u, 13u,
+ 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u
+};
+
+// Shuffle table for converting RAW to RGB24. Last 8.
+static const uvec8 kShuffleMaskRAWToRGB24_2 = {
+ 8u, 7u, 12u, 11u, 10u, 15u, 14u, 13u,
+ 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u
+};
+
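These tables feed pshufb: output byte i is input byte mask[i] & 15, and a mask byte with the high bit set (the 128u entries) forces that output byte to zero, which is how the unused top half of each result register is cleared. A C model of the instruction for one 16-byte lane:

// SSSE3 pshufb semantics: out[i] = (m[i] & 0x80) ? 0 : in[m[i] & 15].
static void PshufbModel(const uint8 in[16], const uint8 m[16],
                        uint8 out[16]) {
  int i;
  for (i = 0; i < 16; ++i) {
    out[i] = (uint8)((m[i] & 0x80) ? 0 : in[m[i] & 15]);
  }
}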
// Shuffle table for converting ARGB to RGB24.
static uvec8 kShuffleMaskARGBToRGB24 = {
0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u, 14u, 128u, 128u, 128u, 128u
@@ -133,143 +154,39 @@ static uvec8 kShuffleMaskARGBToRGB24_0 = {
0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 128u, 128u, 128u, 128u, 10u, 12u, 13u, 14u
};
-// Shuffle table for converting ARGB to RAW.
-static uvec8 kShuffleMaskARGBToRAW_0 = {
- 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 128u, 128u, 128u, 128u, 8u, 14u, 13u, 12u
+// YUY2 shuf 16 Y to 32 Y.
+static const lvec8 kShuffleYUY2Y = {
+ 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14,
+ 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14
+};
+
+// YUY2 shuf 8 UV to 16 UV.
+static const lvec8 kShuffleYUY2UV = {
+ 1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9, 11, 13, 15, 13, 15,
+ 1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9, 11, 13, 15, 13, 15
+};
+
+// UYVY shuf 16 Y to 32 Y.
+static const lvec8 kShuffleUYVYY = {
+ 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15,
+ 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15
+};
+
+// UYVY shuf 8 UV to 16 UV.
+static const lvec8 kShuffleUYVYUV = {
+ 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14,
+ 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14
+};
+
+// NV21 shuf 8 VU to 16 UV.
+static const lvec8 kShuffleNV21 = {
+ 1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6,
+ 1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6,
};
#endif // HAS_RGB24TOARGBROW_SSSE3
-#if defined(TESTING) && defined(__x86_64__)
-void TestRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
- asm volatile (
- ".p2align 5 \n"
- "mov %%eax,%%eax \n"
- "mov %%ebx,%%ebx \n"
- "mov %%ecx,%%ecx \n"
- "mov %%edx,%%edx \n"
- "mov %%esi,%%esi \n"
- "mov %%edi,%%edi \n"
- "mov %%ebp,%%ebp \n"
- "mov %%esp,%%esp \n"
- ".p2align 5 \n"
- "mov %%r8d,%%r8d \n"
- "mov %%r9d,%%r9d \n"
- "mov %%r10d,%%r10d \n"
- "mov %%r11d,%%r11d \n"
- "mov %%r12d,%%r12d \n"
- "mov %%r13d,%%r13d \n"
- "mov %%r14d,%%r14d \n"
- "mov %%r15d,%%r15d \n"
- ".p2align 5 \n"
- "lea (%%rax),%%eax \n"
- "lea (%%rbx),%%ebx \n"
- "lea (%%rcx),%%ecx \n"
- "lea (%%rdx),%%edx \n"
- "lea (%%rsi),%%esi \n"
- "lea (%%rdi),%%edi \n"
- "lea (%%rbp),%%ebp \n"
- "lea (%%rsp),%%esp \n"
- ".p2align 5 \n"
- "lea (%%r8),%%r8d \n"
- "lea (%%r9),%%r9d \n"
- "lea (%%r10),%%r10d \n"
- "lea (%%r11),%%r11d \n"
- "lea (%%r12),%%r12d \n"
- "lea (%%r13),%%r13d \n"
- "lea (%%r14),%%r14d \n"
- "lea (%%r15),%%r15d \n"
-
- ".p2align 5 \n"
- "lea 0x10(%%rax),%%eax \n"
- "lea 0x10(%%rbx),%%ebx \n"
- "lea 0x10(%%rcx),%%ecx \n"
- "lea 0x10(%%rdx),%%edx \n"
- "lea 0x10(%%rsi),%%esi \n"
- "lea 0x10(%%rdi),%%edi \n"
- "lea 0x10(%%rbp),%%ebp \n"
- "lea 0x10(%%rsp),%%esp \n"
- ".p2align 5 \n"
- "lea 0x10(%%r8),%%r8d \n"
- "lea 0x10(%%r9),%%r9d \n"
- "lea 0x10(%%r10),%%r10d \n"
- "lea 0x10(%%r11),%%r11d \n"
- "lea 0x10(%%r12),%%r12d \n"
- "lea 0x10(%%r13),%%r13d \n"
- "lea 0x10(%%r14),%%r14d \n"
- "lea 0x10(%%r15),%%r15d \n"
-
- ".p2align 5 \n"
- "add 0x10,%%eax \n"
- "add 0x10,%%ebx \n"
- "add 0x10,%%ecx \n"
- "add 0x10,%%edx \n"
- "add 0x10,%%esi \n"
- "add 0x10,%%edi \n"
- "add 0x10,%%ebp \n"
- "add 0x10,%%esp \n"
- ".p2align 5 \n"
- "add 0x10,%%r8d \n"
- "add 0x10,%%r9d \n"
- "add 0x10,%%r10d \n"
- "add 0x10,%%r11d \n"
- "add 0x10,%%r12d \n"
- "add 0x10,%%r13d \n"
- "add 0x10,%%r14d \n"
- "add 0x10,%%r15d \n"
-
- ".p2align 2 \n"
- "1: \n"
- "movq " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(0x8,0) ",%0 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x8,%2 \n"
- "jg 1b \n"
- : "+r"(src_y), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm5"
-#endif
- );
-}
-#endif // TESTING
-
-#ifdef HAS_I400TOARGBROW_SSE2
-void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pslld $0x18,%%xmm5 \n"
- LABELALIGN
- "1: \n"
- "movq " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(0x8,0) ",%0 \n"
- "punpcklbw %%xmm0,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm0,%%xmm0 \n"
- "punpckhwd %%xmm1,%%xmm1 \n"
- "por %%xmm5,%%xmm0 \n"
- "por %%xmm5,%%xmm1 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "movdqa %%xmm1," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x8,%2 \n"
- "jg 1b \n"
- : "+r"(src_y), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm5"
-#endif
- );
-}
-
-void I400ToARGBRow_Unaligned_SSE2(const uint8* src_y, uint8* dst_argb,
- int pix) {
+#ifdef HAS_J400TOARGBROW_SSE2
+void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width) {
asm volatile (
"pcmpeqb %%xmm5,%%xmm5 \n"
"pslld $0x18,%%xmm5 \n"
@@ -290,18 +207,14 @@ void I400ToARGBRow_Unaligned_SSE2(const uint8* src_y, uint8* dst_argb,
"jg 1b \n"
: "+r"(src_y), // %0
"+r"(dst_argb), // %1
- "+r"(pix) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm5"
-#endif
+ "+r"(width) // %2
+ :: "memory", "cc", "xmm0", "xmm1", "xmm5"
);
}
-#endif // HAS_I400TOARGBROW_SSE2
+#endif // HAS_J400TOARGBROW_SSE2
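
The rename from I400ToARGBRow_SSE2 to J400ToARGBRow_SSE2 makes the full-range (JPEG) contract explicit: the kernel only replicates each Y byte into B, G and R (the punpcklbw/punpcklwd/punpckhwd sequence) and ORs in opaque alpha, with no 16..235 range expansion. Scalar equivalent (name illustrative):

// Full-range gray to ARGB: replicate Y, force alpha opaque.
static void J400ToARGBRow_ref(const uint8* src_y, uint8* dst_argb,
                              int width) {
  int x;
  for (x = 0; x < width; ++x) {
    dst_argb[0] = dst_argb[1] = dst_argb[2] = src_y[x];
    dst_argb[3] = 255;
    dst_argb += 4;
  }
}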
#ifdef HAS_RGB24TOARGBROW_SSSE3
-void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) {
+void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int width) {
asm volatile (
"pcmpeqb %%xmm5,%%xmm5 \n" // generate mask 0xff000000
"pslld $0x18,%%xmm5 \n"
@@ -318,31 +231,28 @@ void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) {
"por %%xmm5,%%xmm2 \n"
"palignr $0xc,%%xmm0,%%xmm1 \n"
"pshufb %%xmm4,%%xmm0 \n"
- "movdqa %%xmm2," MEMACCESS2(0x20,1) " \n"
+ "movdqu %%xmm2," MEMACCESS2(0x20,1) " \n"
"por %%xmm5,%%xmm0 \n"
"pshufb %%xmm4,%%xmm1 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
+ "movdqu %%xmm0," MEMACCESS(1) " \n"
"por %%xmm5,%%xmm1 \n"
"palignr $0x4,%%xmm3,%%xmm3 \n"
"pshufb %%xmm4,%%xmm3 \n"
- "movdqa %%xmm1," MEMACCESS2(0x10,1) " \n"
+ "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
"por %%xmm5,%%xmm3 \n"
- "sub $0x10,%2 \n"
- "movdqa %%xmm3," MEMACCESS2(0x30,1) " \n"
+ "movdqu %%xmm3," MEMACCESS2(0x30,1) " \n"
"lea " MEMLEA(0x40,1) ",%1 \n"
+ "sub $0x10,%2 \n"
"jg 1b \n"
: "+r"(src_rgb24), // %0
"+r"(dst_argb), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
: "m"(kShuffleMaskRGB24ToARGB) // %3
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
+ : "memory", "cc" , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
-void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix) {
+void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int width) {
asm volatile (
"pcmpeqb %%xmm5,%%xmm5 \n" // generate mask 0xff000000
"pslld $0x18,%%xmm5 \n"
@@ -359,31 +269,58 @@ void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix) {
"por %%xmm5,%%xmm2 \n"
"palignr $0xc,%%xmm0,%%xmm1 \n"
"pshufb %%xmm4,%%xmm0 \n"
- "movdqa %%xmm2," MEMACCESS2(0x20,1) " \n"
+ "movdqu %%xmm2," MEMACCESS2(0x20,1) " \n"
"por %%xmm5,%%xmm0 \n"
"pshufb %%xmm4,%%xmm1 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
+ "movdqu %%xmm0," MEMACCESS(1) " \n"
"por %%xmm5,%%xmm1 \n"
"palignr $0x4,%%xmm3,%%xmm3 \n"
"pshufb %%xmm4,%%xmm3 \n"
- "movdqa %%xmm1," MEMACCESS2(0x10,1) " \n"
+ "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
"por %%xmm5,%%xmm3 \n"
- "sub $0x10,%2 \n"
- "movdqa %%xmm3," MEMACCESS2(0x30,1) " \n"
+ "movdqu %%xmm3," MEMACCESS2(0x30,1) " \n"
"lea " MEMLEA(0x40,1) ",%1 \n"
+ "sub $0x10,%2 \n"
"jg 1b \n"
: "+r"(src_raw), // %0
"+r"(dst_argb), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
: "m"(kShuffleMaskRAWToARGB) // %3
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
-void RGB565ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) {
+void RAWToRGB24Row_SSSE3(const uint8* src_raw, uint8* dst_rgb24, int width) {
+ asm volatile (
+ "movdqa %3,%%xmm3 \n"
+ "movdqa %4,%%xmm4 \n"
+ "movdqa %5,%%xmm5 \n"
+ LABELALIGN
+ "1: \n"
+ "movdqu " MEMACCESS(0) ",%%xmm0 \n"
+ "movdqu " MEMACCESS2(0x4,0) ",%%xmm1 \n"
+ "movdqu " MEMACCESS2(0x8,0) ",%%xmm2 \n"
+ "lea " MEMLEA(0x18,0) ",%0 \n"
+ "pshufb %%xmm3,%%xmm0 \n"
+ "pshufb %%xmm4,%%xmm1 \n"
+ "pshufb %%xmm5,%%xmm2 \n"
+ "movq %%xmm0," MEMACCESS(1) " \n"
+ "movq %%xmm1," MEMACCESS2(0x8,1) " \n"
+ "movq %%xmm2," MEMACCESS2(0x10,1) " \n"
+ "lea " MEMLEA(0x18,1) ",%1 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_raw), // %0
+ "+r"(dst_rgb24), // %1
+ "+r"(width) // %2
+ : "m"(kShuffleMaskRAWToRGB24_0), // %3
+ "m"(kShuffleMaskRAWToRGB24_1), // %4
+ "m"(kShuffleMaskRAWToRGB24_2) // %5
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+ );
+}
+
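RAWToRGB24Row_SSSE3 is new: the two 24-bit formats differ only in channel order, so the three shuffle masks above simply reverse each 3-byte triplet (note kShuffleMaskRAWToRGB24_0 = {2,1,0, 5,4,3, ...}). Scalar reference of what the shuffles compute (name illustrative):

// 24-bit channel swap: exchange the first and third byte of each pixel.
static void RAWToRGB24Row_ref(const uint8* src_raw, uint8* dst_rgb24,
                              int width) {
  int x;
  for (x = 0; x < width; ++x) {
    dst_rgb24[0] = src_raw[2];
    dst_rgb24[1] = src_raw[1];
    dst_rgb24[2] = src_raw[0];
    src_raw += 3;
    dst_rgb24 += 3;
  }
}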
+void RGB565ToARGBRow_SSE2(const uint8* src, uint8* dst, int width) {
asm volatile (
"mov $0x1080108,%%eax \n"
"movd %%eax,%%xmm5 \n"
@@ -417,27 +354,21 @@ void RGB565ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) {
"movdqa %%xmm1,%%xmm2 \n"
"punpcklbw %%xmm0,%%xmm1 \n"
"punpckhbw %%xmm0,%%xmm2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqa,xmm1,0x00,1,0,2) // movdqa %%xmm1,(%1,%0,2)
- MEMOPMEM(movdqa,xmm2,0x10,1,0,2) // movdqa %%xmm2,0x10(%1,%0,2)
+ MEMOPMEM(movdqu,xmm1,0x00,1,0,2) // movdqu %%xmm1,(%1,%0,2)
+ MEMOPMEM(movdqu,xmm2,0x10,1,0,2) // movdqu %%xmm2,0x10(%1,%0,2)
"lea " MEMLEA(0x10,0) ",%0 \n"
"sub $0x8,%2 \n"
"jg 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
- : "memory", "cc", "eax"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
+ : "memory", "cc", "eax", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
);
}
-void ARGB1555ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) {
+void ARGB1555ToARGBRow_SSE2(const uint8* src, uint8* dst, int width) {
asm volatile (
"mov $0x1080108,%%eax \n"
"movd %%eax,%%xmm5 \n"
@@ -474,27 +405,21 @@ void ARGB1555ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) {
"movdqa %%xmm1,%%xmm2 \n"
"punpcklbw %%xmm0,%%xmm1 \n"
"punpckhbw %%xmm0,%%xmm2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqa,xmm1,0x00,1,0,2) // movdqa %%xmm1,(%1,%0,2)
- MEMOPMEM(movdqa,xmm2,0x10,1,0,2) // movdqa %%xmm2,0x10(%1,%0,2)
+ MEMOPMEM(movdqu,xmm1,0x00,1,0,2) // movdqu %%xmm1,(%1,%0,2)
+ MEMOPMEM(movdqu,xmm2,0x10,1,0,2) // movdqu %%xmm2,0x10(%1,%0,2)
"lea " MEMLEA(0x10,0) ",%0 \n"
"sub $0x8,%2 \n"
"jg 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
- : "memory", "cc", "eax"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
+ : "memory", "cc", "eax", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
);
}
-void ARGB4444ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) {
+void ARGB4444ToARGBRow_SSE2(const uint8* src, uint8* dst, int width) {
asm volatile (
"mov $0xf0f0f0f,%%eax \n"
"movd %%eax,%%xmm4 \n"
@@ -518,27 +443,21 @@ void ARGB4444ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) {
"movdqa %%xmm0,%%xmm1 \n"
"punpcklbw %%xmm2,%%xmm0 \n"
"punpckhbw %%xmm2,%%xmm1 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqa,xmm0,0x00,1,0,2) // movdqa %%xmm0,(%1,%0,2)
- MEMOPMEM(movdqa,xmm1,0x10,1,0,2) // movdqa %%xmm1,0x10(%1,%0,2)
+ MEMOPMEM(movdqu,xmm0,0x00,1,0,2) // movdqu %%xmm0,(%1,%0,2)
+ MEMOPMEM(movdqu,xmm1,0x10,1,0,2) // movdqu %%xmm1,0x10(%1,%0,2)
"lea " MEMLEA(0x10,0) ",%0 \n"
"sub $0x8,%2 \n"
"jg 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
- : "memory", "cc", "eax"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
+ : "memory", "cc", "eax", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
-void ARGBToRGB24Row_SSSE3(const uint8* src, uint8* dst, int pix) {
+void ARGBToRGB24Row_SSSE3(const uint8* src, uint8* dst, int width) {
asm volatile (
"movdqa %3,%%xmm6 \n"
LABELALIGN
@@ -570,16 +489,13 @@ void ARGBToRGB24Row_SSSE3(const uint8* src, uint8* dst, int pix) {
"jg 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
: "m"(kShuffleMaskARGBToRGB24) // %3
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
-#endif
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
);
}
-void ARGBToRAWRow_SSSE3(const uint8* src, uint8* dst, int pix) {
+void ARGBToRAWRow_SSSE3(const uint8* src, uint8* dst, int width) {
asm volatile (
"movdqa %3,%%xmm6 \n"
LABELALIGN
@@ -611,16 +527,13 @@ void ARGBToRAWRow_SSSE3(const uint8* src, uint8* dst, int pix) {
"jg 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
: "m"(kShuffleMaskARGBToRAW) // %3
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
-#endif
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
);
}
-void ARGBToRGB565Row_SSE2(const uint8* src, uint8* dst, int pix) {
+void ARGBToRGB565Row_SSE2(const uint8* src, uint8* dst, int width) {
asm volatile (
"pcmpeqb %%xmm3,%%xmm3 \n"
"psrld $0x1b,%%xmm3 \n"
@@ -631,7 +544,7 @@ void ARGBToRGB565Row_SSE2(const uint8* src, uint8* dst, int pix) {
"pslld $0xb,%%xmm5 \n"
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
+ "movdqu " MEMACCESS(0) ",%%xmm0 \n"
"movdqa %%xmm0,%%xmm1 \n"
"movdqa %%xmm0,%%xmm2 \n"
"pslld $0x8,%%xmm0 \n"
@@ -651,16 +564,104 @@ void ARGBToRGB565Row_SSE2(const uint8* src, uint8* dst, int pix) {
"jg 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
- "+r"(pix) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
+ "+r"(width) // %2
+ :: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
-void ARGBToARGB1555Row_SSE2(const uint8* src, uint8* dst, int pix) {
+void ARGBToRGB565DitherRow_SSE2(const uint8* src, uint8* dst,
+ const uint32 dither4, int width) {
+ asm volatile (
+ "movd %3,%%xmm6 \n"
+ "punpcklbw %%xmm6,%%xmm6 \n"
+ "movdqa %%xmm6,%%xmm7 \n"
+ "punpcklwd %%xmm6,%%xmm6 \n"
+ "punpckhwd %%xmm7,%%xmm7 \n"
+ "pcmpeqb %%xmm3,%%xmm3 \n"
+ "psrld $0x1b,%%xmm3 \n"
+ "pcmpeqb %%xmm4,%%xmm4 \n"
+ "psrld $0x1a,%%xmm4 \n"
+ "pslld $0x5,%%xmm4 \n"
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+ "pslld $0xb,%%xmm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "paddusb %%xmm6,%%xmm0 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "pslld $0x8,%%xmm0 \n"
+ "psrld $0x3,%%xmm1 \n"
+ "psrld $0x5,%%xmm2 \n"
+ "psrad $0x10,%%xmm0 \n"
+ "pand %%xmm3,%%xmm1 \n"
+ "pand %%xmm4,%%xmm2 \n"
+ "pand %%xmm5,%%xmm0 \n"
+ "por %%xmm2,%%xmm1 \n"
+ "por %%xmm1,%%xmm0 \n"
+ "packssdw %%xmm0,%%xmm0 \n"
+ "lea 0x10(%0),%0 \n"
+ "movq %%xmm0,(%1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x4,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "m"(dither4) // %3
+ : "memory", "cc",
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
+ );
+}
+
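The dither variant adds one of four per-column dither bytes to every channel with a saturating add (paddusb) before the 565 truncation; dither4 packs the 4-byte pattern, and the punpck sequence above replicates each byte across one pixel's four channels. Scalar sketch (name illustrative):

// Truncating 565 pack with a 4-column ordered dither.
static void ARGBToRGB565DitherRow_ref(const uint8* src, uint8* dst,
                                      uint32 dither4, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int d = (dither4 >> ((x & 3) * 8)) & 0xff;  // this column's dither byte
    int b = src[0] + d; if (b > 255) b = 255;   // saturate like paddusb
    int g = src[1] + d; if (g > 255) g = 255;
    int r = src[2] + d; if (r > 255) r = 255;
    *(uint16*)(dst) = (uint16)((b >> 3) | ((g >> 2) << 5) | ((r >> 3) << 11));
    src += 4;
    dst += 2;
  }
}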
+#ifdef HAS_ARGBTORGB565DITHERROW_AVX2
+void ARGBToRGB565DitherRow_AVX2(const uint8* src, uint8* dst,
+ const uint32 dither4, int width) {
+ asm volatile (
+ "vbroadcastss %3,%%xmm6 \n"
+ "vpunpcklbw %%xmm6,%%xmm6,%%xmm6 \n"
+ "vpermq $0xd8,%%ymm6,%%ymm6 \n"
+ "vpunpcklwd %%ymm6,%%ymm6,%%ymm6 \n"
+ "vpcmpeqb %%ymm3,%%ymm3,%%ymm3 \n"
+ "vpsrld $0x1b,%%ymm3,%%ymm3 \n"
+ "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
+ "vpsrld $0x1a,%%ymm4,%%ymm4 \n"
+ "vpslld $0x5,%%ymm4,%%ymm4 \n"
+ "vpslld $0xb,%%ymm3,%%ymm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vpaddusb %%ymm6,%%ymm0,%%ymm0 \n"
+ "vpsrld $0x5,%%ymm0,%%ymm2 \n"
+ "vpsrld $0x3,%%ymm0,%%ymm1 \n"
+ "vpsrld $0x8,%%ymm0,%%ymm0 \n"
+ "vpand %%ymm4,%%ymm2,%%ymm2 \n"
+ "vpand %%ymm3,%%ymm1,%%ymm1 \n"
+ "vpand %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpor %%ymm2,%%ymm1,%%ymm1 \n"
+ "vpor %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpackusdw %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "lea 0x20(%0),%0 \n"
+ "vmovdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "m"(dither4) // %3
+ : "memory", "cc",
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
+ );
+}
+#endif // HAS_ARGBTORGB565DITHERROW_AVX2
+
+void ARGBToARGB1555Row_SSE2(const uint8* src, uint8* dst, int width) {
asm volatile (
"pcmpeqb %%xmm4,%%xmm4 \n"
"psrld $0x1b,%%xmm4 \n"
@@ -672,7 +673,7 @@ void ARGBToARGB1555Row_SSE2(const uint8* src, uint8* dst, int pix) {
"pslld $0xf,%%xmm7 \n"
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
+ "movdqu " MEMACCESS(0) ",%%xmm0 \n"
"movdqa %%xmm0,%%xmm1 \n"
"movdqa %%xmm0,%%xmm2 \n"
"movdqa %%xmm0,%%xmm3 \n"
@@ -690,21 +691,18 @@ void ARGBToARGB1555Row_SSE2(const uint8* src, uint8* dst, int pix) {
"packssdw %%xmm0,%%xmm0 \n"
"lea " MEMLEA(0x10,0) ",%0 \n"
"movq %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMACCESS2(0x8,1) ",%1 \n"
+ "lea " MEMLEA(0x8,1) ",%1 \n"
"sub $0x4,%2 \n"
"jg 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
- "+r"(pix) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
+ "+r"(width) // %2
+ :: "memory", "cc",
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
);
}
-void ARGBToARGB4444Row_SSE2(const uint8* src, uint8* dst, int pix) {
+void ARGBToARGB4444Row_SSE2(const uint8* src, uint8* dst, int width) {
asm volatile (
"pcmpeqb %%xmm4,%%xmm4 \n"
"psllw $0xc,%%xmm4 \n"
@@ -712,7 +710,7 @@ void ARGBToARGB4444Row_SSE2(const uint8* src, uint8* dst, int pix) {
"psrlw $0x8,%%xmm3 \n"
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
+ "movdqu " MEMACCESS(0) ",%%xmm0 \n"
"movdqa %%xmm0,%%xmm1 \n"
"pand %%xmm3,%%xmm0 \n"
"pand %%xmm4,%%xmm1 \n"
@@ -727,58 +725,18 @@ void ARGBToARGB4444Row_SSE2(const uint8* src, uint8* dst, int pix) {
"jg 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
- "+r"(pix) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
-#endif
+ "+r"(width) // %2
+ :: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
);
}
#endif // HAS_RGB24TOARGBROW_SSSE3
#ifdef HAS_ARGBTOYROW_SSSE3
-void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
+// Convert 16 ARGB pixels (64 bytes) to 16 Y values.
+void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) {
asm volatile (
- "movdqa %4,%%xmm5 \n"
"movdqa %3,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm3 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "phaddw %%xmm1,%%xmm0 \n"
- "phaddw %%xmm3,%%xmm2 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%2 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- : "m"(kARGBToY), // %3
- "m"(kAddY16) // %4
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
- asm volatile (
"movdqa %4,%%xmm5 \n"
- "movdqa %3,%%xmm4 \n"
LABELALIGN
"1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
@@ -796,63 +754,24 @@ void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
"psrlw $0x7,%%xmm2 \n"
"packuswb %%xmm2,%%xmm0 \n"
"paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%2 \n"
"movdqu %%xmm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x10,1) ",%1 \n"
+ "sub $0x10,%2 \n"
"jg 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_y), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
: "m"(kARGBToY), // %3
"m"(kAddY16) // %4
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
#endif // HAS_ARGBTOYROW_SSSE3
#ifdef HAS_ARGBTOYJROW_SSSE3
-void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
- asm volatile (
- "movdqa %3,%%xmm4 \n"
- "movdqa %4,%%xmm5 \n"
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm3 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "phaddw %%xmm1,%%xmm0 \n"
- "phaddw %%xmm3,%%xmm2 \n"
- "paddw %%xmm5,%%xmm0 \n"
- "paddw %%xmm5,%%xmm2 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm0 \n"
- "sub $0x10,%2 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- : "m"(kARGBToYJ), // %3
- "m"(kAddYJ64) // %4
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
+// Convert 16 ARGB pixels (64 bytes) to 16 YJ values.
+// Same as ARGBToYRow but different coefficients, no add 16, but do rounding.
+void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) {
asm volatile (
"movdqa %3,%%xmm4 \n"
"movdqa %4,%%xmm5 \n"
@@ -874,53 +793,131 @@ void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
"psrlw $0x7,%%xmm0 \n"
"psrlw $0x7,%%xmm2 \n"
"packuswb %%xmm2,%%xmm0 \n"
- "sub $0x10,%2 \n"
"movdqu %%xmm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x10,1) ",%1 \n"
+ "sub $0x10,%2 \n"
"jg 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_y), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
: "m"(kARGBToYJ), // %3
"m"(kAddYJ64) // %4
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
#endif // HAS_ARGBTOYJROW_SSSE3
+#ifdef HAS_ARGBTOYROW_AVX2
+// vpermd for vphaddw + vpackuswb vpermd.
+static const lvec32 kPermdARGBToY_AVX = {
+ 0, 4, 1, 5, 2, 6, 3, 7
+};
+
+// Convert 32 ARGB pixels (128 bytes) to 32 Y values.
+void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int width) {
+ asm volatile (
+ "vbroadcastf128 %3,%%ymm4 \n"
+ "vbroadcastf128 %4,%%ymm5 \n"
+ "vmovdqu %5,%%ymm6 \n"
+ LABELALIGN
+ "1: \n"
+ "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
+ "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
+ "vmovdqu " MEMACCESS2(0x40,0) ",%%ymm2 \n"
+ "vmovdqu " MEMACCESS2(0x60,0) ",%%ymm3 \n"
+ "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
+ "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
+ "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
+ "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
+ "lea " MEMLEA(0x80,0) ",%0 \n"
+ "vphaddw %%ymm1,%%ymm0,%%ymm0 \n" // mutates.
+ "vphaddw %%ymm3,%%ymm2,%%ymm2 \n"
+ "vpsrlw $0x7,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x7,%%ymm2,%%ymm2 \n"
+ "vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates.
+ "vpermd %%ymm0,%%ymm6,%%ymm0 \n" // unmutate.
+ "vpaddb %%ymm5,%%ymm0,%%ymm0 \n" // add 16 for Y
+ "vmovdqu %%ymm0," MEMACCESS(1) " \n"
+ "lea " MEMLEA(0x20,1) ",%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ : "m"(kARGBToY), // %3
+ "m"(kAddY16), // %4
+ "m"(kPermdARGBToY_AVX) // %5
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
+ );
+}
+#endif // HAS_ARGBTOYROW_AVX2
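
kARGBToY holds 7-bit fixed-point BT.601 luma coefficients (hence the psrlw/vpsrlw $0x7) and kAddY16 adds the studio-range +16 offset after packing. The scalar form used elsewhere in this library is, from memory, the following; treat the exact constants as illustrative:

// BT.601 studio-range luma: 0x1080 = (16 << 8) + 128 folds in the +16
// offset and rounds the >> 8.
static uint8 RGBToY_ref(uint8 r, uint8 g, uint8 b) {
  return (uint8)((66 * r + 129 * g + 25 * b + 0x1080) >> 8);
}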
+
+#ifdef HAS_ARGBTOYJROW_AVX2
+// Convert 32 ARGB pixels (128 bytes) to 32 Y values.
+void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int width) {
+ asm volatile (
+ "vbroadcastf128 %3,%%ymm4 \n"
+ "vbroadcastf128 %4,%%ymm5 \n"
+ "vmovdqu %5,%%ymm6 \n"
+ LABELALIGN
+ "1: \n"
+ "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
+ "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
+ "vmovdqu " MEMACCESS2(0x40,0) ",%%ymm2 \n"
+ "vmovdqu " MEMACCESS2(0x60,0) ",%%ymm3 \n"
+ "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
+ "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
+ "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
+ "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
+ "lea " MEMLEA(0x80,0) ",%0 \n"
+ "vphaddw %%ymm1,%%ymm0,%%ymm0 \n" // mutates.
+ "vphaddw %%ymm3,%%ymm2,%%ymm2 \n"
+ "vpaddw %%ymm5,%%ymm0,%%ymm0 \n" // Add .5 for rounding.
+ "vpaddw %%ymm5,%%ymm2,%%ymm2 \n"
+ "vpsrlw $0x7,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x7,%%ymm2,%%ymm2 \n"
+ "vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates.
+ "vpermd %%ymm0,%%ymm6,%%ymm0 \n" // unmutate.
+ "vmovdqu %%ymm0," MEMACCESS(1) " \n"
+ "lea " MEMLEA(0x20,1) ",%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ : "m"(kARGBToYJ), // %3
+ "m"(kAddYJ64), // %4
+ "m"(kPermdARGBToY_AVX) // %5
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
+ );
+}
+#endif // HAS_ARGBTOYJROW_AVX2
+
#ifdef HAS_ARGBTOUVROW_SSSE3
-// TODO(fbarchard): pass xmm constants to single block of assembly.
-// fpic on GCC 4.2 for OSX runs out of GPR registers. "m" effectively takes
-// 3 registers - ebx, ebp and eax. "m" can be passed with 3 normal registers,
-// or 4 if stack frame is disabled. Doing 2 assembly blocks is a work around
-// and considered unsafe.
void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) {
asm volatile (
- "movdqa %0,%%xmm4 \n"
- "movdqa %1,%%xmm3 \n"
- "movdqa %2,%%xmm5 \n"
- :
- : "m"(kARGBToU), // %0
- "m"(kARGBToV), // %1
- "m"(kAddUV128) // %2
- );
- asm volatile (
+ "movdqa %5,%%xmm3 \n"
+ "movdqa %6,%%xmm4 \n"
+ "movdqa %7,%%xmm5 \n"
"sub %1,%2 \n"
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- BUNDLEALIGN
- MEMOPREG(pavgb,0x00,0,4,1,xmm0) // pavgb (%0,%4,1),%%xmm0
- MEMOPREG(pavgb,0x10,0,4,1,xmm1) // pavgb 0x10(%0,%4,1),%%xmm1
- MEMOPREG(pavgb,0x20,0,4,1,xmm2) // pavgb 0x20(%0,%4,1),%%xmm2
- MEMOPREG(pavgb,0x30,0,4,1,xmm6) // pavgb 0x30(%0,%4,1),%%xmm6
+ "movdqu " MEMACCESS(0) ",%%xmm0 \n"
+ MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
+ "pavgb %%xmm7,%%xmm0 \n"
+ "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
+ MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
+ "pavgb %%xmm7,%%xmm1 \n"
+ "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
+ MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
+ "pavgb %%xmm7,%%xmm2 \n"
+ "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
+ MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
+ "pavgb %%xmm7,%%xmm6 \n"
+
"lea " MEMLEA(0x40,0) ",%0 \n"
"movdqa %%xmm0,%%xmm7 \n"
"shufps $0x88,%%xmm1,%%xmm0 \n"
@@ -942,52 +939,113 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
"psraw $0x8,%%xmm1 \n"
"packsswb %%xmm1,%%xmm0 \n"
"paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%3 \n"
"movlps %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
"lea " MEMLEA(0x8,1) ",%1 \n"
+ "sub $0x10,%3 \n"
"jg 1b \n"
: "+r"(src_argb0), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+rm"(width) // %3
- : "r"((intptr_t)(src_stride_argb)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
+ : "r"((intptr_t)(src_stride_argb)), // %4
+ "m"(kARGBToV), // %5
+ "m"(kARGBToU), // %6
+ "m"(kAddUV128) // %7
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
);
}
+#endif // HAS_ARGBTOUVROW_SSSE3
-// TODO(fbarchard): Share code with ARGBToUVRow_SSSE3.
+#ifdef HAS_ARGBTOUVROW_AVX2
+// vpshufb for vphaddw + vpackuswb packed to shorts.
+static const lvec8 kShufARGBToUV_AVX = {
+ 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15,
+ 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15
+};
+void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb,
+ uint8* dst_u, uint8* dst_v, int width) {
+ asm volatile (
+ "vbroadcastf128 %5,%%ymm5 \n"
+ "vbroadcastf128 %6,%%ymm6 \n"
+ "vbroadcastf128 %7,%%ymm7 \n"
+ "sub %1,%2 \n"
+ LABELALIGN
+ "1: \n"
+ "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
+ "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
+ "vmovdqu " MEMACCESS2(0x40,0) ",%%ymm2 \n"
+ "vmovdqu " MEMACCESS2(0x60,0) ",%%ymm3 \n"
+ VMEMOPREG(vpavgb,0x00,0,4,1,ymm0,ymm0) // vpavgb (%0,%4,1),%%ymm0,%%ymm0
+ VMEMOPREG(vpavgb,0x20,0,4,1,ymm1,ymm1)
+ VMEMOPREG(vpavgb,0x40,0,4,1,ymm2,ymm2)
+ VMEMOPREG(vpavgb,0x60,0,4,1,ymm3,ymm3)
+ "lea " MEMLEA(0x80,0) ",%0 \n"
+ "vshufps $0x88,%%ymm1,%%ymm0,%%ymm4 \n"
+ "vshufps $0xdd,%%ymm1,%%ymm0,%%ymm0 \n"
+ "vpavgb %%ymm4,%%ymm0,%%ymm0 \n"
+ "vshufps $0x88,%%ymm3,%%ymm2,%%ymm4 \n"
+ "vshufps $0xdd,%%ymm3,%%ymm2,%%ymm2 \n"
+ "vpavgb %%ymm4,%%ymm2,%%ymm2 \n"
+
+ "vpmaddubsw %%ymm7,%%ymm0,%%ymm1 \n"
+ "vpmaddubsw %%ymm7,%%ymm2,%%ymm3 \n"
+ "vpmaddubsw %%ymm6,%%ymm0,%%ymm0 \n"
+ "vpmaddubsw %%ymm6,%%ymm2,%%ymm2 \n"
+ "vphaddw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vphaddw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpsraw $0x8,%%ymm1,%%ymm1 \n"
+ "vpsraw $0x8,%%ymm0,%%ymm0 \n"
+ "vpacksswb %%ymm0,%%ymm1,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpshufb %8,%%ymm0,%%ymm0 \n"
+ "vpaddb %%ymm5,%%ymm0,%%ymm0 \n"
+
+ "vextractf128 $0x0,%%ymm0," MEMACCESS(1) " \n"
+ VEXTOPMEM(vextractf128,1,ymm0,0x0,1,2,1) // vextractf128 $1,%%ymm0,(%1,%2,1)
+ "lea " MEMLEA(0x10,1) ",%1 \n"
+ "sub $0x20,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_argb0), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+rm"(width) // %3
+ : "r"((intptr_t)(src_stride_argb)), // %4
+ "m"(kAddUV128), // %5
+ "m"(kARGBToV), // %6
+ "m"(kARGBToU), // %7
+ "m"(kShufARGBToUV_AVX) // %8
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
+ );
+}
+#endif // HAS_ARGBTOUVROW_AVX2
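
The UV kernels first average a 2x2 block (the row pavgb/vpavgb plus the shufps pair-average), then apply the U and V matrices with a +0x8080 bias that rounds and re-centres the signed result on 128 (the paddb kAddUV128 / vpaddb ymm5 step). The scalar counterparts are, from memory, the following; treat the exact constants as illustrative:

// BT.601 chroma from an averaged RGB sample; 0x8080 = (128 << 8) + 128.
static uint8 RGBToU_ref(uint8 r, uint8 g, uint8 b) {
  return (uint8)((112 * b - 74 * g - 38 * r + 0x8080) >> 8);
}
static uint8 RGBToV_ref(uint8 r, uint8 g, uint8 b) {
  return (uint8)((112 * r - 94 * g - 18 * b + 0x8080) >> 8);
}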
+
+#ifdef HAS_ARGBTOUVJROW_SSSE3
void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) {
asm volatile (
- "movdqa %0,%%xmm4 \n"
- "movdqa %1,%%xmm3 \n"
- "movdqa %2,%%xmm5 \n"
- :
- : "m"(kARGBToUJ), // %0
- "m"(kARGBToVJ), // %1
- "m"(kAddUVJ128) // %2
- );
- asm volatile (
+ "movdqa %5,%%xmm3 \n"
+ "movdqa %6,%%xmm4 \n"
+ "movdqa %7,%%xmm5 \n"
"sub %1,%2 \n"
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- BUNDLEALIGN
- MEMOPREG(pavgb,0x00,0,4,1,xmm0) // pavgb (%0,%4,1),%%xmm0
- MEMOPREG(pavgb,0x10,0,4,1,xmm1) // pavgb 0x10(%0,%4,1),%%xmm1
- MEMOPREG(pavgb,0x20,0,4,1,xmm2) // pavgb 0x20(%0,%4,1),%%xmm2
- MEMOPREG(pavgb,0x30,0,4,1,xmm6) // pavgb 0x30(%0,%4,1),%%xmm6
+ "movdqu " MEMACCESS(0) ",%%xmm0 \n"
+ MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
+ "pavgb %%xmm7,%%xmm0 \n"
+ "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
+ MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
+ "pavgb %%xmm7,%%xmm1 \n"
+ "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
+ MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
+ "pavgb %%xmm7,%%xmm2 \n"
+ "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
+ MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
+ "pavgb %%xmm7,%%xmm6 \n"
+
"lea " MEMLEA(0x40,0) ",%0 \n"
"movdqa %%xmm0,%%xmm7 \n"
"shufps $0x88,%%xmm1,%%xmm0 \n"
@@ -1010,245 +1068,32 @@ void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
"psraw $0x8,%%xmm0 \n"
"psraw $0x8,%%xmm1 \n"
"packsswb %%xmm1,%%xmm0 \n"
- "sub $0x10,%3 \n"
"movlps %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
"lea " MEMLEA(0x8,1) ",%1 \n"
+ "sub $0x10,%3 \n"
"jg 1b \n"
: "+r"(src_argb0), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+rm"(width) // %3
- : "r"((intptr_t)(src_stride_argb)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
- );
-}
-
-void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) {
- asm volatile (
- "movdqa %0,%%xmm4 \n"
- "movdqa %1,%%xmm3 \n"
- "movdqa %2,%%xmm5 \n"
- :
- : "m"(kARGBToU), // %0
- "m"(kARGBToV), // %1
- "m"(kAddUV128) // %2
- );
- asm volatile (
- "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- BUNDLEALIGN
- MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm0 \n"
- MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm1 \n"
- MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm2 \n"
- MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm6 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm7 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n"
- "shufps $0xdd,%%xmm1,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm0 \n"
- "movdqa %%xmm2,%%xmm7 \n"
- "shufps $0x88,%%xmm6,%%xmm2 \n"
- "shufps $0xdd,%%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm2,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm1 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm1 \n"
- "packsswb %%xmm1,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%3 \n"
- "movlps %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
- MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb0), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- : "r"((intptr_t)(src_stride_argb)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
- );
-}
-
-void ARGBToUVJRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) {
- asm volatile (
- "movdqa %0,%%xmm4 \n"
- "movdqa %1,%%xmm3 \n"
- "movdqa %2,%%xmm5 \n"
- :
- : "m"(kARGBToUJ), // %0
- "m"(kARGBToVJ), // %1
- "m"(kAddUVJ128) // %2
- );
- asm volatile (
- "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- BUNDLEALIGN
- MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm0 \n"
- MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm1 \n"
- MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm2 \n"
- MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm6 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm7 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n"
- "shufps $0xdd,%%xmm1,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm0 \n"
- "movdqa %%xmm2,%%xmm7 \n"
- "shufps $0x88,%%xmm6,%%xmm2 \n"
- "shufps $0xdd,%%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm2,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm1 \n"
- "paddw %%xmm5,%%xmm0 \n"
- "paddw %%xmm5,%%xmm1 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm1 \n"
- "packsswb %%xmm1,%%xmm0 \n"
- "sub $0x10,%3 \n"
- "movlps %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
- MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb0), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- : "r"((intptr_t)(src_stride_argb))
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
+ : "r"((intptr_t)(src_stride_argb)), // %4
+ "m"(kARGBToVJ), // %5
+ "m"(kARGBToUJ), // %6
+ "m"(kAddUVJ128) // %7
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
);
}
+#endif // HAS_ARGBTOUVJROW_SSSE3
+#ifdef HAS_ARGBTOUV444ROW_SSSE3
void ARGBToUV444Row_SSSE3(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int width) {
asm volatile (
- "movdqa %0,%%xmm4 \n"
- "movdqa %1,%%xmm3 \n"
- "movdqa %2,%%xmm5 \n"
- :
- : "m"(kARGBToU), // %0
- "m"(kARGBToV), // %1
- "m"(kAddUV128) // %2
- );
- asm volatile (
- "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm6 \n"
- "phaddw %%xmm1,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm2 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm2 \n"
- "packsswb %%xmm2,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%3 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- "pmaddubsw %%xmm3,%%xmm0 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm1,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm2 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm2 \n"
- "packsswb %%xmm2,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqa,xmm0,0x00,1,2,1) // movdqa %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm6"
-#endif
- );
-}
-
-void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_u,
- uint8* dst_v, int width) {
- asm volatile (
- "movdqa %0,%%xmm4 \n"
- "movdqa %1,%%xmm3 \n"
- "movdqa %2,%%xmm5 \n"
- :
- : "m"(kARGBToU), // %0
- "m"(kARGBToV), // %1
- "m"(kAddUV128) // %2
- );
- asm volatile (
+ "movdqa %4,%%xmm3 \n"
+ "movdqa %5,%%xmm4 \n"
+ "movdqa %6,%%xmm5 \n"
"sub %1,%2 \n"
LABELALIGN
"1: \n"
@@ -1266,7 +1111,6 @@ void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_u,
"psraw $0x8,%%xmm2 \n"
"packsswb %%xmm2,%%xmm0 \n"
"paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%3 \n"
"movdqu %%xmm0," MEMACCESS(1) " \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
"movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
@@ -1283,98 +1127,30 @@ void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_u,
"packsswb %%xmm2,%%xmm0 \n"
"paddb %%xmm5,%%xmm0 \n"
"lea " MEMLEA(0x40,0) ",%0 \n"
- BUNDLEALIGN
MEMOPMEM(movdqu,xmm0,0x00,1,2,1) // movdqu %%xmm0,(%1,%2,1)
"lea " MEMLEA(0x10,1) ",%1 \n"
+ "sub $0x10,%3 \n"
"jg 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+rm"(width) // %3
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm6"
-#endif
+ : "m"(kARGBToV), // %4
+ "m"(kARGBToU), // %5
+ "m"(kAddUV128) // %6
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm6"
);
}
+#endif // HAS_ARGBTOUV444ROW_SSSE3
+#ifdef HAS_ARGBTOUV422ROW_SSSE3
void ARGBToUV422Row_SSSE3(const uint8* src_argb0,
uint8* dst_u, uint8* dst_v, int width) {
asm volatile (
- "movdqa %0,%%xmm4 \n"
- "movdqa %1,%%xmm3 \n"
- "movdqa %2,%%xmm5 \n"
- :
- : "m"(kARGBToU), // %0
- "m"(kARGBToV), // %1
- "m"(kAddUV128) // %2
- );
- asm volatile (
- "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm7 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n"
- "shufps $0xdd,%%xmm1,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm0 \n"
- "movdqa %%xmm2,%%xmm7 \n"
- "shufps $0x88,%%xmm6,%%xmm2 \n"
- "shufps $0xdd,%%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm2,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm1 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm1 \n"
- "packsswb %%xmm1,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%3 \n"
- "movlps %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
- MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb0), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
- );
-}
-
-void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb0,
- uint8* dst_u, uint8* dst_v, int width) {
- asm volatile (
- "movdqa %0,%%xmm4 \n"
- "movdqa %1,%%xmm3 \n"
- "movdqa %2,%%xmm5 \n"
- :
- : "m"(kARGBToU), // %0
- "m"(kARGBToV), // %1
- "m"(kAddUV128) // %2
- );
- asm volatile (
+ "movdqa %4,%%xmm3 \n"
+ "movdqa %5,%%xmm4 \n"
+ "movdqa %6,%%xmm5 \n"
"sub %1,%2 \n"
LABELALIGN
"1: \n"
@@ -1403,65 +1179,25 @@ void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb0,
"psraw $0x8,%%xmm1 \n"
"packsswb %%xmm1,%%xmm0 \n"
"paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%3 \n"
"movlps %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
"lea " MEMLEA(0x8,1) ",%1 \n"
+ "sub $0x10,%3 \n"
"jg 1b \n"
: "+r"(src_argb0), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+rm"(width) // %3
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
+ : "m"(kARGBToV), // %4
+ "m"(kARGBToU), // %5
+ "m"(kAddUV128) // %6
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
);
}
+#endif // HAS_ARGBTOUV422ROW_SSSE3
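+// For orientation, a scalar sketch of the chroma math the UV rows above
+// vectorize with pmaddubsw/phaddw. The weights are libyuv's BT.601
+// fixed-point values as assumed from row_common.cc (the SIMD tables
+// kARGBToU/kARGBToV fold them per byte lane, and the +128 bias is applied
+// with paddb or paddw instead of the 0x8080 rounding term):
+// static __inline int RGBToU(uint8 r, uint8 g, uint8 b) {
+//   return (112 * b - 74 * g - 38 * r + 0x8080) >> 8;
+// }
+// static __inline int RGBToV(uint8 r, uint8 g, uint8 b) {
+//   return (112 * r - 94 * g - 18 * b + 0x8080) >> 8;
+// }
+// The subsampled rows first pavgb adjacent pixels (the shufps $0x88/$0xdd
+// pairs), and the two-row variants also pavgb against src_stride, before
+// feeding the averages through these dot products.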
-void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix) {
- asm volatile (
- "movdqa %4,%%xmm5 \n"
- "movdqa %3,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm3 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "phaddw %%xmm1,%%xmm0 \n"
- "phaddw %%xmm3,%%xmm2 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%2 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_bgra), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- : "m"(kBGRAToY), // %3
- "m"(kAddY16) // %4
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void BGRAToYRow_Unaligned_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix) {
+void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int width) {
asm volatile (
"movdqa %4,%%xmm5 \n"
"movdqa %3,%%xmm4 \n"
@@ -1482,116 +1218,41 @@ void BGRAToYRow_Unaligned_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix) {
"psrlw $0x7,%%xmm2 \n"
"packuswb %%xmm2,%%xmm0 \n"
"paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%2 \n"
"movdqu %%xmm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x10,1) ",%1 \n"
+ "sub $0x10,%2 \n"
"jg 1b \n"
: "+r"(src_bgra), // %0
"+r"(dst_y), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
: "m"(kBGRAToY), // %3
"m"(kAddY16) // %4
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
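+// A scalar sketch of the luma math the *ToYRow functions implement:
+// pmaddubsw multiplies the unsigned channel bytes by signed 7-bit
+// coefficients and sums adjacent pairs, phaddw completes the per-pixel
+// sum, psrlw $7 rescales, and paddb applies the +16 bias (kAddY16).
+// With the coefficient tables' assumed BT.601 weights of 13/65/33 for
+// B/G/R, this is:
+// static __inline int RGBToY(uint8 r, uint8 g, uint8 b) {
+//   return ((33 * r + 65 * g + 13 * b) >> 7) + 16;
+// }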
void BGRAToUVRow_SSSE3(const uint8* src_bgra0, int src_stride_bgra,
uint8* dst_u, uint8* dst_v, int width) {
asm volatile (
- "movdqa %0,%%xmm4 \n"
- "movdqa %1,%%xmm3 \n"
- "movdqa %2,%%xmm5 \n"
- :
- : "m"(kBGRAToU), // %0
- "m"(kBGRAToV), // %1
- "m"(kAddUV128) // %2
- );
- asm volatile (
- "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- BUNDLEALIGN
- MEMOPREG(pavgb,0x00,0,4,1,xmm0) // pavgb (%0,%4,1),%%xmm0
- MEMOPREG(pavgb,0x10,0,4,1,xmm1) // pavgb 0x10(%0,%4,1),%%xmm1
- MEMOPREG(pavgb,0x20,0,4,1,xmm2) // pavgb 0x20(%0,%4,1),%%xmm2
- MEMOPREG(pavgb,0x30,0,4,1,xmm6) // pavgb 0x30(%0,%4,1),%%xmm6
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm7 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n"
- "shufps $0xdd,%%xmm1,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm0 \n"
- "movdqa %%xmm2,%%xmm7 \n"
- "shufps $0x88,%%xmm6,%%xmm2 \n"
- "shufps $0xdd,%%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm2,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm1 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm1 \n"
- "packsswb %%xmm1,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%3 \n"
- "movlps %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
- MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_bgra0), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- : "r"((intptr_t)(src_stride_bgra)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
- );
-}
-
-void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra0, int src_stride_bgra,
- uint8* dst_u, uint8* dst_v, int width) {
- asm volatile (
- "movdqa %0,%%xmm4 \n"
- "movdqa %1,%%xmm3 \n"
- "movdqa %2,%%xmm5 \n"
- :
- : "m"(kBGRAToU), // %0
- "m"(kBGRAToV), // %1
- "m"(kAddUV128) // %2
- );
- asm volatile (
+ "movdqa %5,%%xmm3 \n"
+ "movdqa %6,%%xmm4 \n"
+ "movdqa %7,%%xmm5 \n"
"sub %1,%2 \n"
LABELALIGN
"1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- BUNDLEALIGN
- MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
+ MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
"pavgb %%xmm7,%%xmm0 \n"
- MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
+ "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
+ MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
"pavgb %%xmm7,%%xmm1 \n"
- MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
+ "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
+ MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
"pavgb %%xmm7,%%xmm2 \n"
- MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
+ "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
+ MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
"pavgb %%xmm7,%%xmm6 \n"
+
"lea " MEMLEA(0x40,0) ",%0 \n"
"movdqa %%xmm0,%%xmm7 \n"
"shufps $0x88,%%xmm1,%%xmm0 \n"
@@ -1613,65 +1274,25 @@ void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra0, int src_stride_bgra,
"psraw $0x8,%%xmm1 \n"
"packsswb %%xmm1,%%xmm0 \n"
"paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%3 \n"
"movlps %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
"lea " MEMLEA(0x8,1) ",%1 \n"
+ "sub $0x10,%3 \n"
"jg 1b \n"
: "+r"(src_bgra0), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+rm"(width) // %3
- : "r"((intptr_t)(src_stride_bgra)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
+ : "r"((intptr_t)(src_stride_bgra)), // %4
+ "m"(kBGRAToV), // %5
+ "m"(kBGRAToU), // %6
+ "m"(kAddUV128) // %7
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
);
}
-void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix) {
- asm volatile (
- "movdqa %4,%%xmm5 \n"
- "movdqa %3,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm3 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "phaddw %%xmm1,%%xmm0 \n"
- "phaddw %%xmm3,%%xmm2 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%2 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_abgr), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- : "m"(kABGRToY), // %3
- "m"(kAddY16) // %4
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void ABGRToYRow_Unaligned_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix) {
+void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int width) {
asm volatile (
"movdqa %4,%%xmm5 \n"
"movdqa %3,%%xmm4 \n"
@@ -1692,60 +1313,20 @@ void ABGRToYRow_Unaligned_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix) {
"psrlw $0x7,%%xmm2 \n"
"packuswb %%xmm2,%%xmm0 \n"
"paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%2 \n"
"movdqu %%xmm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x10,1) ",%1 \n"
+ "sub $0x10,%2 \n"
"jg 1b \n"
: "+r"(src_abgr), // %0
"+r"(dst_y), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
: "m"(kABGRToY), // %3
"m"(kAddY16) // %4
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
-void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix) {
- asm volatile (
- "movdqa %4,%%xmm5 \n"
- "movdqa %3,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm3 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "phaddw %%xmm1,%%xmm0 \n"
- "phaddw %%xmm3,%%xmm2 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%2 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_rgba), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- : "m"(kRGBAToY), // %3
- "m"(kAddY16) // %4
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void RGBAToYRow_Unaligned_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix) {
+void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int width) {
asm volatile (
"movdqa %4,%%xmm5 \n"
"movdqa %3,%%xmm4 \n"
@@ -1766,116 +1347,41 @@ void RGBAToYRow_Unaligned_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix) {
"psrlw $0x7,%%xmm2 \n"
"packuswb %%xmm2,%%xmm0 \n"
"paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%2 \n"
"movdqu %%xmm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x10,1) ",%1 \n"
+ "sub $0x10,%2 \n"
"jg 1b \n"
: "+r"(src_rgba), // %0
"+r"(dst_y), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
: "m"(kRGBAToY), // %3
"m"(kAddY16) // %4
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
void ABGRToUVRow_SSSE3(const uint8* src_abgr0, int src_stride_abgr,
uint8* dst_u, uint8* dst_v, int width) {
asm volatile (
- "movdqa %0,%%xmm4 \n"
- "movdqa %1,%%xmm3 \n"
- "movdqa %2,%%xmm5 \n"
- :
- : "m"(kABGRToU), // %0
- "m"(kABGRToV), // %1
- "m"(kAddUV128) // %2
- );
- asm volatile (
- "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- BUNDLEALIGN
- MEMOPREG(pavgb,0x00,0,4,1,xmm0) // pavgb (%0,%4,1),%%xmm0
- MEMOPREG(pavgb,0x10,0,4,1,xmm1) // pavgb 0x10(%0,%4,1),%%xmm1
- MEMOPREG(pavgb,0x20,0,4,1,xmm2) // pavgb 0x20(%0,%4,1),%%xmm2
- MEMOPREG(pavgb,0x30,0,4,1,xmm6) // pavgb 0x30(%0,%4,1),%%xmm6
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm7 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n"
- "shufps $0xdd,%%xmm1,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm0 \n"
- "movdqa %%xmm2,%%xmm7 \n"
- "shufps $0x88,%%xmm6,%%xmm2 \n"
- "shufps $0xdd,%%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm2,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm1 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm1 \n"
- "packsswb %%xmm1,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%3 \n"
- "movlps %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
- MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_abgr0), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- : "r"((intptr_t)(src_stride_abgr)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
- );
-}
-
-void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr0, int src_stride_abgr,
- uint8* dst_u, uint8* dst_v, int width) {
- asm volatile (
- "movdqa %0,%%xmm4 \n"
- "movdqa %1,%%xmm3 \n"
- "movdqa %2,%%xmm5 \n"
- :
- : "m"(kABGRToU), // %0
- "m"(kABGRToV), // %1
- "m"(kAddUV128) // %2
- );
- asm volatile (
+ "movdqa %5,%%xmm3 \n"
+ "movdqa %6,%%xmm4 \n"
+ "movdqa %7,%%xmm5 \n"
"sub %1,%2 \n"
LABELALIGN
"1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- BUNDLEALIGN
- MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
+ MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
"pavgb %%xmm7,%%xmm0 \n"
- MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
+ "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
+ MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
"pavgb %%xmm7,%%xmm1 \n"
- MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
+ "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
+ MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
"pavgb %%xmm7,%%xmm2 \n"
- MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
+ "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
+ MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
"pavgb %%xmm7,%%xmm6 \n"
+
"lea " MEMLEA(0x40,0) ",%0 \n"
"movdqa %%xmm0,%%xmm7 \n"
"shufps $0x88,%%xmm1,%%xmm0 \n"
@@ -1897,121 +1403,46 @@ void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr0, int src_stride_abgr,
"psraw $0x8,%%xmm1 \n"
"packsswb %%xmm1,%%xmm0 \n"
"paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%3 \n"
"movlps %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
"lea " MEMLEA(0x8,1) ",%1 \n"
+ "sub $0x10,%3 \n"
"jg 1b \n"
: "+r"(src_abgr0), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+rm"(width) // %3
- : "r"((intptr_t)(src_stride_abgr)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
+ : "r"((intptr_t)(src_stride_abgr)), // %4
+ "m"(kABGRToV), // %5
+ "m"(kABGRToU), // %6
+ "m"(kAddUV128) // %7
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
);
}
void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba,
uint8* dst_u, uint8* dst_v, int width) {
asm volatile (
- "movdqa %0,%%xmm4 \n"
- "movdqa %1,%%xmm3 \n"
- "movdqa %2,%%xmm5 \n"
- :
- : "m"(kRGBAToU), // %0
- "m"(kRGBAToV), // %1
- "m"(kAddUV128) // %2
- );
- asm volatile (
- "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- BUNDLEALIGN
- MEMOPREG(pavgb,0x00,0,4,1,xmm0) // pavgb (%0,%4,1),%%xmm0
- MEMOPREG(pavgb,0x10,0,4,1,xmm1) // pavgb 0x10(%0,%4,1),%%xmm1
- MEMOPREG(pavgb,0x20,0,4,1,xmm2) // pavgb 0x20(%0,%4,1),%%xmm2
- MEMOPREG(pavgb,0x30,0,4,1,xmm6) // pavgb 0x30(%0,%4,1),%%xmm6
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm7 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n"
- "shufps $0xdd,%%xmm1,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm0 \n"
- "movdqa %%xmm2,%%xmm7 \n"
- "shufps $0x88,%%xmm6,%%xmm2 \n"
- "shufps $0xdd,%%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm2,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm1 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm1 \n"
- "packsswb %%xmm1,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%3 \n"
- "movlps %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
- MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_rgba0), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- : "r"((intptr_t)(src_stride_rgba))
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
- );
-}
-
-void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_rgba0, int src_stride_rgba,
- uint8* dst_u, uint8* dst_v, int width) {
- asm volatile (
- "movdqa %0,%%xmm4 \n"
- "movdqa %1,%%xmm3 \n"
- "movdqa %2,%%xmm5 \n"
- :
- : "m"(kRGBAToU), // %0
- "m"(kRGBAToV), // %1
- "m"(kAddUV128) // %2
- );
- asm volatile (
+ "movdqa %5,%%xmm3 \n"
+ "movdqa %6,%%xmm4 \n"
+ "movdqa %7,%%xmm5 \n"
"sub %1,%2 \n"
LABELALIGN
"1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- BUNDLEALIGN
- MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
+ MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
"pavgb %%xmm7,%%xmm0 \n"
- MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
+ "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
+ MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
"pavgb %%xmm7,%%xmm1 \n"
- MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
+ "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
+ MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
"pavgb %%xmm7,%%xmm2 \n"
- MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
+ "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
+ MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
"pavgb %%xmm7,%%xmm6 \n"
+
"lea " MEMLEA(0x40,0) ",%0 \n"
"movdqa %%xmm0,%%xmm7 \n"
"shufps $0x88,%%xmm1,%%xmm0 \n"
@@ -2033,175 +1464,217 @@ void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_rgba0, int src_stride_rgba,
"psraw $0x8,%%xmm1 \n"
"packsswb %%xmm1,%%xmm0 \n"
"paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%3 \n"
"movlps %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
"lea " MEMLEA(0x8,1) ",%1 \n"
+ "sub $0x10,%3 \n"
"jg 1b \n"
: "+r"(src_rgba0), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+rm"(width) // %3
- : "r"((intptr_t)(src_stride_rgba)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
+ : "r"((intptr_t)(src_stride_rgba)), // %4
+ "m"(kRGBAToV), // %5
+ "m"(kRGBAToU), // %6
+ "m"(kAddUV128) // %7
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
);
}
-#endif // HAS_ARGBTOUVROW_SSSE3
-
-#ifdef HAS_I422TOARGBROW_SSSE3
-#define UB 127 /* min(63,(int8)(2.018 * 64)) */
-#define UG -25 /* (int8)(-0.391 * 64 - 0.5) */
-#define UR 0
-
-#define VB 0
-#define VG -52 /* (int8)(-0.813 * 64 - 0.5) */
-#define VR 102 /* (int8)(1.596 * 64 + 0.5) */
-
-// Bias
-#define BB UB * 128 + VB * 128
-#define BG UG * 128 + VG * 128
-#define BR UR * 128 + VR * 128
-
-#define YG 74 /* (int8)(1.164 * 64 + 0.5) */
-
-struct {
- vec8 kUVToB; // 0
- vec8 kUVToG; // 16
- vec8 kUVToR; // 32
- vec16 kUVBiasB; // 48
- vec16 kUVBiasG; // 64
- vec16 kUVBiasR; // 80
- vec16 kYSub16; // 96
- vec16 kYToRgb; // 112
- vec8 kVUToB; // 128
- vec8 kVUToG; // 144
- vec8 kVUToR; // 160
-} static SIMD_ALIGNED(kYuvConstants) = {
- { UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB },
- { UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG },
- { UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR },
- { BB, BB, BB, BB, BB, BB, BB, BB },
- { BG, BG, BG, BG, BG, BG, BG, BG },
- { BR, BR, BR, BR, BR, BR, BR, BR },
- { 16, 16, 16, 16, 16, 16, 16, 16 },
- { YG, YG, YG, YG, YG, YG, YG, YG },
- { VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB },
- { VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG },
- { VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR }
-};
+#if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2)
// Read 8 UV from 444
#define READYUV444 \
"movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \
- BUNDLEALIGN \
MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \
"lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \
- "punpcklbw %%xmm1,%%xmm0 \n"
+ "punpcklbw %%xmm1,%%xmm0 \n" \
+ "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \
+ "punpcklbw %%xmm4,%%xmm4 \n" \
+ "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n"
// Read 4 UV from 422, upsample to 8 UV
#define READYUV422 \
"movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \
- BUNDLEALIGN \
MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \
"lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \
"punpcklbw %%xmm1,%%xmm0 \n" \
- "punpcklwd %%xmm0,%%xmm0 \n"
+ "punpcklwd %%xmm0,%%xmm0 \n" \
+ "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \
+ "punpcklbw %%xmm4,%%xmm4 \n" \
+ "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n"
-// Read 2 UV from 411, upsample to 8 UV
-#define READYUV411 \
+// Read 4 UV from 422, upsample to 8 UV. With 8 Alpha.
+#define READYUVA422 \
"movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \
- BUNDLEALIGN \
MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \
+ "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \
+ "punpcklbw %%xmm1,%%xmm0 \n" \
+ "punpcklwd %%xmm0,%%xmm0 \n" \
+ "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \
+ "punpcklbw %%xmm4,%%xmm4 \n" \
+ "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \
+ "movq " MEMACCESS([a_buf]) ",%%xmm5 \n" \
+ "lea " MEMLEA(0x8, [a_buf]) ",%[a_buf] \n"
+
+// Read 2 UV from 411, upsample to 8 UV.
+// Reading 4 bytes with movd would over-read the 2 valid bytes and is an
+// msan violation:
+// "movd " MEMACCESS([u_buf]) ",%%xmm0 \n"
+// MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1)
+// pinsrw also fails under drmemory:
+// __asm pinsrw xmm0, [esi], 0 /* U */
+// __asm pinsrw xmm1, [esi + edi], 0 /* V */
+// so READYUV411_TEMP below loads the 2 bytes through a scratch GPR
+// (movzwl) instead.
+#define READYUV411_TEMP \
+ "movzwl " MEMACCESS([u_buf]) ",%[temp] \n" \
+ "movd %[temp],%%xmm0 \n" \
+ MEMOPARG(movzwl,0x00,[u_buf],[v_buf],1,[temp]) " \n" \
+ "movd %[temp],%%xmm1 \n" \
"lea " MEMLEA(0x2, [u_buf]) ",%[u_buf] \n" \
"punpcklbw %%xmm1,%%xmm0 \n" \
"punpcklwd %%xmm0,%%xmm0 \n" \
- "punpckldq %%xmm0,%%xmm0 \n"
+ "punpckldq %%xmm0,%%xmm0 \n" \
+ "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \
+ "punpcklbw %%xmm4,%%xmm4 \n" \
+ "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n"
// Read 4 UV from NV12, upsample to 8 UV
#define READNV12 \
"movq " MEMACCESS([uv_buf]) ",%%xmm0 \n" \
"lea " MEMLEA(0x8, [uv_buf]) ",%[uv_buf] \n" \
- "punpcklwd %%xmm0,%%xmm0 \n"
+ "punpcklwd %%xmm0,%%xmm0 \n" \
+ "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \
+ "punpcklbw %%xmm4,%%xmm4 \n" \
+ "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n"
+// Read 4 VU from NV21, upsample to 8 UV
+#define READNV21 \
+ "movq " MEMACCESS([vu_buf]) ",%%xmm0 \n" \
+ "lea " MEMLEA(0x8, [vu_buf]) ",%[vu_buf] \n" \
+ "pshufb %[kShuffleNV21], %%xmm0 \n" \
+ "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \
+ "punpcklbw %%xmm4,%%xmm4 \n" \
+ "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n"
+
+// Read 4 YUY2 macropixels (8 Y) and upsample the 4 UV pairs to 8 UV.
+#define READYUY2 \
+ "movdqu " MEMACCESS([yuy2_buf]) ",%%xmm4 \n" \
+ "pshufb %[kShuffleYUY2Y], %%xmm4 \n" \
+ "movdqu " MEMACCESS([yuy2_buf]) ",%%xmm0 \n" \
+ "pshufb %[kShuffleYUY2UV], %%xmm0 \n" \
+ "lea " MEMLEA(0x10, [yuy2_buf]) ",%[yuy2_buf] \n"
+
+// Read 4 UYVY macropixels (8 Y) and upsample the 4 UV pairs to 8 UV.
+#define READUYVY \
+ "movdqu " MEMACCESS([uyvy_buf]) ",%%xmm4 \n" \
+ "pshufb %[kShuffleUYVYY], %%xmm4 \n" \
+ "movdqu " MEMACCESS([uyvy_buf]) ",%%xmm0 \n" \
+ "pshufb %[kShuffleUYVYUV], %%xmm0 \n" \
+ "lea " MEMLEA(0x10, [uyvy_buf]) ",%[uyvy_buf] \n"
+
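+// Each READ* macro leaves 8 interleaved U/V byte pairs in xmm0 and 8 Y
+// samples, duplicated into 16-bit lanes, in xmm4 (READYUVA422 also loads
+// 8 alpha bytes into xmm5). That register contract is what YUVTORGB
+// consumes below.
+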
+#if defined(__x86_64__)
+#define YUVTORGB_SETUP(yuvconstants) \
+ "movdqa " MEMACCESS([yuvconstants]) ",%%xmm8 \n" \
+ "movdqa " MEMACCESS2(32, [yuvconstants]) ",%%xmm9 \n" \
+ "movdqa " MEMACCESS2(64, [yuvconstants]) ",%%xmm10 \n" \
+ "movdqa " MEMACCESS2(96, [yuvconstants]) ",%%xmm11 \n" \
+ "movdqa " MEMACCESS2(128, [yuvconstants]) ",%%xmm12 \n" \
+ "movdqa " MEMACCESS2(160, [yuvconstants]) ",%%xmm13 \n" \
+ "movdqa " MEMACCESS2(192, [yuvconstants]) ",%%xmm14 \n"
// Convert 8 pixels: 8 UV and 8 Y
-#define YUVTORGB \
+#define YUVTORGB(yuvconstants) \
"movdqa %%xmm0,%%xmm1 \n" \
"movdqa %%xmm0,%%xmm2 \n" \
- "pmaddubsw " MEMACCESS([kYuvConstants]) ",%%xmm0 \n" \
- "pmaddubsw " MEMACCESS2(16, [kYuvConstants]) ",%%xmm1 \n" \
- "pmaddubsw " MEMACCESS2(32, [kYuvConstants]) ",%%xmm2 \n" \
- "psubw " MEMACCESS2(48, [kYuvConstants]) ",%%xmm0 \n" \
- "psubw " MEMACCESS2(64, [kYuvConstants]) ",%%xmm1 \n" \
- "psubw " MEMACCESS2(80, [kYuvConstants]) ",%%xmm2 \n" \
- "movq " MEMACCESS([y_buf]) ",%%xmm3 \n" \
- "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \
- "punpcklbw %%xmm4,%%xmm3 \n" \
- "psubsw " MEMACCESS2(96, [kYuvConstants]) ",%%xmm3 \n" \
- "pmullw " MEMACCESS2(112, [kYuvConstants]) ",%%xmm3 \n" \
- "paddsw %%xmm3,%%xmm0 \n" \
- "paddsw %%xmm3,%%xmm1 \n" \
- "paddsw %%xmm3,%%xmm2 \n" \
+ "movdqa %%xmm0,%%xmm3 \n" \
+ "movdqa %%xmm11,%%xmm0 \n" \
+ "pmaddubsw %%xmm8,%%xmm1 \n" \
+ "psubw %%xmm1,%%xmm0 \n" \
+ "movdqa %%xmm12,%%xmm1 \n" \
+ "pmaddubsw %%xmm9,%%xmm2 \n" \
+ "psubw %%xmm2,%%xmm1 \n" \
+ "movdqa %%xmm13,%%xmm2 \n" \
+ "pmaddubsw %%xmm10,%%xmm3 \n" \
+ "psubw %%xmm3,%%xmm2 \n" \
+ "pmulhuw %%xmm14,%%xmm4 \n" \
+ "paddsw %%xmm4,%%xmm0 \n" \
+ "paddsw %%xmm4,%%xmm1 \n" \
+ "paddsw %%xmm4,%%xmm2 \n" \
"psraw $0x6,%%xmm0 \n" \
"psraw $0x6,%%xmm1 \n" \
"psraw $0x6,%%xmm2 \n" \
"packuswb %%xmm0,%%xmm0 \n" \
"packuswb %%xmm1,%%xmm1 \n" \
"packuswb %%xmm2,%%xmm2 \n"
+#define YUVTORGB_REGS \
+ "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14",
-// Convert 8 pixels: 8 VU and 8 Y
-#define YVUTORGB \
+#else
+#define YUVTORGB_SETUP(yuvconstants)
+// Convert 8 pixels: 8 UV and 8 Y
+#define YUVTORGB(yuvconstants) \
"movdqa %%xmm0,%%xmm1 \n" \
"movdqa %%xmm0,%%xmm2 \n" \
- "pmaddubsw " MEMACCESS2(128, [kYuvConstants]) ",%%xmm0 \n" \
- "pmaddubsw " MEMACCESS2(144, [kYuvConstants]) ",%%xmm1 \n" \
- "pmaddubsw " MEMACCESS2(160, [kYuvConstants]) ",%%xmm2 \n" \
- "psubw " MEMACCESS2(48, [kYuvConstants]) ",%%xmm0 \n" \
- "psubw " MEMACCESS2(64, [kYuvConstants]) ",%%xmm1 \n" \
- "psubw " MEMACCESS2(80, [kYuvConstants]) ",%%xmm2 \n" \
- "movq " MEMACCESS([y_buf]) ",%%xmm3 \n" \
- "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \
- "punpcklbw %%xmm4,%%xmm3 \n" \
- "psubsw " MEMACCESS2(96, [kYuvConstants]) ",%%xmm3 \n" \
- "pmullw " MEMACCESS2(112, [kYuvConstants]) ",%%xmm3 \n" \
- "paddsw %%xmm3,%%xmm0 \n" \
- "paddsw %%xmm3,%%xmm1 \n" \
- "paddsw %%xmm3,%%xmm2 \n" \
+ "movdqa %%xmm0,%%xmm3 \n" \
+ "movdqa " MEMACCESS2(96, [yuvconstants]) ",%%xmm0 \n" \
+ "pmaddubsw " MEMACCESS([yuvconstants]) ",%%xmm1 \n" \
+ "psubw %%xmm1,%%xmm0 \n" \
+ "movdqa " MEMACCESS2(128, [yuvconstants]) ",%%xmm1 \n" \
+ "pmaddubsw " MEMACCESS2(32, [yuvconstants]) ",%%xmm2 \n" \
+ "psubw %%xmm2,%%xmm1 \n" \
+ "movdqa " MEMACCESS2(160, [yuvconstants]) ",%%xmm2 \n" \
+ "pmaddubsw " MEMACCESS2(64, [yuvconstants]) ",%%xmm3 \n" \
+ "psubw %%xmm3,%%xmm2 \n" \
+ "pmulhuw " MEMACCESS2(192, [yuvconstants]) ",%%xmm4 \n" \
+ "paddsw %%xmm4,%%xmm0 \n" \
+ "paddsw %%xmm4,%%xmm1 \n" \
+ "paddsw %%xmm4,%%xmm2 \n" \
"psraw $0x6,%%xmm0 \n" \
"psraw $0x6,%%xmm1 \n" \
"psraw $0x6,%%xmm2 \n" \
"packuswb %%xmm0,%%xmm0 \n" \
"packuswb %%xmm1,%%xmm1 \n" \
"packuswb %%xmm2,%%xmm2 \n"
+#define YUVTORGB_REGS
+#endif
+
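+// Scalar equivalent of YUVTORGB, mirroring the instruction sequence above.
+// xmm4 holds each Y duplicated into a 16-bit lane (y * 0x0101), so the
+// pmulhuw by kYToRgb computes the Y gain; the pmaddubsw UV dot products
+// are subtracted from the per-channel bias words; paddsw/psraw $6/packuswb
+// sum, scale and clamp. Using the struct YuvConstants layout assumed above
+// (offsets 0/32/64 = kUVToB/G/R, 96/128/160 = biases, 192 = kYToRgb; exact
+// values live in row_common.cc):
+// y1 = ((uint32)(y * 0x0101) * YG) >> 16;
+// b  = Clamp((BiasB - (u * UB + v * VB) + y1) >> 6);
+// g  = Clamp((BiasG - (u * UG + v * VG) + y1) >> 6);
+// r  = Clamp((BiasR - (u * UR + v * VR) + y1) >> 6);
+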
+// Store 8 ARGB values.
+#define STOREARGB \
+ "punpcklbw %%xmm1,%%xmm0 \n" \
+ "punpcklbw %%xmm5,%%xmm2 \n" \
+ "movdqa %%xmm0,%%xmm1 \n" \
+ "punpcklwd %%xmm2,%%xmm0 \n" \
+ "punpckhwd %%xmm2,%%xmm1 \n" \
+ "movdqu %%xmm0," MEMACCESS([dst_argb]) " \n" \
+ "movdqu %%xmm1," MEMACCESS2(0x10, [dst_argb]) " \n" \
+ "lea " MEMLEA(0x20, [dst_argb]) ", %[dst_argb] \n"
+
+// Store 8 RGBA values.
+#define STORERGBA \
+ "pcmpeqb %%xmm5,%%xmm5 \n" \
+ "punpcklbw %%xmm2,%%xmm1 \n" \
+ "punpcklbw %%xmm0,%%xmm5 \n" \
+ "movdqa %%xmm5,%%xmm0 \n" \
+ "punpcklwd %%xmm1,%%xmm5 \n" \
+ "punpckhwd %%xmm1,%%xmm0 \n" \
+ "movdqu %%xmm5," MEMACCESS([dst_rgba]) " \n" \
+ "movdqu %%xmm0," MEMACCESS2(0x10, [dst_rgba]) " \n" \
+ "lea " MEMLEA(0x20, [dst_rgba]) ",%[dst_rgba] \n"
void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
+ YUVTORGB_SETUP(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
LABELALIGN
"1: \n"
READYUV444
- YUVTORGB
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpcklbw %%xmm5,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpckhwd %%xmm2,%%xmm1 \n"
- "movdqa %%xmm0," MEMACCESS([dst_argb]) " \n"
- "movdqa %%xmm1," MEMACCESS2(0x10,[dst_argb]) " \n"
- "lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
+ YUVTORGB(yuvconstants)
+ STOREARGB
"sub $0x8,%[width] \n"
"jg 1b \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
@@ -2209,14 +1682,9 @@ void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf,
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
+ : "memory", "cc", NACL_R14 YUVTORGB_REGS
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
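+// Note the planar rows' "sub %[u_buf],%[v_buf]" above: V is addressed as
+// u_buf plus that fixed difference, so a single lea on u_buf advances
+// both chroma planes each iteration.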
@@ -2224,27 +1692,17 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
int width) {
-// fpic 32 bit gcc 4.2 on OSX runs out of GPR regs.
-#if defined(__i386__)
asm volatile (
+ YUVTORGB_SETUP(yuvconstants)
"movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n"
"movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n"
- :: [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0),
- [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24));
-#endif
-
- asm volatile (
-#if !defined(__i386__)
- "movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n"
- "movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n"
-#endif
"sub %[u_buf],%[v_buf] \n"
- "pxor %%xmm4,%%xmm4 \n"
LABELALIGN
"1: \n"
READYUV422
- YUVTORGB
+ YUVTORGB(yuvconstants)
"punpcklbw %%xmm1,%%xmm0 \n"
"punpcklbw %%xmm2,%%xmm2 \n"
"movdqa %%xmm0,%%xmm1 \n"
@@ -2256,83 +1714,22 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf,
"movq %%xmm0," MEMACCESS([dst_rgb24]) "\n"
"movdqu %%xmm1," MEMACCESS2(0x8,[dst_rgb24]) "\n"
"lea " MEMLEA(0x18,[dst_rgb24]) ",%[dst_rgb24] \n"
- "sub $0x8,%[width] \n"
+ "subl $0x8,%[width] \n"
"jg 1b \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[u_buf]"+r"(u_buf), // %[u_buf]
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_rgb24]"+r"(dst_rgb24), // %[dst_rgb24]
+#if defined(__i386__) && defined(__pic__)
+ [width]"+m"(width) // %[width]
+#else
[width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB)
-#if !defined(__i386__)
- , [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0),
+#endif
+ : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
+ [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0),
[kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24)
-#endif
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
-#endif
- );
-}
-
-void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_raw,
- int width) {
-// fpic 32 bit gcc 4.2 on OSX runs out of GPR regs.
-#if defined(__i386__)
- asm volatile (
- "movdqa %[kShuffleMaskARGBToRAW_0],%%xmm5 \n"
- "movdqa %[kShuffleMaskARGBToRAW],%%xmm6 \n"
- :: [kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0),
- [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW));
-#endif
-
- asm volatile (
-#if !defined(__i386__)
- "movdqa %[kShuffleMaskARGBToRAW_0],%%xmm5 \n"
- "movdqa %[kShuffleMaskARGBToRAW],%%xmm6 \n"
-#endif
- "sub %[u_buf],%[v_buf] \n"
- "pxor %%xmm4,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- READYUV422
- YUVTORGB
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpcklbw %%xmm2,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpckhwd %%xmm2,%%xmm1 \n"
- "pshufb %%xmm5,%%xmm0 \n"
- "pshufb %%xmm6,%%xmm1 \n"
- "palignr $0xc,%%xmm0,%%xmm1 \n"
- "movq %%xmm0," MEMACCESS([dst_raw]) " \n"
- "movdqu %%xmm1," MEMACCESS2(0x8,[dst_raw]) "\n"
- "lea " MEMLEA(0x18,[dst_raw]) ",%[dst_raw] \n"
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_raw]"+r"(dst_raw), // %[dst_raw]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB)
-#if !defined(__i386__)
- , [kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0),
- [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW)
-#endif
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
-#endif
+ : "memory", "cc", NACL_R14 YUVTORGB_REGS
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
);
}
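+// I422ToRGB24Row packs the 8 ARGB pixels built by YUVTORGB into 24 bytes
+// of RGB: the two kShuffleMaskARGBToRGB24 pshufb masks drop the alpha
+// bytes and palignr $0xc splices the shuffled halves back together.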
@@ -2340,23 +1737,17 @@ void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
+ YUVTORGB_SETUP(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
LABELALIGN
"1: \n"
READYUV422
- YUVTORGB
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpcklbw %%xmm5,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpckhwd %%xmm2,%%xmm1 \n"
- "movdqa %%xmm0," MEMACCESS([dst_argb]) "\n"
- "movdqa %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
- "lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
+ YUVTORGB(yuvconstants)
+ STOREARGB
"sub $0x8,%[width] \n"
"jg 1b \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
@@ -2364,385 +1755,181 @@ void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf,
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
+ : "memory", "cc", NACL_R14 YUVTORGB_REGS
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
+#ifdef HAS_I422ALPHATOARGBROW_SSSE3
+void OMITFP I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ const uint8* a_buf,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP(yuvconstants)
+ "sub %[u_buf],%[v_buf] \n"
+ LABELALIGN
+ "1: \n"
+ READYUVA422
+ YUVTORGB(yuvconstants)
+ STOREARGB
+ "subl $0x8,%[width] \n"
+ "jg 1b \n"
+ : [y_buf]"+r"(y_buf), // %[y_buf]
+ [u_buf]"+r"(u_buf), // %[u_buf]
+ [v_buf]"+r"(v_buf), // %[v_buf]
+ [a_buf]"+r"(a_buf), // %[a_buf]
+ [dst_argb]"+r"(dst_argb), // %[dst_argb]
+#if defined(__i386__) && defined(__pic__)
+ [width]"+m"(width) // %[width]
+#else
+ [width]"+rm"(width) // %[width]
+#endif
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
+ : "memory", "cc", NACL_R14 YUVTORGB_REGS
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+ );
+}
+#endif // HAS_I422ALPHATOARGBROW_SSSE3
+
+#ifdef HAS_I411TOARGBROW_SSSE3
void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width) {
+ int temp = 0;
asm volatile (
+ YUVTORGB_SETUP(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
LABELALIGN
"1: \n"
- READYUV411
- YUVTORGB
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpcklbw %%xmm5,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpckhwd %%xmm2,%%xmm1 \n"
- "movdqa %%xmm0," MEMACCESS([dst_argb]) "\n"
- "movdqa %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
- "lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
- "sub $0x8,%[width] \n"
+ READYUV411_TEMP
+ YUVTORGB(yuvconstants)
+ STOREARGB
+ "subl $0x8,%[width] \n"
"jg 1b \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[u_buf]"+r"(u_buf), // %[u_buf]
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
+ [temp]"+r"(temp), // %[temp]
+#if defined(__i386__) && defined(__pic__)
+ [width]"+m"(width) // %[width]
+#else
[width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
#endif
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
+ : "memory", "cc", NACL_R14 YUVTORGB_REGS
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
+#endif  // HAS_I411TOARGBROW_SSSE3
void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* uv_buf,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
+ YUVTORGB_SETUP(yuvconstants)
"pcmpeqb %%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
LABELALIGN
"1: \n"
READNV12
- YUVTORGB
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpcklbw %%xmm5,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpckhwd %%xmm2,%%xmm1 \n"
- "movdqa %%xmm0," MEMACCESS([dst_argb]) "\n"
- "movdqa %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
- "lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
+ YUVTORGB(yuvconstants)
+ STOREARGB
"sub $0x8,%[width] \n"
"jg 1b \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[uv_buf]"+r"(uv_buf), // %[uv_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc"
- // Does not use r14.
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
+ : "memory", "cc", YUVTORGB_REGS // Does not use r14.
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* uv_buf,
+ const uint8* vu_buf,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
+ YUVTORGB_SETUP(yuvconstants)
"pcmpeqb %%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
LABELALIGN
"1: \n"
- READNV12
- YVUTORGB
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpcklbw %%xmm5,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpckhwd %%xmm2,%%xmm1 \n"
- "movdqa %%xmm0," MEMACCESS([dst_argb]) "\n"
- "movdqa %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
- "lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
+ READNV21
+ YUVTORGB(yuvconstants)
+ STOREARGB
"sub $0x8,%[width] \n"
"jg 1b \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
- [uv_buf]"+r"(uv_buf), // %[uv_buf]
+ [vu_buf]"+r"(vu_buf), // %[vu_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc"
- // Does not use r14.
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
+ : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
+ [kShuffleNV21]"m"(kShuffleNV21)
+ : "memory", "cc", YUVTORGB_REGS // Does not use r14.
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
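+// NV21 differs from NV12 only in V/U byte order, so READNV21 swizzles the
+// interleaved VU bytes into UV order with one pshufb (kShuffleNV21) and
+// the same YUVTORGB path serves both; the old dedicated YVUTORGB macro
+// is gone.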
-void OMITFP I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "sub %[u_buf],%[v_buf] \n"
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- READYUV444
- YUVTORGB
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpcklbw %%xmm5,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpckhwd %%xmm2,%%xmm1 \n"
- "movdqu %%xmm0," MEMACCESS([dst_argb]) "\n"
- "movdqu %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
- "lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_argb]"+r"(dst_argb), // %[dst_argb]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void OMITFP I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "sub %[u_buf],%[v_buf] \n"
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- READYUV422
- YUVTORGB
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpcklbw %%xmm5,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpckhwd %%xmm2,%%xmm1 \n"
- "movdqu %%xmm0," MEMACCESS([dst_argb]) "\n"
- "movdqu %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
- "lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_argb]"+r"(dst_argb), // %[dst_argb]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void OMITFP I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "sub %[u_buf],%[v_buf] \n"
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- READYUV411
- YUVTORGB
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpcklbw %%xmm5,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpckhwd %%xmm2,%%xmm1 \n"
- "movdqu %%xmm0," MEMACCESS([dst_argb]) "\n"
- "movdqu %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
- "lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_argb]"+r"(dst_argb), // %[dst_argb]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void OMITFP NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- READNV12
- YUVTORGB
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpcklbw %%xmm5,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpckhwd %%xmm2,%%xmm1 \n"
- "movdqu %%xmm0," MEMACCESS([dst_argb]) "\n"
- "movdqu %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
- "lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [uv_buf]"+r"(uv_buf), // %[uv_buf]
- [dst_argb]"+r"(dst_argb), // %[dst_argb]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc"
- // Does not use r14.
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void OMITFP NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- READNV12
- YVUTORGB
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpcklbw %%xmm5,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpckhwd %%xmm2,%%xmm1 \n"
- "movdqu %%xmm0," MEMACCESS([dst_argb]) "\n"
- "movdqu %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
- "lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [uv_buf]"+r"(uv_buf), // %[uv_buf]
- [dst_argb]"+r"(dst_argb), // %[dst_argb]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc"
- // Does not use r14.
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_bgra,
+void OMITFP YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- "sub %[u_buf],%[v_buf] \n"
+ YUVTORGB_SETUP(yuvconstants)
"pcmpeqb %%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
LABELALIGN
"1: \n"
- READYUV422
- YUVTORGB
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "punpcklbw %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm2,%%xmm5 \n"
- "movdqa %%xmm5,%%xmm0 \n"
- "punpcklwd %%xmm1,%%xmm5 \n"
- "punpckhwd %%xmm1,%%xmm0 \n"
- "movdqa %%xmm5," MEMACCESS([dst_bgra]) "\n"
- "movdqa %%xmm0," MEMACCESS2(0x10,[dst_bgra]) "\n"
- "lea " MEMLEA(0x20,[dst_bgra]) ",%[dst_bgra] \n"
+ READYUY2
+ YUVTORGB(yuvconstants)
+ STOREARGB
"sub $0x8,%[width] \n"
"jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_bgra]"+r"(dst_bgra), // %[dst_bgra]
+ : [yuy2_buf]"+r"(yuy2_buf), // %[yuy2_buf]
+ [dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
+ : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
+ [kShuffleYUY2Y]"m"(kShuffleYUY2Y),
+ [kShuffleYUY2UV]"m"(kShuffleYUY2UV)
+ : "memory", "cc", YUVTORGB_REGS // Does not use r14.
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
-void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_abgr,
+void OMITFP UYVYToARGBRow_SSSE3(const uint8* uyvy_buf,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- "sub %[u_buf],%[v_buf] \n"
+ YUVTORGB_SETUP(yuvconstants)
"pcmpeqb %%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
LABELALIGN
"1: \n"
- READYUV422
- YUVTORGB
- "punpcklbw %%xmm1,%%xmm2 \n"
- "punpcklbw %%xmm5,%%xmm0 \n"
- "movdqa %%xmm2,%%xmm1 \n"
- "punpcklwd %%xmm0,%%xmm2 \n"
- "punpckhwd %%xmm0,%%xmm1 \n"
- "movdqa %%xmm2," MEMACCESS([dst_abgr]) "\n"
- "movdqa %%xmm1," MEMACCESS2(0x10,[dst_abgr]) "\n"
- "lea " MEMLEA(0x20,[dst_abgr]) ",%[dst_abgr] \n"
+ READUYVY
+ YUVTORGB(yuvconstants)
+ STOREARGB
"sub $0x8,%[width] \n"
"jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_abgr]"+r"(dst_abgr), // %[dst_abgr]
+ : [uyvy_buf]"+r"(uyvy_buf), // %[uyvy_buf]
+ [dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
+ : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
+ [kShuffleUYVYY]"m"(kShuffleUYVYY),
+ [kShuffleUYVYUV]"m"(kShuffleUYVYUV)
+ : "memory", "cc", YUVTORGB_REGS // Does not use r14.
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
@@ -2750,24 +1937,17 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_rgba,
+ const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
+ YUVTORGB_SETUP(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
LABELALIGN
"1: \n"
READYUV422
- YUVTORGB
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "punpcklbw %%xmm2,%%xmm1 \n"
- "punpcklbw %%xmm0,%%xmm5 \n"
- "movdqa %%xmm5,%%xmm0 \n"
- "punpcklwd %%xmm1,%%xmm5 \n"
- "punpckhwd %%xmm1,%%xmm0 \n"
- "movdqa %%xmm5," MEMACCESS([dst_rgba]) "\n"
- "movdqa %%xmm0," MEMACCESS2(0x10,[dst_rgba]) "\n"
- "lea " MEMLEA(0x20,[dst_rgba]) ",%[dst_rgba] \n"
+ YUVTORGB(yuvconstants)
+ STORERGBA
"sub $0x8,%[width] \n"
"jg 1b \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
@@ -2775,160 +1955,446 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_rgba]"+r"(dst_rgba), // %[dst_rgba]
[width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void OMITFP I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_bgra,
- int width) {
- asm volatile (
- "sub %[u_buf],%[v_buf] \n"
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- READYUV422
- YUVTORGB
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "punpcklbw %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm2,%%xmm5 \n"
- "movdqa %%xmm5,%%xmm0 \n"
- "punpcklwd %%xmm1,%%xmm5 \n"
- "punpckhwd %%xmm1,%%xmm0 \n"
- "movdqu %%xmm5," MEMACCESS([dst_bgra]) "\n"
- "movdqu %%xmm0," MEMACCESS2(0x10,[dst_bgra]) "\n"
- "lea " MEMLEA(0x20,[dst_bgra]) ",%[dst_bgra] \n"
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_bgra]"+r"(dst_bgra), // %[dst_bgra]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void OMITFP I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_abgr,
- int width) {
- asm volatile (
- "sub %[u_buf],%[v_buf] \n"
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- READYUV422
- YUVTORGB
- "punpcklbw %%xmm1,%%xmm2 \n"
- "punpcklbw %%xmm5,%%xmm0 \n"
- "movdqa %%xmm2,%%xmm1 \n"
- "punpcklwd %%xmm0,%%xmm2 \n"
- "punpckhwd %%xmm0,%%xmm1 \n"
- "movdqu %%xmm2," MEMACCESS([dst_abgr]) "\n"
- "movdqu %%xmm1," MEMACCESS2(0x10,[dst_abgr]) "\n"
- "lea " MEMLEA(0x20,[dst_abgr]) ",%[dst_abgr] \n"
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_abgr]"+r"(dst_abgr), // %[dst_abgr]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void OMITFP I422ToRGBARow_Unaligned_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_rgba,
- int width) {
- asm volatile (
- "sub %[u_buf],%[v_buf] \n"
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- READYUV422
- YUVTORGB
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "punpcklbw %%xmm2,%%xmm1 \n"
- "punpcklbw %%xmm0,%%xmm5 \n"
- "movdqa %%xmm5,%%xmm0 \n"
- "punpcklwd %%xmm1,%%xmm5 \n"
- "punpckhwd %%xmm1,%%xmm0 \n"
- "movdqu %%xmm5," MEMACCESS([dst_rgba]) "\n"
- "movdqu %%xmm0," MEMACCESS2(0x10,[dst_rgba]) "\n"
- "lea " MEMLEA(0x20,[dst_rgba]) ",%[dst_rgba] \n"
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_rgba]"+r"(dst_rgba), // %[dst_rgba]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
+ : "memory", "cc", NACL_R14 YUVTORGB_REGS
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
#endif // HAS_I422TOARGBROW_SSSE3
-#ifdef HAS_YTOARGBROW_SSE2
-void YToARGBRow_SSE2(const uint8* y_buf,
- uint8* dst_argb,
- int width) {
+// Read 16 UV from 444
+#define READYUV444_AVX2 \
+ "vmovdqu " MEMACCESS([u_buf]) ",%%xmm0 \n" \
+ MEMOPREG(vmovdqu, 0x00, [u_buf], [v_buf], 1, xmm1) \
+ "lea " MEMLEA(0x10, [u_buf]) ",%[u_buf] \n" \
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
+ "vpermq $0xd8,%%ymm1,%%ymm1 \n" \
+ "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \
+ "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \
+ "vpermq $0xd8,%%ymm4,%%ymm4 \n" \
+ "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
+ "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n"
+
+// Read 8 UV from 422, upsample to 16 UV.
+#define READYUV422_AVX2 \
+ "vmovq " MEMACCESS([u_buf]) ",%%xmm0 \n" \
+ MEMOPREG(vmovq, 0x00, [u_buf], [v_buf], 1, xmm1) \
+ "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \
+ "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
+ "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
+ "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \
+ "vpermq $0xd8,%%ymm4,%%ymm4 \n" \
+ "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
+ "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n"
+
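+// For reference, a scalar sketch of the 4:2:2 upsample READYUV422_AVX2
+// performs (illustrative only, not part of the original patch): each U/V
+// sample pair is shared by two adjacent pixels, so 8 UV pairs expand into
+// the 16 interleaved UV pairs the convert step consumes.
+static void ScalarUpsample422Sketch(const uint8* u_buf, const uint8* v_buf,
+                                    uint8* dst_uv /* 32 bytes out */) {
+  for (int i = 0; i < 16; ++i) {
+    dst_uv[2 * i + 0] = u_buf[i / 2];  // vpunpcklbw interleaves U and V...
+    dst_uv[2 * i + 1] = v_buf[i / 2];  // ...then vpunpcklwd duplicates pairs.
+  }
+}
+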
+// Read 8 UV from 422, upsample to 16 UV. With 16 Alpha.
+#define READYUVA422_AVX2 \
+ "vmovq " MEMACCESS([u_buf]) ",%%xmm0 \n" \
+ MEMOPREG(vmovq, 0x00, [u_buf], [v_buf], 1, xmm1) \
+ "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \
+ "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
+ "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
+ "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \
+ "vpermq $0xd8,%%ymm4,%%ymm4 \n" \
+ "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
+ "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" \
+ "vmovdqu " MEMACCESS([a_buf]) ",%%xmm5 \n" \
+ "vpermq $0xd8,%%ymm5,%%ymm5 \n" \
+ "lea " MEMLEA(0x10, [a_buf]) ",%[a_buf] \n"
+
+// Read 8 UV from NV12, upsample to 16 UV.
+#define READNV12_AVX2 \
+ "vmovdqu " MEMACCESS([uv_buf]) ",%%xmm0 \n" \
+ "lea " MEMLEA(0x10, [uv_buf]) ",%[uv_buf] \n" \
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
+ "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
+ "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \
+ "vpermq $0xd8,%%ymm4,%%ymm4 \n" \
+ "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
+ "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n"
+
+// Read 8 VU from NV21, upsample to 16 UV.
+#define READNV21_AVX2 \
+ "vmovdqu " MEMACCESS([vu_buf]) ",%%xmm0 \n" \
+ "lea " MEMLEA(0x10, [vu_buf]) ",%[vu_buf] \n" \
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
+ "vpshufb %[kShuffleNV21], %%ymm0, %%ymm0 \n" \
+ "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \
+ "vpermq $0xd8,%%ymm4,%%ymm4 \n" \
+ "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
+ "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n"
+
+// Read 8 YUY2 with 16 Y and upsample 8 UV to 16 UV.
+#define READYUY2_AVX2 \
+ "vmovdqu " MEMACCESS([yuy2_buf]) ",%%ymm4 \n" \
+ "vpshufb %[kShuffleYUY2Y], %%ymm4, %%ymm4 \n" \
+ "vmovdqu " MEMACCESS([yuy2_buf]) ",%%ymm0 \n" \
+ "vpshufb %[kShuffleYUY2UV], %%ymm0, %%ymm0 \n" \
+ "lea " MEMLEA(0x20, [yuy2_buf]) ",%[yuy2_buf] \n"
+
+// Read 8 UYVY with 16 Y and upsample 8 UV to 16 UV.
+#define READUYVY_AVX2 \
+ "vmovdqu " MEMACCESS([uyvy_buf]) ",%%ymm4 \n" \
+ "vpshufb %[kShuffleUYVYY], %%ymm4, %%ymm4 \n" \
+ "vmovdqu " MEMACCESS([uyvy_buf]) ",%%ymm0 \n" \
+ "vpshufb %[kShuffleUYVYUV], %%ymm0, %%ymm0 \n" \
+ "lea " MEMLEA(0x20, [uyvy_buf]) ",%[uyvy_buf] \n"
+
+#if defined(__x86_64__)
+#define YUVTORGB_SETUP_AVX2(yuvconstants) \
+ "vmovdqa " MEMACCESS([yuvconstants]) ",%%ymm8 \n" \
+ "vmovdqa " MEMACCESS2(32, [yuvconstants]) ",%%ymm9 \n" \
+ "vmovdqa " MEMACCESS2(64, [yuvconstants]) ",%%ymm10 \n" \
+ "vmovdqa " MEMACCESS2(96, [yuvconstants]) ",%%ymm11 \n" \
+ "vmovdqa " MEMACCESS2(128, [yuvconstants]) ",%%ymm12 \n" \
+ "vmovdqa " MEMACCESS2(160, [yuvconstants]) ",%%ymm13 \n" \
+ "vmovdqa " MEMACCESS2(192, [yuvconstants]) ",%%ymm14 \n"
+#define YUVTORGB_AVX2(yuvconstants) \
+ "vpmaddubsw %%ymm10,%%ymm0,%%ymm2 \n" \
+ "vpmaddubsw %%ymm9,%%ymm0,%%ymm1 \n" \
+ "vpmaddubsw %%ymm8,%%ymm0,%%ymm0 \n" \
+ "vpsubw %%ymm2,%%ymm13,%%ymm2 \n" \
+ "vpsubw %%ymm1,%%ymm12,%%ymm1 \n" \
+ "vpsubw %%ymm0,%%ymm11,%%ymm0 \n" \
+ "vpmulhuw %%ymm14,%%ymm4,%%ymm4 \n" \
+ "vpaddsw %%ymm4,%%ymm0,%%ymm0 \n" \
+ "vpaddsw %%ymm4,%%ymm1,%%ymm1 \n" \
+ "vpaddsw %%ymm4,%%ymm2,%%ymm2 \n" \
+ "vpsraw $0x6,%%ymm0,%%ymm0 \n" \
+ "vpsraw $0x6,%%ymm1,%%ymm1 \n" \
+ "vpsraw $0x6,%%ymm2,%%ymm2 \n" \
+ "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" \
+ "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" \
+ "vpackuswb %%ymm2,%%ymm2,%%ymm2 \n"
+#define YUVTORGB_REGS_AVX2 \
+ "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14",
+#else
+#define YUVTORGB_SETUP_AVX2(yuvconstants)
+// Convert 16 pixels: 16 UV and 16 Y.
+#define YUVTORGB_AVX2(yuvconstants) \
+ "vpmaddubsw " MEMACCESS2(64, [yuvconstants]) ",%%ymm0,%%ymm2 \n" \
+ "vpmaddubsw " MEMACCESS2(32, [yuvconstants]) ",%%ymm0,%%ymm1 \n" \
+ "vpmaddubsw " MEMACCESS([yuvconstants]) ",%%ymm0,%%ymm0 \n" \
+ "vmovdqu " MEMACCESS2(160, [yuvconstants]) ",%%ymm3 \n" \
+ "vpsubw %%ymm2,%%ymm3,%%ymm2 \n" \
+ "vmovdqu " MEMACCESS2(128, [yuvconstants]) ",%%ymm3 \n" \
+ "vpsubw %%ymm1,%%ymm3,%%ymm1 \n" \
+ "vmovdqu " MEMACCESS2(96, [yuvconstants]) ",%%ymm3 \n" \
+ "vpsubw %%ymm0,%%ymm3,%%ymm0 \n" \
+ "vpmulhuw " MEMACCESS2(192, [yuvconstants]) ",%%ymm4,%%ymm4 \n" \
+ "vpaddsw %%ymm4,%%ymm0,%%ymm0 \n" \
+ "vpaddsw %%ymm4,%%ymm1,%%ymm1 \n" \
+ "vpaddsw %%ymm4,%%ymm2,%%ymm2 \n" \
+ "vpsraw $0x6,%%ymm0,%%ymm0 \n" \
+ "vpsraw $0x6,%%ymm1,%%ymm1 \n" \
+ "vpsraw $0x6,%%ymm2,%%ymm2 \n" \
+ "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" \
+ "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" \
+ "vpackuswb %%ymm2,%%ymm2,%%ymm2 \n"
+#define YUVTORGB_REGS_AVX2
+#endif
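+
+// For reference, a scalar sketch of the fixed-point math YUVTORGB_AVX2
+// performs on one pixel (illustrative only, not part of the original patch).
+// The coefficient values below are an assumption based on the BT.601
+// constants libyuv uses in its C reference code; the vpmaddubsw/vpsubw pair
+// computes bias - uv*coef and vpmulhuw applies the 1.164 Y gain.
+static int ClampSketch(int v) { return v < 0 ? 0 : (v > 255 ? 255 : v); }
+static void YuvPixelSketch(uint8 y, uint8 u, uint8 v,
+                           uint8* b, uint8* g, uint8* r) {
+  const int UB = -128, UG = 25, VG = 52, VR = -102;  // round(coef * 64)
+  const int YG = 18997, YGB = -1160;                 // 1.164 in fixed point
+  const int BB = UB * 128 + YGB;                     // folds the -128 UV bias
+  const int BG = UG * 128 + VG * 128 + YGB;
+  const int BR = VR * 128 + YGB;
+  int y1 = (int)(((uint32)(y * 0x0101) * YG) >> 16);  // vpmulhuw on y,y bytes
+  *b = (uint8)ClampSketch((-(u * UB) + y1 + BB) >> 6);           // vpsraw $0x6
+  *g = (uint8)ClampSketch((-(u * UG + v * VG) + y1 + BG) >> 6);
+  *r = (uint8)ClampSketch((-(v * VR) + y1 + BR) >> 6);
+}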
+
+// Store 16 ARGB values.
+#define STOREARGB_AVX2 \
+ "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
+ "vpunpcklbw %%ymm5,%%ymm2,%%ymm2 \n" \
+ "vpermq $0xd8,%%ymm2,%%ymm2 \n" \
+ "vpunpcklwd %%ymm2,%%ymm0,%%ymm1 \n" \
+ "vpunpckhwd %%ymm2,%%ymm0,%%ymm0 \n" \
+ "vmovdqu %%ymm1," MEMACCESS([dst_argb]) " \n" \
+ "vmovdqu %%ymm0," MEMACCESS2(0x20, [dst_argb]) " \n" \
+ "lea " MEMLEA(0x40, [dst_argb]) ", %[dst_argb] \n"
+
+#ifdef HAS_I444TOARGBROW_AVX2
+// 16 pixels
+// 16 UV values with 16 Y producing 16 ARGB (64 bytes).
+void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
asm volatile (
- "pxor %%xmm5,%%xmm5 \n"
- "pcmpeqb %%xmm4,%%xmm4 \n"
- "pslld $0x18,%%xmm4 \n"
- "mov $0x00100010,%%eax \n"
- "movd %%eax,%%xmm3 \n"
- "pshufd $0x0,%%xmm3,%%xmm3 \n"
- "mov $0x004a004a,%%eax \n"
+ YUVTORGB_SETUP_AVX2(yuvconstants)
+ "sub %[u_buf],%[v_buf] \n"
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+ LABELALIGN
+ "1: \n"
+ READYUV444_AVX2
+ YUVTORGB_AVX2(yuvconstants)
+ STOREARGB_AVX2
+ "sub $0x10,%[width] \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : [y_buf]"+r"(y_buf), // %[y_buf]
+ [u_buf]"+r"(u_buf), // %[u_buf]
+ [v_buf]"+r"(v_buf), // %[v_buf]
+ [dst_argb]"+r"(dst_argb), // %[dst_argb]
+ [width]"+rm"(width) // %[width]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
+ : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+ );
+}
+#endif // HAS_I444TOARGBROW_AVX2
+
+#if defined(HAS_I422TOARGBROW_AVX2)
+// 16 pixels
+// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
+void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP_AVX2(yuvconstants)
+ "sub %[u_buf],%[v_buf] \n"
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+ LABELALIGN
+ "1: \n"
+ READYUV422_AVX2
+ YUVTORGB_AVX2(yuvconstants)
+ STOREARGB_AVX2
+ "sub $0x10,%[width] \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : [y_buf]"+r"(y_buf), // %[y_buf]
+ [u_buf]"+r"(u_buf), // %[u_buf]
+ [v_buf]"+r"(v_buf), // %[v_buf]
+ [dst_argb]"+r"(dst_argb), // %[dst_argb]
+ [width]"+rm"(width) // %[width]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
+ : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+ );
+}
+#endif // HAS_I422TOARGBROW_AVX2
+
+#if defined(HAS_I422ALPHATOARGBROW_AVX2)
+// 16 pixels
+// 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ARGB.
+void OMITFP I422AlphaToARGBRow_AVX2(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ const uint8* a_buf,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP_AVX2(yuvconstants)
+ "sub %[u_buf],%[v_buf] \n"
+ LABELALIGN
+ "1: \n"
+ READYUVA422_AVX2
+ YUVTORGB_AVX2(yuvconstants)
+ STOREARGB_AVX2
+ "subl $0x10,%[width] \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : [y_buf]"+r"(y_buf), // %[y_buf]
+ [u_buf]"+r"(u_buf), // %[u_buf]
+ [v_buf]"+r"(v_buf), // %[v_buf]
+ [a_buf]"+r"(a_buf), // %[a_buf]
+ [dst_argb]"+r"(dst_argb), // %[dst_argb]
+#if defined(__i386__) && defined(__pic__)
+ [width]"+m"(width) // %[width]
+#else
+ [width]"+rm"(width) // %[width]
+#endif
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
+ : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+ );
+}
+#endif // HAS_I422ALPHATOARGBROW_AVX2
+
+#if defined(HAS_I422TORGBAROW_AVX2)
+// 16 pixels
+// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes).
+void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP_AVX2(yuvconstants)
+ "sub %[u_buf],%[v_buf] \n"
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+ LABELALIGN
+ "1: \n"
+ READYUV422_AVX2
+ YUVTORGB_AVX2(yuvconstants)
+
+ // Step 3: Weave into RGBA
+ "vpunpcklbw %%ymm2,%%ymm1,%%ymm1 \n"
+ "vpermq $0xd8,%%ymm1,%%ymm1 \n"
+ "vpunpcklbw %%ymm0,%%ymm5,%%ymm2 \n"
+ "vpermq $0xd8,%%ymm2,%%ymm2 \n"
+ "vpunpcklwd %%ymm1,%%ymm2,%%ymm0 \n"
+ "vpunpckhwd %%ymm1,%%ymm2,%%ymm1 \n"
+ "vmovdqu %%ymm0," MEMACCESS([dst_argb]) "\n"
+ "vmovdqu %%ymm1," MEMACCESS2(0x20,[dst_argb]) "\n"
+ "lea " MEMLEA(0x40,[dst_argb]) ",%[dst_argb] \n"
+ "sub $0x10,%[width] \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : [y_buf]"+r"(y_buf), // %[y_buf]
+ [u_buf]"+r"(u_buf), // %[u_buf]
+ [v_buf]"+r"(v_buf), // %[v_buf]
+ [dst_argb]"+r"(dst_argb), // %[dst_argb]
+ [width]"+rm"(width) // %[width]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
+ : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+ );
+}
+#endif // HAS_I422TORGBAROW_AVX2
+
+#if defined(HAS_NV12TOARGBROW_AVX2)
+// 16 pixels.
+// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
+void OMITFP NV12ToARGBRow_AVX2(const uint8* y_buf,
+ const uint8* uv_buf,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP_AVX2(yuvconstants)
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+ LABELALIGN
+ "1: \n"
+ READNV12_AVX2
+ YUVTORGB_AVX2(yuvconstants)
+ STOREARGB_AVX2
+ "sub $0x10,%[width] \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : [y_buf]"+r"(y_buf), // %[y_buf]
+ [uv_buf]"+r"(uv_buf), // %[uv_buf]
+ [dst_argb]"+r"(dst_argb), // %[dst_argb]
+ [width]"+rm"(width) // %[width]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
+ : "memory", "cc", YUVTORGB_REGS_AVX2 // Does not use r14.
+ "xmm0", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+ );
+}
+#endif // HAS_NV12TOARGBROW_AVX2
+
+#if defined(HAS_NV21TOARGBROW_AVX2)
+// 16 pixels.
+// 8 VU values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
+void OMITFP NV21ToARGBRow_AVX2(const uint8* y_buf,
+ const uint8* vu_buf,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP_AVX2(yuvconstants)
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+ LABELALIGN
+ "1: \n"
+ READNV21_AVX2
+ YUVTORGB_AVX2(yuvconstants)
+ STOREARGB_AVX2
+ "sub $0x10,%[width] \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : [y_buf]"+r"(y_buf), // %[y_buf]
+ [vu_buf]"+r"(vu_buf), // %[vu_buf]
+ [dst_argb]"+r"(dst_argb), // %[dst_argb]
+ [width]"+rm"(width) // %[width]
+ : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
+ [kShuffleNV21]"m"(kShuffleNV21)
+ : "memory", "cc", YUVTORGB_REGS_AVX2 // Does not use r14.
+ "xmm0", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+ );
+}
+#endif // HAS_NV21TOARGBROW_AVX2
+
+#if defined(HAS_YUY2TOARGBROW_AVX2)
+// 16 pixels.
+// 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes).
+void OMITFP YUY2ToARGBRow_AVX2(const uint8* yuy2_buf,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP_AVX2(yuvconstants)
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+ LABELALIGN
+ "1: \n"
+ READYUY2_AVX2
+ YUVTORGB_AVX2(yuvconstants)
+ STOREARGB_AVX2
+ "sub $0x10,%[width] \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : [yuy2_buf]"+r"(yuy2_buf), // %[yuy2_buf]
+ [dst_argb]"+r"(dst_argb), // %[dst_argb]
+ [width]"+rm"(width) // %[width]
+ : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
+ [kShuffleYUY2Y]"m"(kShuffleYUY2Y),
+ [kShuffleYUY2UV]"m"(kShuffleYUY2UV)
+ : "memory", "cc", YUVTORGB_REGS_AVX2 // Does not use r14.
+ "xmm0", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+ );
+}
+#endif // HAS_YUY2TOARGBROW_AVX2
+
+#if defined(HAS_UYVYTOARGBROW_AVX2)
+// 16 pixels.
+// 8 UYVY values with 16 Y and 8 UV producing 16 ARGB (64 bytes).
+void OMITFP UYVYToARGBRow_AVX2(const uint8* uyvy_buf,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP_AVX2(yuvconstants)
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+ LABELALIGN
+ "1: \n"
+ READUYVY_AVX2
+ YUVTORGB_AVX2(yuvconstants)
+ STOREARGB_AVX2
+ "sub $0x10,%[width] \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : [uyvy_buf]"+r"(uyvy_buf), // %[uyvy_buf]
+ [dst_argb]"+r"(dst_argb), // %[dst_argb]
+ [width]"+rm"(width) // %[width]
+ : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
+ [kShuffleUYVYY]"m"(kShuffleUYVYY),
+ [kShuffleUYVYUV]"m"(kShuffleUYVYUV)
+ : "memory", "cc", YUVTORGB_REGS_AVX2 // Does not use r14.
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+ );
+}
+#endif // HAS_UYVYTOARGBROW_AVX2
+
+#ifdef HAS_I400TOARGBROW_SSE2
+void I400ToARGBRow_SSE2(const uint8* y_buf, uint8* dst_argb, int width) {
+ asm volatile (
+ "mov $0x4a354a35,%%eax \n" // 4a35 = 18997 = 1.164
"movd %%eax,%%xmm2 \n"
"pshufd $0x0,%%xmm2,%%xmm2 \n"
+ "mov $0x04880488,%%eax \n" // 0488 = 1160 = 1.164 * 16
+ "movd %%eax,%%xmm3 \n"
+ "pshufd $0x0,%%xmm3,%%xmm3 \n"
+ "pcmpeqb %%xmm4,%%xmm4 \n"
+ "pslld $0x18,%%xmm4 \n"
LABELALIGN
"1: \n"
// Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164
"movq " MEMACCESS(0) ",%%xmm0 \n"
"lea " MEMLEA(0x8,0) ",%0 \n"
- "punpcklbw %%xmm5,%%xmm0 \n"
+ "punpcklbw %%xmm0,%%xmm0 \n"
+ "pmulhuw %%xmm2,%%xmm0 \n"
"psubusw %%xmm3,%%xmm0 \n"
- "pmullw %%xmm2,%%xmm0 \n"
"psrlw $6, %%xmm0 \n"
"packuswb %%xmm0,%%xmm0 \n"
@@ -2939,8 +2405,8 @@ void YToARGBRow_SSE2(const uint8* y_buf,
"punpckhwd %%xmm1,%%xmm1 \n"
"por %%xmm4,%%xmm0 \n"
"por %%xmm4,%%xmm1 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "movdqa %%xmm1," MEMACCESS2(0x10,1) " \n"
+ "movdqu %%xmm0," MEMACCESS(1) " \n"
+ "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
"lea " MEMLEA(0x20,1) ",%1 \n"
"sub $0x8,%2 \n"
@@ -2950,12 +2416,57 @@ void YToARGBRow_SSE2(const uint8* y_buf,
"+rm"(width) // %2
:
: "memory", "cc", "eax"
-#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
-#endif
);
}
-#endif // HAS_YTOARGBROW_SSE2
+#endif // HAS_I400TOARGBROW_SSE2
+
+#ifdef HAS_I400TOARGBROW_AVX2
+// 16 pixels of Y converted to 16 pixels of ARGB (64 bytes).
+// note: vpunpcklbw mutates and vpackuswb unmutates.
+void I400ToARGBRow_AVX2(const uint8* y_buf, uint8* dst_argb, int width) {
+ asm volatile (
+ "mov $0x4a354a35,%%eax \n" // 0488 = 1160 = 1.164 * 16
+ "vmovd %%eax,%%xmm2 \n"
+ "vbroadcastss %%xmm2,%%ymm2 \n"
+ "mov $0x4880488,%%eax \n" // 4a35 = 18997 = 1.164
+ "vmovd %%eax,%%xmm3 \n"
+ "vbroadcastss %%xmm3,%%ymm3 \n"
+ "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
+ "vpslld $0x18,%%ymm4,%%ymm4 \n"
+
+ LABELALIGN
+ "1: \n"
+ // Step 1: Scale Y contribution to 16 G values. G = (y - 16) * 1.164
+ "vmovdqu " MEMACCESS(0) ",%%xmm0 \n"
+ "lea " MEMLEA(0x10,0) ",%0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpunpcklbw %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpsubusw %%ymm3,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x6,%%ymm0,%%ymm0 \n"
+ "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpunpcklbw %%ymm0,%%ymm0,%%ymm1 \n"
+ "vpermq $0xd8,%%ymm1,%%ymm1 \n"
+ "vpunpcklwd %%ymm1,%%ymm1,%%ymm0 \n"
+ "vpunpckhwd %%ymm1,%%ymm1,%%ymm1 \n"
+ "vpor %%ymm4,%%ymm0,%%ymm0 \n"
+ "vpor %%ymm4,%%ymm1,%%ymm1 \n"
+ "vmovdqu %%ymm0," MEMACCESS(1) " \n"
+ "vmovdqu %%ymm1," MEMACCESS2(0x20,1) " \n"
+ "lea " MEMLEA(0x40,1) ",%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(y_buf), // %0
+ "+r"(dst_argb), // %1
+ "+rm"(width) // %2
+ :
+ : "memory", "cc", "eax"
+ , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
+ );
+}
+#endif // HAS_I400TOARGBROW_AVX2
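+
+// For reference, a scalar sketch of the grey-scale expansion above
+// (illustrative only, not part of the original patch):
+// G = (Y - 16) * 1.164 in fixed point, replicated to B, G and R, with
+// alpha forced to 255 by the pslld $0x18 mask.
+static void I400ToARGBRowSketch(const uint8* y_buf, uint8* dst_argb,
+                                int width) {
+  for (int i = 0; i < width; ++i) {
+    uint32 g = ((uint32)(y_buf[i] * 0x0101) * 18997) >> 16;  // pmulhuw
+    g = g > 1160 ? (g - 1160) >> 6 : 0;                      // psubusw, psrlw
+    if (g > 255) g = 255;                                    // packuswb
+    dst_argb[4 * i + 0] = (uint8)g;  // B
+    dst_argb[4 * i + 1] = (uint8)g;  // G
+    dst_argb[4 * i + 2] = (uint8)g;  // R
+    dst_argb[4 * i + 3] = 255;       // A
+  }
+}
+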
#ifdef HAS_MIRRORROW_SSSE3
// Shuffle table for reversing the bytes.
@@ -2967,63 +2478,48 @@ void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
intptr_t temp_width = (intptr_t)(width);
asm volatile (
"movdqa %3,%%xmm5 \n"
- "lea " MEMLEA(-0x10,0) ",%0 \n"
LABELALIGN
"1: \n"
- MEMOPREG(movdqa,0x00,0,2,1,xmm0) // movdqa (%0,%2),%%xmm0
+ MEMOPREG(movdqu,-0x10,0,2,1,xmm0) // movdqu -0x10(%0,%2),%%xmm0
"pshufb %%xmm5,%%xmm0 \n"
- "sub $0x10,%2 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
+ "movdqu %%xmm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x10,1) ",%1 \n"
+ "sub $0x10,%2 \n"
"jg 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(temp_width) // %2
: "m"(kShuffleMirror) // %3
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm5"
-#endif
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm5"
);
}
#endif // HAS_MIRRORROW_SSSE3
-#ifdef HAS_MIRRORROW_SSE2
-void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
+#ifdef HAS_MIRRORROW_AVX2
+void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
intptr_t temp_width = (intptr_t)(width);
asm volatile (
- "lea " MEMLEA(-0x10,0) ",%0 \n"
+ "vbroadcastf128 %3,%%ymm5 \n"
LABELALIGN
"1: \n"
- MEMOPREG(movdqu,0x00,0,2,1,xmm0) // movdqu (%0,%2),%%xmm0
- "movdqa %%xmm0,%%xmm1 \n"
- "psllw $0x8,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "por %%xmm1,%%xmm0 \n"
- "pshuflw $0x1b,%%xmm0,%%xmm0 \n"
- "pshufhw $0x1b,%%xmm0,%%xmm0 \n"
- "pshufd $0x4e,%%xmm0,%%xmm0 \n"
- "sub $0x10,%2 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1)",%1 \n"
+ MEMOPREG(vmovdqu,-0x20,0,2,1,ymm0) // vmovdqu -0x20(%0,%2),%%ymm0
+ "vpshufb %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpermq $0x4e,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0," MEMACCESS(1) " \n"
+ "lea " MEMLEA(0x20,1) ",%1 \n"
+ "sub $0x20,%2 \n"
"jg 1b \n"
+ "vzeroupper \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(temp_width) // %2
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1"
-#endif
+ : "m"(kShuffleMirror) // %3
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm5"
);
}
-#endif // HAS_MIRRORROW_SSE2
+#endif // HAS_MIRRORROW_AVX2
#ifdef HAS_MIRRORROW_UV_SSSE3
// Shuffle table for reversing the bytes of UV channels.
@@ -3035,108 +2531,121 @@ void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v,
intptr_t temp_width = (intptr_t)(width);
asm volatile (
"movdqa %4,%%xmm1 \n"
- "lea " MEMLEA4(-0x10,0,3,2) ",%0 \n"
+ "lea " MEMLEA4(-0x10,0,3,2) ",%0 \n"
"sub %1,%2 \n"
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(-0x10,0) ",%0 \n"
+ "movdqu " MEMACCESS(0) ",%%xmm0 \n"
+ "lea " MEMLEA(-0x10,0) ",%0 \n"
"pshufb %%xmm1,%%xmm0 \n"
- "sub $8,%3 \n"
"movlpd %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
MEMOPMEM(movhpd,xmm0,0x00,1,2,1) // movhpd %%xmm0,(%1,%2)
"lea " MEMLEA(0x8,1) ",%1 \n"
+ "sub $8,%3 \n"
"jg 1b \n"
: "+r"(src), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+r"(temp_width) // %3
: "m"(kShuffleMirrorUV) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1"
-#endif
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1"
);
}
#endif // HAS_MIRRORROW_UV_SSSE3
-#ifdef HAS_ARGBMIRRORROW_SSSE3
-// Shuffle table for reversing the bytes.
-static uvec8 kARGBShuffleMirror = {
- 12u, 13u, 14u, 15u, 8u, 9u, 10u, 11u, 4u, 5u, 6u, 7u, 0u, 1u, 2u, 3u
-};
-void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
+#ifdef HAS_ARGBMIRRORROW_SSE2
+void ARGBMirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
intptr_t temp_width = (intptr_t)(width);
asm volatile (
"lea " MEMLEA4(-0x10,0,2,4) ",%0 \n"
- "movdqa %3,%%xmm5 \n"
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "pshufb %%xmm5,%%xmm0 \n"
+ "movdqu " MEMACCESS(0) ",%%xmm0 \n"
+ "pshufd $0x1b,%%xmm0,%%xmm0 \n"
"lea " MEMLEA(-0x10,0) ",%0 \n"
- "sub $0x4,%2 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
+ "movdqu %%xmm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x10,1) ",%1 \n"
+ "sub $0x4,%2 \n"
"jg 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(temp_width) // %2
- : "m"(kARGBShuffleMirror) // %3
+ :
: "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm5"
-#endif
+ , "xmm0"
);
}
-#endif // HAS_ARGBMIRRORROW_SSSE3
+#endif // HAS_ARGBMIRRORROW_SSE2
-#ifdef HAS_SPLITUVROW_SSE2
-void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
+#ifdef HAS_ARGBMIRRORROW_AVX2
+// Shuffle table for reversing the bytes.
+static const ulvec32 kARGBShuffleMirror_AVX2 = {
+ 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
+};
+void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
+ intptr_t temp_width = (intptr_t)(width);
asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
+ "vmovdqu %3,%%ymm5 \n"
+ LABELALIGN
+ "1: \n"
+ VMEMOPREG(vpermd,-0x20,0,2,4,ymm5,ymm0) // vpermd -0x20(%0,%2,4),ymm5,ymm0
+ "vmovdqu %%ymm0," MEMACCESS(1) " \n"
+ "lea " MEMLEA(0x20,1) ",%1 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(temp_width) // %2
+ : "m"(kARGBShuffleMirror_AVX2) // %3
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm5"
+ );
+}
+#endif // HAS_ARGBMIRRORROW_AVX2
+
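+// For reference, a scalar sketch of ARGB mirroring (illustrative only, not
+// part of the original patch): whole 32-bit pixels are reversed, not bytes,
+// which is why SSE2 can use pshufd $0x1b and AVX2 a dword-reversing vpermd.
+static void ARGBMirrorRowSketch(const uint8* src, uint8* dst, int width) {
+  const uint32* src32 = (const uint32*)(src) + width - 1;
+  uint32* dst32 = (uint32*)(dst);
+  for (int i = 0; i < width; ++i) {
+    dst32[i] = src32[-i];  // last source pixel comes out first.
+  }
+}
+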
+#ifdef HAS_SPLITUVROW_AVX2
+void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+ int width) {
+ asm volatile (
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+ "vpsrlw $0x8,%%ymm5,%%ymm5 \n"
"sub %1,%2 \n"
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "movdqa %%xmm1,%%xmm3 \n"
- "pand %%xmm5,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "psrlw $0x8,%%xmm2 \n"
- "psrlw $0x8,%%xmm3 \n"
- "packuswb %%xmm3,%%xmm2 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- MEMOPMEM(movdqa,xmm2,0x00,1,2,1) // movdqa %%xmm2,(%1,%2)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%3 \n"
+ "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
+ "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
+ "lea " MEMLEA(0x40,0) ",%0 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm2 \n"
+ "vpsrlw $0x8,%%ymm1,%%ymm3 \n"
+ "vpand %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpand %%ymm5,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpackuswb %%ymm3,%%ymm2,%%ymm2 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm2,%%ymm2 \n"
+ "vmovdqu %%ymm0," MEMACCESS(1) " \n"
+ MEMOPMEM(vmovdqu,ymm2,0x00,1,2,1) // vmovdqu %%ymm2,(%1,%2)
+ "lea " MEMLEA(0x20,1) ",%1 \n"
+ "sub $0x20,%3 \n"
"jg 1b \n"
+ "vzeroupper \n"
: "+r"(src_uv), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
- "+r"(pix) // %3
+ "+r"(width) // %3
:
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
}
+#endif // HAS_SPLITUVROW_AVX2
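+
+// For reference, a scalar sketch of the de-interleave above (illustrative
+// only, not part of the original patch): vpand keeps the even (U) bytes,
+// vpsrlw exposes the odd (V) bytes, and vpackuswb narrows both back to bytes.
+static void SplitUVRowSketch(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                             int width) {
+  for (int i = 0; i < width; ++i) {
+    dst_u[i] = src_uv[2 * i + 0];  // even bytes -> U plane.
+    dst_v[i] = src_uv[2 * i + 1];  // odd bytes  -> V plane.
+  }
+}
+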
-void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int pix) {
+#ifdef HAS_SPLITUVROW_SSE2
+void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+ int width) {
asm volatile (
"pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $0x8,%%xmm5 \n"
@@ -3162,54 +2671,48 @@ void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
: "+r"(src_uv), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
- "+r"(pix) // %3
+ "+r"(width) // %3
:
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
}
#endif // HAS_SPLITUVROW_SSE2
-#ifdef HAS_MERGEUVROW_SSE2
-void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+#ifdef HAS_MERGEUVROW_AVX2
+void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) {
asm volatile (
"sub %0,%1 \n"
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movdqa,0x00,0,1,1,xmm1) // movdqa (%0,%1,1),%%xmm1
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpckhbw %%xmm1,%%xmm2 \n"
- "movdqa %%xmm0," MEMACCESS(2) " \n"
- "movdqa %%xmm2," MEMACCESS2(0x10,2) " \n"
- "lea " MEMLEA(0x20,2) ",%2 \n"
- "sub $0x10,%3 \n"
+ "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
+ MEMOPREG(vmovdqu,0x00,0,1,1,ymm1) // vmovdqu (%0,%1,1),%%ymm1
+ "lea " MEMLEA(0x20,0) ",%0 \n"
+ "vpunpcklbw %%ymm1,%%ymm0,%%ymm2 \n"
+ "vpunpckhbw %%ymm1,%%ymm0,%%ymm0 \n"
+ "vextractf128 $0x0,%%ymm2," MEMACCESS(2) " \n"
+ "vextractf128 $0x0,%%ymm0," MEMACCESS2(0x10,2) "\n"
+ "vextractf128 $0x1,%%ymm2," MEMACCESS2(0x20,2) "\n"
+ "vextractf128 $0x1,%%ymm0," MEMACCESS2(0x30,2) "\n"
+ "lea " MEMLEA(0x40,2) ",%2 \n"
+ "sub $0x20,%3 \n"
"jg 1b \n"
+ "vzeroupper \n"
: "+r"(src_u), // %0
"+r"(src_v), // %1
"+r"(dst_uv), // %2
"+r"(width) // %3
:
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2"
-#endif
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2"
);
}
+#endif // HAS_MERGEUVROW_AVX2
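+
+// For reference, a scalar sketch of the interleave above, the inverse of
+// SplitUVRow (illustrative only, not part of the original patch):
+// vpunpcklbw/vpunpckhbw zip the two planes back into UVUV... order.
+static void MergeUVRowSketch(const uint8* src_u, const uint8* src_v,
+                             uint8* dst_uv, int width) {
+  for (int i = 0; i < width; ++i) {
+    dst_uv[2 * i + 0] = src_u[i];
+    dst_uv[2 * i + 1] = src_v[i];
+  }
+}
+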
-void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
- uint8* dst_uv, int width) {
+#ifdef HAS_MERGEUVROW_SSE2
+void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+ int width) {
asm volatile (
"sub %0,%1 \n"
LABELALIGN
@@ -3230,13 +2733,8 @@ void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
"+r"(dst_uv), // %2
"+r"(width) // %3
:
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2"
-#endif
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2"
);
}
#endif // HAS_MERGEUVROW_SSE2
@@ -3244,6 +2742,10 @@ void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
#ifdef HAS_COPYROW_SSE2
void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
asm volatile (
+ "test $0xf,%0 \n"
+ "jne 2f \n"
+ "test $0xf,%1 \n"
+ "jne 2f \n"
LABELALIGN
"1: \n"
"movdqa " MEMACCESS(0) ",%%xmm0 \n"
@@ -3254,35 +2756,53 @@ void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
"lea " MEMLEA(0x20,1) ",%1 \n"
"sub $0x20,%2 \n"
"jg 1b \n"
+ "jmp 9f \n"
+ LABELALIGN
+ "2: \n"
+ "movdqu " MEMACCESS(0) ",%%xmm0 \n"
+ "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
+ "lea " MEMLEA(0x20,0) ",%0 \n"
+ "movdqu %%xmm0," MEMACCESS(1) " \n"
+ "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
+ "lea " MEMLEA(0x20,1) ",%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 2b \n"
+ "9: \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(count) // %2
:
: "memory", "cc"
-#if defined(__SSE2__)
, "xmm0", "xmm1"
-#endif
);
}
#endif // HAS_COPYROW_SSE2
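+
+// For reference, the predicate the new "test $0xf / jne 2f" pair in
+// CopyRow_SSE2 evaluates (illustrative only, not part of the original
+// patch): the movdqa fast path runs only when both pointers are 16-byte
+// aligned; otherwise the movdqu loop at label 2 is used.
+static int IsAligned16Sketch(const uint8* src, const uint8* dst) {
+  return ((((intptr_t)(src)) | ((intptr_t)(dst))) & 15) == 0;
+}
+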
-#ifdef HAS_COPYROW_X86
-void CopyRow_X86(const uint8* src, uint8* dst, int width) {
- size_t width_tmp = (size_t)(width);
+#ifdef HAS_COPYROW_AVX
+void CopyRow_AVX(const uint8* src, uint8* dst, int count) {
asm volatile (
- "shr $0x2,%2 \n"
- "rep movsl " MEMMOVESTRING(0,1) " \n"
- : "+S"(src), // %0
- "+D"(dst), // %1
- "+c"(width_tmp) // %2
+ LABELALIGN
+ "1: \n"
+ "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
+ "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
+ "lea " MEMLEA(0x40,0) ",%0 \n"
+ "vmovdqu %%ymm0," MEMACCESS(1) " \n"
+ "vmovdqu %%ymm1," MEMACCESS2(0x20,1) " \n"
+ "lea " MEMLEA(0x40,1) ",%1 \n"
+ "sub $0x40,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(count) // %2
:
: "memory", "cc"
+ , "xmm0", "xmm1"
);
}
-#endif // HAS_COPYROW_X86
+#endif // HAS_COPYROW_AVX
#ifdef HAS_COPYROW_ERMS
-// Unaligned Multiple of 1.
+// Multiple of 1.
void CopyRow_ERMS(const uint8* src, uint8* dst, int width) {
size_t width_tmp = (size_t)(width);
asm volatile (
@@ -3306,19 +2826,19 @@ void ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
"psrld $0x8,%%xmm1 \n"
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm3 \n"
+ "movdqu " MEMACCESS(0) ",%%xmm2 \n"
+ "movdqu " MEMACCESS2(0x10,0) ",%%xmm3 \n"
"lea " MEMLEA(0x20,0) ",%0 \n"
- "movdqa " MEMACCESS(1) ",%%xmm4 \n"
- "movdqa " MEMACCESS2(0x10,1) ",%%xmm5 \n"
+ "movdqu " MEMACCESS(1) ",%%xmm4 \n"
+ "movdqu " MEMACCESS2(0x10,1) ",%%xmm5 \n"
"pand %%xmm0,%%xmm2 \n"
"pand %%xmm0,%%xmm3 \n"
"pand %%xmm1,%%xmm4 \n"
"pand %%xmm1,%%xmm5 \n"
"por %%xmm4,%%xmm2 \n"
"por %%xmm5,%%xmm3 \n"
- "movdqa %%xmm2," MEMACCESS(1) " \n"
- "movdqa %%xmm3," MEMACCESS2(0x10,1) " \n"
+ "movdqu %%xmm2," MEMACCESS(1) " \n"
+ "movdqu %%xmm3," MEMACCESS2(0x10,1) " \n"
"lea " MEMLEA(0x20,1) ",%1 \n"
"sub $0x8,%2 \n"
"jg 1b \n"
@@ -3327,9 +2847,7 @@ void ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
"+r"(width) // %2
:
: "memory", "cc"
-#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
);
}
#endif // HAS_ARGBCOPYALPHAROW_SSE2
@@ -3358,9 +2876,7 @@ void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
"+r"(width) // %2
:
: "memory", "cc"
-#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2"
-#endif
);
}
#endif // HAS_ARGBCOPYALPHAROW_AVX2
@@ -3380,16 +2896,16 @@ void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
"punpcklbw %%xmm2,%%xmm2 \n"
"punpckhwd %%xmm2,%%xmm3 \n"
"punpcklwd %%xmm2,%%xmm2 \n"
- "movdqa " MEMACCESS(1) ",%%xmm4 \n"
- "movdqa " MEMACCESS2(0x10,1) ",%%xmm5 \n"
+ "movdqu " MEMACCESS(1) ",%%xmm4 \n"
+ "movdqu " MEMACCESS2(0x10,1) ",%%xmm5 \n"
"pand %%xmm0,%%xmm2 \n"
"pand %%xmm0,%%xmm3 \n"
"pand %%xmm1,%%xmm4 \n"
"pand %%xmm1,%%xmm5 \n"
"por %%xmm4,%%xmm2 \n"
"por %%xmm5,%%xmm3 \n"
- "movdqa %%xmm2," MEMACCESS(1) " \n"
- "movdqa %%xmm3," MEMACCESS2(0x10,1) " \n"
+ "movdqu %%xmm2," MEMACCESS(1) " \n"
+ "movdqu %%xmm3," MEMACCESS2(0x10,1) " \n"
"lea " MEMLEA(0x20,1) ",%1 \n"
"sub $0x8,%2 \n"
"jg 1b \n"
@@ -3398,9 +2914,7 @@ void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
"+r"(width) // %2
:
: "memory", "cc"
-#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
);
}
#endif // HAS_ARGBCOPYYTOALPHAROW_SSE2
@@ -3431,18 +2945,16 @@ void ARGBCopyYToAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
"+r"(width) // %2
:
: "memory", "cc"
-#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2"
-#endif
);
}
#endif // HAS_ARGBCOPYYTOALPHAROW_AVX2
#ifdef HAS_SETROW_X86
-void SetRow_X86(uint8* dst, uint32 v32, int width) {
- size_t width_tmp = (size_t)(width);
+void SetRow_X86(uint8* dst, uint8 v8, int width) {
+ size_t width_tmp = (size_t)(width >> 2);
+ const uint32 v32 = v8 * 0x01010101; // Duplicate byte to all bytes.
asm volatile (
- "shr $0x2,%1 \n"
"rep stosl " MEMSTORESTRING(eax,0) " \n"
: "+D"(dst), // %0
"+c"(width_tmp) // %1
@@ -3450,63 +2962,65 @@ void SetRow_X86(uint8* dst, uint32 v32, int width) {
: "memory", "cc");
}
-void ARGBSetRows_X86(uint8* dst, uint32 v32, int width,
- int dst_stride, int height) {
- for (int y = 0; y < height; ++y) {
- size_t width_tmp = (size_t)(width);
- uint32* d = (uint32*)(dst);
- asm volatile (
- "rep stosl " MEMSTORESTRING(eax,0) " \n"
- : "+D"(d), // %0
- "+c"(width_tmp) // %1
- : "a"(v32) // %2
- : "memory", "cc");
- dst += dst_stride;
- }
+void SetRow_ERMS(uint8* dst, uint8 v8, int width) {
+ size_t width_tmp = (size_t)(width);
+ asm volatile (
+ "rep stosb " MEMSTORESTRING(al,0) " \n"
+ : "+D"(dst), // %0
+ "+c"(width_tmp) // %1
+ : "a"(v8) // %2
+ : "memory", "cc");
+}
+
+void ARGBSetRow_X86(uint8* dst_argb, uint32 v32, int width) {
+ size_t width_tmp = (size_t)(width);
+ asm volatile (
+ "rep stosl " MEMSTORESTRING(eax,0) " \n"
+ : "+D"(dst_argb), // %0
+ "+c"(width_tmp) // %1
+ : "a"(v32) // %2
+ : "memory", "cc");
}
#endif // HAS_SETROW_X86
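+
+// For reference, a scalar sketch of the byte splat SetRow_X86 relies on
+// (illustrative only, not part of the original patch): multiplying by
+// 0x01010101 copies v8 into all four bytes of the dword that rep stosl
+// stores width/4 times.
+static void SetRowSketch(uint8* dst, uint8 v8, int width) {
+  const uint32 v32 = v8 * 0x01010101u;  // e.g. 0x5A -> 0x5A5A5A5A.
+  for (int i = 0; i < (width >> 2); ++i) {
+    ((uint32*)(dst))[i] = v32;
+  }
+}
+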
#ifdef HAS_YUY2TOYROW_SSE2
-void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix) {
+void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int width) {
asm volatile (
"pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $0x8,%%xmm5 \n"
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
+ "movdqu " MEMACCESS(0) ",%%xmm0 \n"
+ "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
"lea " MEMLEA(0x20,0) ",%0 \n"
"pand %%xmm5,%%xmm0 \n"
"pand %%xmm5,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
+ "movdqu %%xmm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x10,1) ",%1 \n"
"sub $0x10,%2 \n"
"jg 1b \n"
: "+r"(src_yuy2), // %0
"+r"(dst_y), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "memory", "cc"
-#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm5"
-#endif
);
}
void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) {
+ uint8* dst_u, uint8* dst_v, int width) {
asm volatile (
"pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $0x8,%%xmm5 \n"
"sub %1,%2 \n"
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- BUNDLEALIGN
- MEMOPREG(movdqa,0x00,0,4,1,xmm2) // movdqa (%0,%4,1),%%xmm2
- MEMOPREG(movdqa,0x10,0,4,1,xmm3) // movdqa 0x10(%0,%4,1),%%xmm3
+ "movdqu " MEMACCESS(0) ",%%xmm0 \n"
+ "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
+ MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2
+ MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3
"lea " MEMLEA(0x20,0) ",%0 \n"
"pavgb %%xmm2,%%xmm0 \n"
"pavgb %%xmm3,%%xmm1 \n"
@@ -3519,7 +3033,6 @@ void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
"psrlw $0x8,%%xmm1 \n"
"packuswb %%xmm1,%%xmm1 \n"
"movq %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2)
"lea " MEMLEA(0x8,1) ",%1 \n"
"sub $0x10,%3 \n"
@@ -3527,28 +3040,23 @@ void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
: "+r"(src_yuy2), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
- "+r"(pix) // %3
+ "+r"(width) // %3
: "r"((intptr_t)(stride_yuy2)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
}
void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) {
+ uint8* dst_u, uint8* dst_v, int width) {
asm volatile (
"pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $0x8,%%xmm5 \n"
"sub %1,%2 \n"
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
+ "movdqu " MEMACCESS(0) ",%%xmm0 \n"
+ "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
"lea " MEMLEA(0x20,0) ",%0 \n"
"psrlw $0x8,%%xmm0 \n"
"psrlw $0x8,%%xmm1 \n"
@@ -3559,7 +3067,6 @@ void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
"psrlw $0x8,%%xmm1 \n"
"packuswb %%xmm1,%%xmm1 \n"
"movq %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2)
"lea " MEMLEA(0x8,1) ",%1 \n"
"sub $0x10,%3 \n"
@@ -3567,270 +3074,38 @@ void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
: "+r"(src_yuy2), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
- "+r"(pix) // %3
+ "+r"(width) // %3
:
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm5"
-#endif
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm5"
);
}
-void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2,
- uint8* dst_y, int pix) {
+void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int width) {
asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
LABELALIGN
"1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
"movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
"lea " MEMLEA(0x20,0) ",%0 \n"
- "pand %%xmm5,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
+ "psrlw $0x8,%%xmm0 \n"
+ "psrlw $0x8,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
"movdqu %%xmm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_yuy2), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm5"
-#endif
- );
-}
-
-void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2,
- int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
- "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- BUNDLEALIGN
- MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2
- MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pavgb %%xmm2,%%xmm0 \n"
- "pavgb %%xmm3,%%xmm1 \n"
- "psrlw $0x8,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "pand %%xmm5,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm1 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
- MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_yuy2), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- : "r"((intptr_t)(stride_yuy2)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
- );
-}
-
-void YUY2ToUV422Row_Unaligned_SSE2(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
- "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "psrlw $0x8,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "pand %%xmm5,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm1 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
- MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_yuy2), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm5"
-#endif
- );
-}
-
-void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix) {
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "psrlw $0x8,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
"sub $0x10,%2 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
"jg 1b \n"
: "+r"(src_uyvy), // %0
"+r"(dst_y), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "memory", "cc"
-#if defined(__SSE2__)
, "xmm0", "xmm1"
-#endif
);
}
void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
- "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- BUNDLEALIGN
- MEMOPREG(movdqa,0x00,0,4,1,xmm2) // movdqa (%0,%4,1),%%xmm2
- MEMOPREG(movdqa,0x10,0,4,1,xmm3) // movdqa 0x10(%0,%4,1),%%xmm3
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pavgb %%xmm2,%%xmm0 \n"
- "pavgb %%xmm3,%%xmm1 \n"
- "pand %%xmm5,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "pand %%xmm5,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm1 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
- MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_uyvy), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- : "r"((intptr_t)(stride_uyvy)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
- );
-}
-
-void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
- "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pand %%xmm5,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "pand %%xmm5,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm1 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
- MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_uyvy), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm5"
-#endif
- );
-}
-
-void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy,
- uint8* dst_y, int pix) {
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "psrlw $0x8,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_uyvy), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1"
-#endif
- );
-}
-
-void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) {
+ uint8* dst_u, uint8* dst_v, int width) {
asm volatile (
"pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $0x8,%%xmm5 \n"
@@ -3839,7 +3114,6 @@ void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
"1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
"movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- BUNDLEALIGN
MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2
MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3
"lea " MEMLEA(0x20,0) ",%0 \n"
@@ -3854,7 +3128,6 @@ void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
"psrlw $0x8,%%xmm1 \n"
"packuswb %%xmm1,%%xmm1 \n"
"movq %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2)
"lea " MEMLEA(0x8,1) ",%1 \n"
"sub $0x10,%3 \n"
@@ -3862,20 +3135,15 @@ void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
: "+r"(src_uyvy), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
- "+r"(pix) // %3
+ "+r"(width) // %3
: "r"((intptr_t)(stride_uyvy)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
}
-void UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) {
+void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
+ uint8* dst_u, uint8* dst_v, int width) {
asm volatile (
"pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $0x8,%%xmm5 \n"
@@ -3894,7 +3162,6 @@ void UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy,
"psrlw $0x8,%%xmm1 \n"
"packuswb %%xmm1,%%xmm1 \n"
"movq %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2)
"lea " MEMLEA(0x8,1) ",%1 \n"
"sub $0x10,%3 \n"
@@ -3902,140 +3169,219 @@ void UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy,
: "+r"(src_uyvy), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
- "+r"(pix) // %3
+ "+r"(width) // %3
:
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm5"
-#endif
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm5"
);
}
#endif // HAS_YUY2TOYROW_SSE2
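+
+// For reference, a scalar sketch of the packed-422 byte layouts handled
+// above (illustrative only, not part of the original patch): YUY2 keeps Y
+// in the even bytes (hence the pand with 0x00ff words) and UYVY keeps Y in
+// the odd bytes (hence the psrlw $0x8).
+static void YUY2ToYRowSketch(const uint8* src_yuy2, uint8* dst_y, int width) {
+  for (int i = 0; i < width; ++i) {
+    dst_y[i] = src_yuy2[2 * i];  // UYVY would read src_uyvy[2 * i + 1].
+  }
+}
+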
-#ifdef HAS_ARGBBLENDROW_SSE2
-// Blend 8 pixels at a time.
-void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
+#ifdef HAS_YUY2TOYROW_AVX2
+void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int width) {
asm volatile (
- "pcmpeqb %%xmm7,%%xmm7 \n"
- "psrlw $0xf,%%xmm7 \n"
- "pcmpeqb %%xmm6,%%xmm6 \n"
- "psrlw $0x8,%%xmm6 \n"
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psllw $0x8,%%xmm5 \n"
- "pcmpeqb %%xmm4,%%xmm4 \n"
- "pslld $0x18,%%xmm4 \n"
- "sub $0x1,%3 \n"
- "je 91f \n"
- "jl 99f \n"
-
- // 1 pixel loop until destination pointer is aligned.
- "10: \n"
- "test $0xf,%2 \n"
- "je 19f \n"
- "movd " MEMACCESS(0) ",%%xmm3 \n"
- "lea " MEMLEA(0x4,0) ",%0 \n"
- "movdqa %%xmm3,%%xmm0 \n"
- "pxor %%xmm4,%%xmm3 \n"
- "movd " MEMACCESS(1) ",%%xmm2 \n"
- "psrlw $0x8,%%xmm3 \n"
- "pshufhw $0xf5,%%xmm3,%%xmm3 \n"
- "pshuflw $0xf5,%%xmm3,%%xmm3 \n"
- "pand %%xmm6,%%xmm2 \n"
- "paddw %%xmm7,%%xmm3 \n"
- "pmullw %%xmm3,%%xmm2 \n"
- "movd " MEMACCESS(1) ",%%xmm1 \n"
- "lea " MEMLEA(0x4,1) ",%1 \n"
- "psrlw $0x8,%%xmm1 \n"
- "por %%xmm4,%%xmm0 \n"
- "pmullw %%xmm3,%%xmm1 \n"
- "psrlw $0x8,%%xmm2 \n"
- "paddusb %%xmm2,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "paddusb %%xmm1,%%xmm0 \n"
- "sub $0x1,%3 \n"
- "movd %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x4,2) ",%2 \n"
- "jge 10b \n"
-
- "19: \n"
- "add $1-4,%3 \n"
- "jl 49f \n"
-
- // 4 pixel loop.
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+ "vpsrlw $0x8,%%ymm5,%%ymm5 \n"
LABELALIGN
- "41: \n"
- "movdqu " MEMACCESS(0) ",%%xmm3 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm3,%%xmm0 \n"
- "pxor %%xmm4,%%xmm3 \n"
- "movdqu " MEMACCESS(1) ",%%xmm2 \n"
- "psrlw $0x8,%%xmm3 \n"
- "pshufhw $0xf5,%%xmm3,%%xmm3 \n"
- "pshuflw $0xf5,%%xmm3,%%xmm3 \n"
- "pand %%xmm6,%%xmm2 \n"
- "paddw %%xmm7,%%xmm3 \n"
- "pmullw %%xmm3,%%xmm2 \n"
- "movdqu " MEMACCESS(1) ",%%xmm1 \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "psrlw $0x8,%%xmm1 \n"
- "por %%xmm4,%%xmm0 \n"
- "pmullw %%xmm3,%%xmm1 \n"
- "psrlw $0x8,%%xmm2 \n"
- "paddusb %%xmm2,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "paddusb %%xmm1,%%xmm0 \n"
- "sub $0x4,%3 \n"
- "movdqa %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "jge 41b \n"
-
- "49: \n"
- "add $0x3,%3 \n"
- "jl 99f \n"
-
- // 1 pixel loop.
- "91: \n"
- "movd " MEMACCESS(0) ",%%xmm3 \n"
- "lea " MEMLEA(0x4,0) ",%0 \n"
- "movdqa %%xmm3,%%xmm0 \n"
- "pxor %%xmm4,%%xmm3 \n"
- "movd " MEMACCESS(1) ",%%xmm2 \n"
- "psrlw $0x8,%%xmm3 \n"
- "pshufhw $0xf5,%%xmm3,%%xmm3 \n"
- "pshuflw $0xf5,%%xmm3,%%xmm3 \n"
- "pand %%xmm6,%%xmm2 \n"
- "paddw %%xmm7,%%xmm3 \n"
- "pmullw %%xmm3,%%xmm2 \n"
- "movd " MEMACCESS(1) ",%%xmm1 \n"
- "lea " MEMLEA(0x4,1) ",%1 \n"
- "psrlw $0x8,%%xmm1 \n"
- "por %%xmm4,%%xmm0 \n"
- "pmullw %%xmm3,%%xmm1 \n"
- "psrlw $0x8,%%xmm2 \n"
- "paddusb %%xmm2,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "paddusb %%xmm1,%%xmm0 \n"
- "sub $0x1,%3 \n"
- "movd %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x4,2) ",%2 \n"
- "jge 91b \n"
- "99: \n"
- : "+r"(src_argb0), // %0
- "+r"(src_argb1), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
+ "1: \n"
+ "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
+ "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
+ "lea " MEMLEA(0x40,0) ",%0 \n"
+ "vpand %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpand %%ymm5,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0," MEMACCESS(1) " \n"
+ "lea " MEMLEA(0x20,1) ",%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_yuy2), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
:
: "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
+ , "xmm0", "xmm1", "xmm5"
);
}
-#endif // HAS_ARGBBLENDROW_SSE2
+
+void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2,
+ uint8* dst_u, uint8* dst_v, int width) {
+ asm volatile (
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+ "vpsrlw $0x8,%%ymm5,%%ymm5 \n"
+ "sub %1,%2 \n"
+ LABELALIGN
+ "1: \n"
+ "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
+ "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
+ VMEMOPREG(vpavgb,0x00,0,4,1,ymm0,ymm0) // vpavgb (%0,%4,1),%%ymm0,%%ymm0
+ VMEMOPREG(vpavgb,0x20,0,4,1,ymm1,ymm1)
+ "lea " MEMLEA(0x40,0) ",%0 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpand %%ymm5,%%ymm0,%%ymm1 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm1,%%ymm1 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vextractf128 $0x0,%%ymm1," MEMACCESS(1) " \n"
+ VEXTOPMEM(vextractf128,0,ymm0,0x00,1,2,1) // vextractf128 $0x0,%%ymm0,(%1,%2,1)
+ "lea " MEMLEA(0x10,1) ",%1 \n"
+ "sub $0x20,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_yuy2), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ : "r"((intptr_t)(stride_yuy2)) // %4
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm5"
+ );
+}
+
+void YUY2ToUV422Row_AVX2(const uint8* src_yuy2,
+ uint8* dst_u, uint8* dst_v, int width) {
+ asm volatile (
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+ "vpsrlw $0x8,%%ymm5,%%ymm5 \n"
+ "sub %1,%2 \n"
+ LABELALIGN
+ "1: \n"
+ "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
+ "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
+ "lea " MEMLEA(0x40,0) ",%0 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpand %%ymm5,%%ymm0,%%ymm1 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm1,%%ymm1 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vextractf128 $0x0,%%ymm1," MEMACCESS(1) " \n"
+ VEXTOPMEM(vextractf128,0,ymm0,0x00,1,2,1) // vextractf128 $0x0,%%ymm0,(%1,%2,1)
+ "lea " MEMLEA(0x10,1) ",%1 \n"
+ "sub $0x20,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_yuy2), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm5"
+ );
+}
+
+void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int width) {
+ asm volatile (
+ LABELALIGN
+ "1: \n"
+ "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
+ "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
+ "lea " MEMLEA(0x40,0) ",%0 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0," MEMACCESS(1) " \n"
+ "lea " MEMLEA(0x20,1) ",%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_uyvy), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "memory", "cc"
+ , "xmm0", "xmm1", "xmm5"
+ );
+}
+void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
+ uint8* dst_u, uint8* dst_v, int width) {
+ asm volatile (
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+ "vpsrlw $0x8,%%ymm5,%%ymm5 \n"
+ "sub %1,%2 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
+ "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
+ VMEMOPREG(vpavgb,0x00,0,4,1,ymm0,ymm0) // vpavgb (%0,%4,1),%%ymm0,%%ymm0
+ VMEMOPREG(vpavgb,0x20,0,4,1,ymm1,ymm1)
+ "lea " MEMLEA(0x40,0) ",%0 \n"
+ "vpand %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpand %%ymm5,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpand %%ymm5,%%ymm0,%%ymm1 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm1,%%ymm1 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vextractf128 $0x0,%%ymm1," MEMACCESS(1) " \n"
+ VEXTOPMEM(vextractf128,0,ymm0,0x00,1,2,1) // vextractf128 $0x0,%%ymm0,(%1,%2,1)
+ "lea " MEMLEA(0x10,1) ",%1 \n"
+ "sub $0x20,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_uyvy), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ : "r"((intptr_t)(stride_uyvy)) // %4
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm5"
+ );
+}
+
+void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
+ uint8* dst_u, uint8* dst_v, int width) {
+ asm volatile (
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+ "vpsrlw $0x8,%%ymm5,%%ymm5 \n"
+ "sub %1,%2 \n"
+ LABELALIGN
+ "1: \n"
+ "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
+ "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
+ "lea " MEMLEA(0x40,0) ",%0 \n"
+ "vpand %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpand %%ymm5,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpand %%ymm5,%%ymm0,%%ymm1 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm1,%%ymm1 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vextractf128 $0x0,%%ymm1," MEMACCESS(1) " \n"
+ VEXTOPMEM(vextractf128,0,ymm0,0x00,1,2,1) // vextractf128 $0x0,%%ymm0,(%1,%2,1)
+ "lea " MEMLEA(0x10,1) ",%1 \n"
+ "sub $0x20,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_uyvy), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm5"
+ );
+}
+#endif // HAS_YUY2TOYROW_AVX2
#ifdef HAS_ARGBBLENDROW_SSSE3
// Shuffle table for isolating alpha.
@@ -4045,15 +3391,6 @@ static uvec8 kShuffleAlpha = {
};
// Blend 8 pixels at a time
-// Shuffle table for reversing the bytes.
-
-// Same as SSE2, but replaces
-// psrlw xmm3, 8 // alpha
-// pshufhw xmm3, xmm3,0F5h // 8 alpha words
-// pshuflw xmm3, xmm3,0F5h
-// with..
-// pshufb xmm3, kShuffleAlpha // alpha
-
void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
uint8* dst_argb, int width) {
asm volatile (
@@ -4065,75 +3402,12 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
"psllw $0x8,%%xmm5 \n"
"pcmpeqb %%xmm4,%%xmm4 \n"
"pslld $0x18,%%xmm4 \n"
- "sub $0x1,%3 \n"
- "je 91f \n"
- "jl 99f \n"
-
- // 1 pixel loop until destination pointer is aligned.
- "10: \n"
- "test $0xf,%2 \n"
- "je 19f \n"
- "movd " MEMACCESS(0) ",%%xmm3 \n"
- "lea " MEMLEA(0x4,0) ",%0 \n"
- "movdqa %%xmm3,%%xmm0 \n"
- "pxor %%xmm4,%%xmm3 \n"
- "movd " MEMACCESS(1) ",%%xmm2 \n"
- "pshufb %4,%%xmm3 \n"
- "pand %%xmm6,%%xmm2 \n"
- "paddw %%xmm7,%%xmm3 \n"
- "pmullw %%xmm3,%%xmm2 \n"
- "movd " MEMACCESS(1) ",%%xmm1 \n"
- "lea " MEMLEA(0x4,1) ",%1 \n"
- "psrlw $0x8,%%xmm1 \n"
- "por %%xmm4,%%xmm0 \n"
- "pmullw %%xmm3,%%xmm1 \n"
- "psrlw $0x8,%%xmm2 \n"
- "paddusb %%xmm2,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "paddusb %%xmm1,%%xmm0 \n"
- "sub $0x1,%3 \n"
- "movd %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x4,2) ",%2 \n"
- "jge 10b \n"
-
- "19: \n"
- "add $1-4,%3 \n"
+ "sub $0x4,%3 \n"
"jl 49f \n"
- "test $0xf,%0 \n"
- "jne 41f \n"
- "test $0xf,%1 \n"
- "jne 41f \n"
// 4 pixel loop.
LABELALIGN
"40: \n"
- "movdqa " MEMACCESS(0) ",%%xmm3 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm3,%%xmm0 \n"
- "pxor %%xmm4,%%xmm3 \n"
- "movdqa " MEMACCESS(1) ",%%xmm2 \n"
- "pshufb %4,%%xmm3 \n"
- "pand %%xmm6,%%xmm2 \n"
- "paddw %%xmm7,%%xmm3 \n"
- "pmullw %%xmm3,%%xmm2 \n"
- "movdqa " MEMACCESS(1) ",%%xmm1 \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "psrlw $0x8,%%xmm1 \n"
- "por %%xmm4,%%xmm0 \n"
- "pmullw %%xmm3,%%xmm1 \n"
- "psrlw $0x8,%%xmm2 \n"
- "paddusb %%xmm2,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "paddusb %%xmm1,%%xmm0 \n"
- "sub $0x4,%3 \n"
- "movdqa %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "jge 40b \n"
- "jmp 49f \n"
-
- // 4 pixel unaligned loop.
- LABELALIGN
- "41: \n"
"movdqu " MEMACCESS(0) ",%%xmm3 \n"
"lea " MEMLEA(0x10,0) ",%0 \n"
"movdqa %%xmm3,%%xmm0 \n"
@@ -4152,10 +3426,10 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
"paddusb %%xmm2,%%xmm0 \n"
"pand %%xmm5,%%xmm1 \n"
"paddusb %%xmm1,%%xmm0 \n"
- "sub $0x4,%3 \n"
- "movdqa %%xmm0," MEMACCESS(2) " \n"
+ "movdqu %%xmm0," MEMACCESS(2) " \n"
"lea " MEMLEA(0x10,2) ",%2 \n"
- "jge 41b \n"
+ "sub $0x4,%3 \n"
+ "jge 40b \n"
"49: \n"
"add $0x3,%3 \n"
@@ -4181,9 +3455,9 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
"paddusb %%xmm2,%%xmm0 \n"
"pand %%xmm5,%%xmm1 \n"
"paddusb %%xmm1,%%xmm0 \n"
- "sub $0x1,%3 \n"
"movd %%xmm0," MEMACCESS(2) " \n"
"lea " MEMLEA(0x4,2) ",%2 \n"
+ "sub $0x1,%3 \n"
"jge 91b \n"
"99: \n"
: "+r"(src_argb0), // %0
@@ -4192,71 +3466,128 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
"+r"(width) // %3
: "m"(kShuffleAlpha) // %4
: "memory", "cc"
-#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
);
}
#endif // HAS_ARGBBLENDROW_SSSE3
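+
+// For reference, a scalar sketch of the blend ARGBBlendRow_SSSE3 computes
+// (illustrative only, not part of the original patch):
+// dst = src0 + src1 * (256 - alpha0) / 256 per channel, destination alpha
+// forced to 255. The pxor/paddw pair forms (256 - alpha) from the mask.
+static void ARGBBlendRowSketch(const uint8* src_argb0, const uint8* src_argb1,
+                               uint8* dst_argb, int width) {
+  for (int i = 0; i < width; ++i) {
+    const uint32 fa = 256 - src_argb0[4 * i + 3];
+    for (int c = 0; c < 3; ++c) {
+      uint32 p = src_argb0[4 * i + c] + ((src_argb1[4 * i + c] * fa) >> 8);
+      dst_argb[4 * i + c] = (uint8)(p > 255 ? 255 : p);  // paddusb saturates.
+    }
+    dst_argb[4 * i + 3] = 255;  // por with the 0xff000000 alpha mask.
+  }
+}
+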
-#ifdef HAS_ARGBATTENUATEROW_SSE2
-// Attenuate 4 pixels at a time.
-// aligned to 16 bytes
-void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
+#ifdef HAS_BLENDPLANEROW_SSSE3
+// Blend 8 pixels at a time.
+// unsigned version of math
+// =((A2*C2)+(B2*(255-C2))+255)/256
+// signed version of math
+// =(((A2-128)*C2)+((B2-128)*(255-C2))+32768+127)/256
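+// Scalar sketch of the unsigned math above (illustrative only):
+//   for (int i = 0; i < width; ++i) {
+//     dst[i] = (uint8)((src0[i] * alpha[i] +
+//                       src1[i] * (255 - alpha[i]) + 255) >> 8);
+//   }
+// The asm uses the signed form so pmaddubsw can do both multiplies and
+// the add in one instruction.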
+void BlendPlaneRow_SSSE3(const uint8* src0, const uint8* src1,
+ const uint8* alpha, uint8* dst, int width) {
asm volatile (
- "pcmpeqb %%xmm4,%%xmm4 \n"
- "pslld $0x18,%%xmm4 \n"
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrld $0x8,%%xmm5 \n"
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+ "psllw $0x8,%%xmm5 \n"
+ "mov $0x80808080,%%eax \n"
+ "movd %%eax,%%xmm6 \n"
+ "pshufd $0x0,%%xmm6,%%xmm6 \n"
+ "mov $0x807f807f,%%eax \n"
+ "movd %%eax,%%xmm7 \n"
+ "pshufd $0x0,%%xmm7,%%xmm7 \n"
+ "sub %2,%0 \n"
+ "sub %2,%1 \n"
+ "sub %2,%3 \n"
- // 4 pixel loop.
+ // 8 pixel loop.
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "punpcklbw %%xmm0,%%xmm0 \n"
- "pshufhw $0xff,%%xmm0,%%xmm2 \n"
- "pshuflw $0xff,%%xmm2,%%xmm2 \n"
- "pmulhuw %%xmm2,%%xmm0 \n"
- "movdqa " MEMACCESS(0) ",%%xmm1 \n"
- "punpckhbw %%xmm1,%%xmm1 \n"
- "pshufhw $0xff,%%xmm1,%%xmm2 \n"
- "pshuflw $0xff,%%xmm2,%%xmm2 \n"
- "pmulhuw %%xmm2,%%xmm1 \n"
- "movdqa " MEMACCESS(0) ",%%xmm2 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "psrlw $0x8,%%xmm0 \n"
- "pand %%xmm4,%%xmm2 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "pand %%xmm5,%%xmm0 \n"
- "por %%xmm2,%%xmm0 \n"
- "sub $0x4,%2 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
+ "movq (%2),%%xmm0 \n"
+ "punpcklbw %%xmm0,%%xmm0 \n"
+ "pxor %%xmm5,%%xmm0 \n"
+ "movq (%0,%2,1),%%xmm1 \n"
+ "movq (%1,%2,1),%%xmm2 \n"
+ "punpcklbw %%xmm2,%%xmm1 \n"
+ "psubb %%xmm6,%%xmm1 \n"
+ "pmaddubsw %%xmm1,%%xmm0 \n"
+ "paddw %%xmm7,%%xmm0 \n"
+ "psrlw $0x8,%%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "movq %%xmm0,(%3,%2,1) \n"
+ "lea 0x8(%2),%2 \n"
+ "sub $0x8,%4 \n"
"jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
+ : "+r"(src0), // %0
+ "+r"(src1), // %1
+ "+r"(alpha), // %2
+ "+r"(dst), // %3
+ "+r"(width) // %4
+ :: "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm5", "xmm6", "xmm7"
);
}
-#endif // HAS_ARGBATTENUATEROW_SSE2
+#endif // HAS_BLENDPLANEROW_SSSE3
+
+#ifdef HAS_BLENDPLANEROW_AVX2
+// Blend 32 pixels at a time.
+// unsigned version of math
+// =((A2*C2)+(B2*(255-C2))+255)/256
+// signed version of math
+// =(((A2-128)*C2)+((B2-128)*(255-C2))+32768+127)/256
+void BlendPlaneRow_AVX2(const uint8* src0, const uint8* src1,
+ const uint8* alpha, uint8* dst, int width) {
+ asm volatile (
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+ "vpsllw $0x8,%%ymm5,%%ymm5 \n"
+ "mov $0x80808080,%%eax \n"
+ "vmovd %%eax,%%xmm6 \n"
+ "vbroadcastss %%xmm6,%%ymm6 \n"
+ "mov $0x807f807f,%%eax \n"
+ "vmovd %%eax,%%xmm7 \n"
+ "vbroadcastss %%xmm7,%%ymm7 \n"
+ "sub %2,%0 \n"
+ "sub %2,%1 \n"
+ "sub %2,%3 \n"
+
+ // 32 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%2),%%ymm0 \n"
+ "vpunpckhbw %%ymm0,%%ymm0,%%ymm3 \n"
+ "vpunpcklbw %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpxor %%ymm5,%%ymm3,%%ymm3 \n"
+ "vpxor %%ymm5,%%ymm0,%%ymm0 \n"
+ "vmovdqu (%0,%2,1),%%ymm1 \n"
+ "vmovdqu (%1,%2,1),%%ymm2 \n"
+ "vpunpckhbw %%ymm2,%%ymm1,%%ymm4 \n"
+ "vpunpcklbw %%ymm2,%%ymm1,%%ymm1 \n"
+ "vpsubb %%ymm6,%%ymm4,%%ymm4 \n"
+ "vpsubb %%ymm6,%%ymm1,%%ymm1 \n"
+ "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
+ "vpmaddubsw %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpaddw %%ymm7,%%ymm3,%%ymm3 \n"
+ "vpaddw %%ymm7,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x8,%%ymm3,%%ymm3 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpackuswb %%ymm3,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0,(%3,%2,1) \n"
+ "lea 0x20(%2),%2 \n"
+ "sub $0x20,%4 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src0), // %0
+ "+r"(src1), // %1
+ "+r"(alpha), // %2
+ "+r"(dst), // %3
+ "+r"(width) // %4
+ :: "memory", "cc", "eax",
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
+ );
+}
+#endif // HAS_BLENDPLANEROW_AVX2
#ifdef HAS_ARGBATTENUATEROW_SSSE3
// Shuffle table duplicating alpha
static uvec8 kShuffleAlpha0 = {
- 3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u,
+ 3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u
};
static uvec8 kShuffleAlpha1 = {
11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u,
- 15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u,
+ 15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u
};
// Attenuate 4 pixels at a time.
-// aligned to 16 bytes
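+// Sketch of the per-channel math (illustrative, per the usual C
+// reference): attenuation premultiplies color by alpha, roughly
+//   dst[c] = src[c] * a / 255;
+// approximated below with pmulhuw on byte-duplicated words.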
void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
asm volatile (
"pcmpeqb %%xmm3,%%xmm3 \n"
@@ -4284,9 +3615,9 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
"psrlw $0x8,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n"
"por %%xmm2,%%xmm0 \n"
- "sub $0x4,%2 \n"
"movdqu %%xmm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x10,1) ",%1 \n"
+ "sub $0x4,%2 \n"
"jg 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
@@ -4294,16 +3625,56 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
: "m"(kShuffleAlpha0), // %3
"m"(kShuffleAlpha1) // %4
: "memory", "cc"
-#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
);
}
#endif // HAS_ARGBATTENUATEROW_SSSE3
+#ifdef HAS_ARGBATTENUATEROW_AVX2
+// Shuffle table duplicating alpha.
+static const uvec8 kShuffleAlpha_AVX2 = {
+ 6u, 7u, 6u, 7u, 6u, 7u, 128u, 128u, 14u, 15u, 14u, 15u, 14u, 15u, 128u, 128u
+};
+// Attenuate 8 pixels at a time.
+void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) {
+ asm volatile (
+ "vbroadcastf128 %3,%%ymm4 \n"
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+ "vpslld $0x18,%%ymm5,%%ymm5 \n"
+ "sub %0,%1 \n"
+
+ // 8 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "vmovdqu " MEMACCESS(0) ",%%ymm6 \n"
+ "vpunpcklbw %%ymm6,%%ymm6,%%ymm0 \n"
+ "vpunpckhbw %%ymm6,%%ymm6,%%ymm1 \n"
+ "vpshufb %%ymm4,%%ymm0,%%ymm2 \n"
+ "vpshufb %%ymm4,%%ymm1,%%ymm3 \n"
+ "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vpand %%ymm5,%%ymm6,%%ymm6 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpor %%ymm6,%%ymm0,%%ymm0 \n"
+ MEMOPMEM(vmovdqu,ymm0,0x00,0,1,1) // vmovdqu %%ymm0,(%0,%1)
+ "lea " MEMLEA(0x20,0) ",%0 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "m"(kShuffleAlpha_AVX2) // %3
+ : "memory", "cc"
+ , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
+ );
+}
+#endif // HAS_ARGBATTENUATEROW_AVX2
+
#ifdef HAS_ARGBUNATTENUATEROW_SSE2
// Unattenuate 4 pixels at a time.
-// aligned to 16 bytes
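+// Sketch (illustrative): unattenuation undoes the premultiply, roughly
+//   dst[c] = (a > 0) ? min(255, src[c] * 255 / a) : src[c];
+// with the divide replaced by the fixed-point reciprocals in fixed_invtbl8.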
void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
int width) {
uintptr_t alpha = 0;
@@ -4324,7 +3695,6 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
"movdqu " MEMACCESS(0) ",%%xmm1 \n"
"movzb " MEMACCESS2(0x0b,0) ",%3 \n"
"punpckhbw %%xmm1,%%xmm1 \n"
- BUNDLEALIGN
MEMOPREG(movd,0x00,4,3,4,xmm2) // movd 0x0(%4,%3,4),%%xmm2
"movzb " MEMACCESS2(0x0f,0) ",%3 \n"
MEMOPREG(movd,0x00,4,3,4,xmm3) // movd 0x0(%4,%3,4),%%xmm3
@@ -4334,26 +3704,90 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
"pmulhuw %%xmm2,%%xmm1 \n"
"lea " MEMLEA(0x10,0) ",%0 \n"
"packuswb %%xmm1,%%xmm0 \n"
- "sub $0x4,%2 \n"
"movdqu %%xmm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x10,1) ",%1 \n"
+ "sub $0x4,%2 \n"
"jg 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
"+r"(width), // %2
"+r"(alpha) // %3
: "r"(fixed_invtbl8) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
#endif // HAS_ARGBUNATTENUATEROW_SSE2
+#ifdef HAS_ARGBUNATTENUATEROW_AVX2
+// Shuffle table duplicating alpha.
+static const uvec8 kUnattenShuffleAlpha_AVX2 = {
+ 0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15u
+};
+// Unattenuate 8 pixels at a time.
+void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
+ int width) {
+ uintptr_t alpha = 0;
+ asm volatile (
+ "sub %0,%1 \n"
+ "vbroadcastf128 %5,%%ymm5 \n"
+
+ // 8 pixel loop.
+ LABELALIGN
+ "1: \n"
+ // replace VPGATHER
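+    // The movzb/vmovd pairs below emulate a vpgatherdd of fixed_invtbl8[a]
+    // for 8 pixels, packing the eight 32-bit entries into ymm3 with
+    // vpunpckldq/vpunpcklqdq/vinserti128.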
+ "movzb " MEMACCESS2(0x03,0) ",%3 \n"
+ MEMOPREG(vmovd,0x00,4,3,4,xmm0) // vmovd 0x0(%4,%3,4),%%xmm0
+ "movzb " MEMACCESS2(0x07,0) ",%3 \n"
+ MEMOPREG(vmovd,0x00,4,3,4,xmm1) // vmovd 0x0(%4,%3,4),%%xmm1
+ "movzb " MEMACCESS2(0x0b,0) ",%3 \n"
+ "vpunpckldq %%xmm1,%%xmm0,%%xmm6 \n"
+ MEMOPREG(vmovd,0x00,4,3,4,xmm2) // vmovd 0x0(%4,%3,4),%%xmm2
+ "movzb " MEMACCESS2(0x0f,0) ",%3 \n"
+ MEMOPREG(vmovd,0x00,4,3,4,xmm3) // vmovd 0x0(%4,%3,4),%%xmm3
+ "movzb " MEMACCESS2(0x13,0) ",%3 \n"
+ "vpunpckldq %%xmm3,%%xmm2,%%xmm7 \n"
+ MEMOPREG(vmovd,0x00,4,3,4,xmm0) // vmovd 0x0(%4,%3,4),%%xmm0
+ "movzb " MEMACCESS2(0x17,0) ",%3 \n"
+ MEMOPREG(vmovd,0x00,4,3,4,xmm1) // vmovd 0x0(%4,%3,4),%%xmm1
+ "movzb " MEMACCESS2(0x1b,0) ",%3 \n"
+ "vpunpckldq %%xmm1,%%xmm0,%%xmm0 \n"
+ MEMOPREG(vmovd,0x00,4,3,4,xmm2) // vmovd 0x0(%4,%3,4),%%xmm2
+ "movzb " MEMACCESS2(0x1f,0) ",%3 \n"
+ MEMOPREG(vmovd,0x00,4,3,4,xmm3) // vmovd 0x0(%4,%3,4),%%xmm3
+ "vpunpckldq %%xmm3,%%xmm2,%%xmm2 \n"
+ "vpunpcklqdq %%xmm7,%%xmm6,%%xmm3 \n"
+ "vpunpcklqdq %%xmm2,%%xmm0,%%xmm0 \n"
+ "vinserti128 $0x1,%%xmm0,%%ymm3,%%ymm3 \n"
+ // end of VPGATHER
+
+ "vmovdqu " MEMACCESS(0) ",%%ymm6 \n"
+ "vpunpcklbw %%ymm6,%%ymm6,%%ymm0 \n"
+ "vpunpckhbw %%ymm6,%%ymm6,%%ymm1 \n"
+ "vpunpcklwd %%ymm3,%%ymm3,%%ymm2 \n"
+ "vpunpckhwd %%ymm3,%%ymm3,%%ymm3 \n"
+ "vpshufb %%ymm5,%%ymm2,%%ymm2 \n"
+ "vpshufb %%ymm5,%%ymm3,%%ymm3 \n"
+ "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ MEMOPMEM(vmovdqu,ymm0,0x00,0,1,1) // vmovdqu %%ymm0,(%0,%1)
+ "lea " MEMLEA(0x20,0) ",%0 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width), // %2
+ "+r"(alpha) // %3
+ : "r"(fixed_invtbl8), // %4
+ "m"(kUnattenShuffleAlpha_AVX2) // %5
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
+ );
+}
+#endif // HAS_ARGBUNATTENUATEROW_AVX2
+
#ifdef HAS_ARGBGRAYROW_SSSE3
// Convert 8 ARGB pixels (32 bytes) to 8 Gray ARGB pixels
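+// Sketch (assuming the usual libyuv kARGBToYJ = {15, 75, 38} weights):
+//   gray = (b * 15 + g * 75 + r * 38 + 64) >> 7
+// replicated into B, G and R, with the original alpha kept.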
void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
@@ -4364,16 +3798,16 @@ void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
// 8 pixel loop.
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
+ "movdqu " MEMACCESS(0) ",%%xmm0 \n"
+ "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
"pmaddubsw %%xmm4,%%xmm0 \n"
"pmaddubsw %%xmm4,%%xmm1 \n"
"phaddw %%xmm1,%%xmm0 \n"
"paddw %%xmm5,%%xmm0 \n"
"psrlw $0x7,%%xmm0 \n"
"packuswb %%xmm0,%%xmm0 \n"
- "movdqa " MEMACCESS(0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm3 \n"
+ "movdqu " MEMACCESS(0) ",%%xmm2 \n"
+ "movdqu " MEMACCESS2(0x10,0) ",%%xmm3 \n"
"lea " MEMLEA(0x20,0) ",%0 \n"
"psrld $0x18,%%xmm2 \n"
"psrld $0x18,%%xmm3 \n"
@@ -4385,10 +3819,10 @@ void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
"movdqa %%xmm0,%%xmm1 \n"
"punpcklwd %%xmm3,%%xmm0 \n"
"punpckhwd %%xmm3,%%xmm1 \n"
- "sub $0x8,%2 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "movdqa %%xmm1," MEMACCESS2(0x10,1) " \n"
+ "movdqu %%xmm0," MEMACCESS(1) " \n"
+ "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
"lea " MEMLEA(0x20,1) ",%1 \n"
+ "sub $0x8,%2 \n"
"jg 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
@@ -4396,9 +3830,7 @@ void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
: "m"(kARGBToYJ), // %3
"m"(kAddYJ64) // %4
: "memory", "cc"
-#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
);
}
#endif // HAS_ARGBGRAYROW_SSSE3
@@ -4430,30 +3862,30 @@ void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
// 8 pixel loop.
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm6 \n"
+ "movdqu " MEMACCESS(0) ",%%xmm0 \n"
+ "movdqu " MEMACCESS2(0x10,0) ",%%xmm6 \n"
"pmaddubsw %%xmm2,%%xmm0 \n"
"pmaddubsw %%xmm2,%%xmm6 \n"
"phaddw %%xmm6,%%xmm0 \n"
"psrlw $0x7,%%xmm0 \n"
"packuswb %%xmm0,%%xmm0 \n"
- "movdqa " MEMACCESS(0) ",%%xmm5 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
+ "movdqu " MEMACCESS(0) ",%%xmm5 \n"
+ "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
"pmaddubsw %%xmm3,%%xmm5 \n"
"pmaddubsw %%xmm3,%%xmm1 \n"
"phaddw %%xmm1,%%xmm5 \n"
"psrlw $0x7,%%xmm5 \n"
"packuswb %%xmm5,%%xmm5 \n"
"punpcklbw %%xmm5,%%xmm0 \n"
- "movdqa " MEMACCESS(0) ",%%xmm5 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
+ "movdqu " MEMACCESS(0) ",%%xmm5 \n"
+ "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
"pmaddubsw %%xmm4,%%xmm5 \n"
"pmaddubsw %%xmm4,%%xmm1 \n"
"phaddw %%xmm1,%%xmm5 \n"
"psrlw $0x7,%%xmm5 \n"
"packuswb %%xmm5,%%xmm5 \n"
- "movdqa " MEMACCESS(0) ",%%xmm6 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
+ "movdqu " MEMACCESS(0) ",%%xmm6 \n"
+ "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
"psrld $0x18,%%xmm6 \n"
"psrld $0x18,%%xmm1 \n"
"packuswb %%xmm1,%%xmm6 \n"
@@ -4462,10 +3894,10 @@ void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
"movdqa %%xmm0,%%xmm1 \n"
"punpcklwd %%xmm5,%%xmm0 \n"
"punpckhwd %%xmm5,%%xmm1 \n"
- "sub $0x8,%1 \n"
- "movdqa %%xmm0," MEMACCESS(0) " \n"
- "movdqa %%xmm1," MEMACCESS2(0x10,0) " \n"
+ "movdqu %%xmm0," MEMACCESS(0) " \n"
+ "movdqu %%xmm1," MEMACCESS2(0x10,0) " \n"
"lea " MEMLEA(0x20,0) ",%0 \n"
+ "sub $0x8,%1 \n"
"jg 1b \n"
: "+r"(dst_argb), // %0
"+r"(width) // %1
@@ -4473,9 +3905,7 @@ void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
"m"(kARGBToSepiaG), // %3
"m"(kARGBToSepiaR) // %4
: "memory", "cc"
-#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
-#endif
);
}
#endif // HAS_ARGBSEPIAROW_SSSE3
@@ -4495,12 +3925,12 @@ void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
// 8 pixel loop.
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm7 \n"
+ "movdqu " MEMACCESS(0) ",%%xmm0 \n"
+ "movdqu " MEMACCESS2(0x10,0) ",%%xmm7 \n"
"pmaddubsw %%xmm2,%%xmm0 \n"
"pmaddubsw %%xmm2,%%xmm7 \n"
- "movdqa " MEMACCESS(0) ",%%xmm6 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
+ "movdqu " MEMACCESS(0) ",%%xmm6 \n"
+ "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
"pmaddubsw %%xmm3,%%xmm6 \n"
"pmaddubsw %%xmm3,%%xmm1 \n"
"phaddsw %%xmm7,%%xmm0 \n"
@@ -4510,13 +3940,13 @@ void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
"packuswb %%xmm0,%%xmm0 \n"
"packuswb %%xmm6,%%xmm6 \n"
"punpcklbw %%xmm6,%%xmm0 \n"
- "movdqa " MEMACCESS(0) ",%%xmm1 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm7 \n"
+ "movdqu " MEMACCESS(0) ",%%xmm1 \n"
+ "movdqu " MEMACCESS2(0x10,0) ",%%xmm7 \n"
"pmaddubsw %%xmm4,%%xmm1 \n"
"pmaddubsw %%xmm4,%%xmm7 \n"
"phaddsw %%xmm7,%%xmm1 \n"
- "movdqa " MEMACCESS(0) ",%%xmm6 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm7 \n"
+ "movdqu " MEMACCESS(0) ",%%xmm6 \n"
+ "movdqu " MEMACCESS2(0x10,0) ",%%xmm7 \n"
"pmaddubsw %%xmm5,%%xmm6 \n"
"pmaddubsw %%xmm5,%%xmm7 \n"
"phaddsw %%xmm7,%%xmm6 \n"
@@ -4528,27 +3958,24 @@ void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
"movdqa %%xmm0,%%xmm6 \n"
"punpcklwd %%xmm1,%%xmm0 \n"
"punpckhwd %%xmm1,%%xmm6 \n"
- "sub $0x8,%2 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "movdqa %%xmm6," MEMACCESS2(0x10,1) " \n"
+ "movdqu %%xmm0," MEMACCESS(1) " \n"
+ "movdqu %%xmm6," MEMACCESS2(0x10,1) " \n"
"lea " MEMLEA(0x20,0) ",%0 \n"
"lea " MEMLEA(0x20,1) ",%1 \n"
+ "sub $0x8,%2 \n"
"jg 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
: "r"(matrix_argb) // %3
: "memory", "cc"
-#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
);
}
#endif // HAS_ARGBCOLORMATRIXROW_SSSE3
#ifdef HAS_ARGBQUANTIZEROW_SSE2
// Quantize 4 ARGB pixels (16 bytes).
-// aligned to 16 bytes
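+// Sketch of the per-channel math (illustrative):
+//   dst = (v * scale >> 16) * interval_size + interval_offset;
+// alpha passes through unchanged via the mask in xmm6/xmm7.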
void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
int interval_offset, int width) {
asm volatile (
@@ -4568,23 +3995,23 @@ void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
// 4 pixel loop.
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
+ "movdqu " MEMACCESS(0) ",%%xmm0 \n"
"punpcklbw %%xmm5,%%xmm0 \n"
"pmulhuw %%xmm2,%%xmm0 \n"
- "movdqa " MEMACCESS(0) ",%%xmm1 \n"
+ "movdqu " MEMACCESS(0) ",%%xmm1 \n"
"punpckhbw %%xmm5,%%xmm1 \n"
"pmulhuw %%xmm2,%%xmm1 \n"
"pmullw %%xmm3,%%xmm0 \n"
- "movdqa " MEMACCESS(0) ",%%xmm7 \n"
+ "movdqu " MEMACCESS(0) ",%%xmm7 \n"
"pmullw %%xmm3,%%xmm1 \n"
"pand %%xmm6,%%xmm7 \n"
"paddw %%xmm4,%%xmm0 \n"
"paddw %%xmm4,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n"
"por %%xmm7,%%xmm0 \n"
- "sub $0x4,%1 \n"
- "movdqa %%xmm0," MEMACCESS(0) " \n"
+ "movdqu %%xmm0," MEMACCESS(0) " \n"
"lea " MEMLEA(0x10,0) ",%0 \n"
+ "sub $0x4,%1 \n"
"jg 1b \n"
: "+r"(dst_argb), // %0
"+r"(width) // %1
@@ -4592,16 +4019,13 @@ void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
"r"(interval_size), // %3
"r"(interval_offset) // %4
: "memory", "cc"
-#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
);
}
#endif // HAS_ARGBQUANTIZEROW_SSE2
#ifdef HAS_ARGBSHADEROW_SSE2
// Shade 4 pixels at a time by specified value.
-// Aligned to 16 bytes.
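+// Sketch (illustrative): each channel is scaled by the matching byte of
+// 'value', roughly dst[c] = src[c] * value[c] / 255, via pmulhuw on
+// byte-duplicated words.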
void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
uint32 value) {
asm volatile (
@@ -4612,7 +4036,7 @@ void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
// 4 pixel loop.
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
+ "movdqu " MEMACCESS(0) ",%%xmm0 \n"
"lea " MEMLEA(0x10,0) ",%0 \n"
"movdqa %%xmm0,%%xmm1 \n"
"punpcklbw %%xmm0,%%xmm0 \n"
@@ -4622,18 +4046,16 @@ void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
"psrlw $0x8,%%xmm0 \n"
"psrlw $0x8,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n"
- "sub $0x4,%2 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
+ "movdqu %%xmm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x10,1) ",%1 \n"
+ "sub $0x4,%2 \n"
"jg 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
: "r"(value) // %3
: "memory", "cc"
-#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2"
-#endif
);
}
#endif // HAS_ARGBSHADEROW_SSE2
@@ -4643,7 +4065,7 @@ void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
void ARGBMultiplyRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
uint8* dst_argb, int width) {
asm volatile (
- "pxor %%xmm5,%%xmm5 \n"
+ "pxor %%xmm5,%%xmm5 \n"
// 4 pixel loop.
LABELALIGN
@@ -4661,9 +4083,9 @@ void ARGBMultiplyRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
"pmulhuw %%xmm2,%%xmm0 \n"
"pmulhuw %%xmm3,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n"
- "sub $0x4,%3 \n"
"movdqu %%xmm0," MEMACCESS(2) " \n"
"lea " MEMLEA(0x10,2) ",%2 \n"
+ "sub $0x4,%3 \n"
"jg 1b \n"
: "+r"(src_argb0), // %0
"+r"(src_argb1), // %1
@@ -4671,12 +4093,49 @@ void ARGBMultiplyRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
"+r"(width) // %3
:
: "memory", "cc"
-#if defined(__SSE2__)
+ , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
+ );
+}
+#endif // HAS_ARGBMULTIPLYROW_SSE2
+
+#ifdef HAS_ARGBMULTIPLYROW_AVX2
+// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
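+// Sketch (illustrative): per channel this is roughly
+//   dst = s0 * s1 / 255;
+// one operand is byte-duplicated, the other zero-extended, and pmulhuw
+// keeps the high 16 bits of the product.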
+void ARGBMultiplyRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
+ uint8* dst_argb, int width) {
+ asm volatile (
+ "vpxor %%ymm5,%%ymm5,%%ymm5 \n"
+
+  // 8 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "vmovdqu " MEMACCESS(0) ",%%ymm1 \n"
+ "lea " MEMLEA(0x20,0) ",%0 \n"
+ "vmovdqu " MEMACCESS(1) ",%%ymm3 \n"
+ "lea " MEMLEA(0x20,1) ",%1 \n"
+ "vpunpcklbw %%ymm1,%%ymm1,%%ymm0 \n"
+ "vpunpckhbw %%ymm1,%%ymm1,%%ymm1 \n"
+ "vpunpcklbw %%ymm5,%%ymm3,%%ymm2 \n"
+ "vpunpckhbw %%ymm5,%%ymm3,%%ymm3 \n"
+ "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0," MEMACCESS(2) " \n"
+ "lea " MEMLEA(0x20,2) ",%2 \n"
+ "sub $0x8,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_argb0), // %0
+ "+r"(src_argb1), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc"
+#if defined(__AVX2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
#endif
);
}
-#endif // HAS_ARGBMULTIPLYROW_SSE2
+#endif // HAS_ARGBMULTIPLYROW_AVX2
#ifdef HAS_ARGBADDROW_SSE2
// Add 2 rows of ARGB pixels together, 4 pixels at a time.
@@ -4691,9 +4150,9 @@ void ARGBAddRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
"movdqu " MEMACCESS(1) ",%%xmm1 \n"
"lea " MEMLEA(0x10,1) ",%1 \n"
"paddusb %%xmm1,%%xmm0 \n"
- "sub $0x4,%3 \n"
"movdqu %%xmm0," MEMACCESS(2) " \n"
"lea " MEMLEA(0x10,2) ",%2 \n"
+ "sub $0x4,%3 \n"
"jg 1b \n"
: "+r"(src_argb0), // %0
"+r"(src_argb1), // %1
@@ -4701,13 +4160,39 @@ void ARGBAddRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
"+r"(width) // %3
:
: "memory", "cc"
-#if defined(__SSE2__)
, "xmm0", "xmm1"
-#endif
);
}
#endif // HAS_ARGBADDROW_SSE2
+#ifdef HAS_ARGBADDROW_AVX2
+// Add 2 rows of ARGB pixels together, 8 pixels at a time.
+void ARGBAddRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
+ uint8* dst_argb, int width) {
+ asm volatile (
+  // 8 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
+ "lea " MEMLEA(0x20,0) ",%0 \n"
+ "vpaddusb " MEMACCESS(1) ",%%ymm0,%%ymm0 \n"
+ "lea " MEMLEA(0x20,1) ",%1 \n"
+ "vmovdqu %%ymm0," MEMACCESS(2) " \n"
+ "lea " MEMLEA(0x20,2) ",%2 \n"
+ "sub $0x8,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_argb0), // %0
+ "+r"(src_argb1), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc"
+ , "xmm0"
+ );
+}
+#endif // HAS_ARGBADDROW_AVX2
+
#ifdef HAS_ARGBSUBTRACTROW_SSE2
// Subtract 2 rows of ARGB pixels, 4 pixels at a time.
void ARGBSubtractRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
@@ -4721,9 +4206,9 @@ void ARGBSubtractRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
"movdqu " MEMACCESS(1) ",%%xmm1 \n"
"lea " MEMLEA(0x10,1) ",%1 \n"
"psubusb %%xmm1,%%xmm0 \n"
- "sub $0x4,%3 \n"
"movdqu %%xmm0," MEMACCESS(2) " \n"
"lea " MEMLEA(0x10,2) ",%2 \n"
+ "sub $0x4,%3 \n"
"jg 1b \n"
: "+r"(src_argb0), // %0
"+r"(src_argb1), // %1
@@ -4731,13 +4216,39 @@ void ARGBSubtractRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
"+r"(width) // %3
:
: "memory", "cc"
-#if defined(__SSE2__)
, "xmm0", "xmm1"
-#endif
);
}
#endif // HAS_ARGBSUBTRACTROW_SSE2
+#ifdef HAS_ARGBSUBTRACTROW_AVX2
+// Subtract 2 rows of ARGB pixels, 8 pixels at a time.
+void ARGBSubtractRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
+ uint8* dst_argb, int width) {
+ asm volatile (
+  // 8 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
+ "lea " MEMLEA(0x20,0) ",%0 \n"
+ "vpsubusb " MEMACCESS(1) ",%%ymm0,%%ymm0 \n"
+ "lea " MEMLEA(0x20,1) ",%1 \n"
+ "vmovdqu %%ymm0," MEMACCESS(2) " \n"
+ "lea " MEMLEA(0x20,2) ",%2 \n"
+ "sub $0x8,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_argb0), // %0
+ "+r"(src_argb1), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc"
+ , "xmm0"
+ );
+}
+#endif // HAS_ARGBSUBTRACTROW_AVX2
+
#ifdef HAS_SOBELXROW_SSE2
// SobelX as a matrix is
// -1 0 1
@@ -4759,13 +4270,11 @@ void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
"punpcklbw %%xmm5,%%xmm0 \n"
"punpcklbw %%xmm5,%%xmm1 \n"
"psubw %%xmm1,%%xmm0 \n"
- BUNDLEALIGN
MEMOPREG(movq,0x00,0,1,1,xmm1) // movq (%0,%1,1),%%xmm1
MEMOPREG(movq,0x02,0,1,1,xmm2) // movq 0x2(%0,%1,1),%%xmm2
"punpcklbw %%xmm5,%%xmm1 \n"
"punpcklbw %%xmm5,%%xmm2 \n"
"psubw %%xmm2,%%xmm1 \n"
- BUNDLEALIGN
MEMOPREG(movq,0x00,0,2,1,xmm2) // movq (%0,%2,1),%%xmm2
MEMOPREG(movq,0x02,0,2,1,xmm3) // movq 0x2(%0,%2,1),%%xmm3
"punpcklbw %%xmm5,%%xmm2 \n"
@@ -4778,10 +4287,9 @@ void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
"psubw %%xmm0,%%xmm1 \n"
"pmaxsw %%xmm1,%%xmm0 \n"
"packuswb %%xmm0,%%xmm0 \n"
- "sub $0x8,%4 \n"
- BUNDLEALIGN
MEMOPMEM(movq,xmm0,0x00,0,3,1) // movq %%xmm0,(%0,%3,1)
"lea " MEMLEA(0x8,0) ",%0 \n"
+ "sub $0x8,%4 \n"
"jg 1b \n"
: "+r"(src_y0), // %0
"+r"(src_y1), // %1
@@ -4789,13 +4297,8 @@ void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
"+r"(dst_sobelx), // %3
"+r"(width) // %4
:
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
}
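+// Sketch (illustrative, per the usual C reference): per output byte
+//   sobel = clamp255(abs((a0 - a2) + 2 * (b0 - b2) + (c0 - c2)))
+// where a, b, c are the three source rows; SobelY below is the transpose.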
#endif // HAS_SOBELXROW_SSE2
@@ -4820,13 +4323,11 @@ void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
"punpcklbw %%xmm5,%%xmm0 \n"
"punpcklbw %%xmm5,%%xmm1 \n"
"psubw %%xmm1,%%xmm0 \n"
- BUNDLEALIGN
"movq " MEMACCESS2(0x1,0) ",%%xmm1 \n"
MEMOPREG(movq,0x01,0,1,1,xmm2) // movq 0x1(%0,%1,1),%%xmm2
"punpcklbw %%xmm5,%%xmm1 \n"
"punpcklbw %%xmm5,%%xmm2 \n"
"psubw %%xmm2,%%xmm1 \n"
- BUNDLEALIGN
"movq " MEMACCESS2(0x2,0) ",%%xmm2 \n"
MEMOPREG(movq,0x02,0,1,1,xmm3) // movq 0x2(%0,%1,1),%%xmm3
"punpcklbw %%xmm5,%%xmm2 \n"
@@ -4839,23 +4340,17 @@ void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
"psubw %%xmm0,%%xmm1 \n"
"pmaxsw %%xmm1,%%xmm0 \n"
"packuswb %%xmm0,%%xmm0 \n"
- "sub $0x8,%3 \n"
- BUNDLEALIGN
MEMOPMEM(movq,xmm0,0x00,0,2,1) // movq %%xmm0,(%0,%2,1)
"lea " MEMLEA(0x8,0) ",%0 \n"
+ "sub $0x8,%3 \n"
"jg 1b \n"
: "+r"(src_y0), // %0
"+r"(src_y1), // %1
"+r"(dst_sobely), // %2
"+r"(width) // %3
:
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
}
#endif // HAS_SOBELYROW_SSE2
@@ -4876,8 +4371,8 @@ void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
// 8 pixel loop.
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movdqa,0x00,0,1,1,xmm1) // movdqa (%0,%1,1),%%xmm1
+ "movdqu " MEMACCESS(0) ",%%xmm0 \n"
+ MEMOPREG(movdqu,0x00,0,1,1,xmm1) // movdqu (%0,%1,1),%%xmm1
"lea " MEMLEA(0x10,0) ",%0 \n"
"paddusb %%xmm1,%%xmm0 \n"
"movdqa %%xmm0,%%xmm2 \n"
@@ -4893,25 +4388,20 @@ void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
"punpckhwd %%xmm0,%%xmm0 \n"
"por %%xmm5,%%xmm3 \n"
"por %%xmm5,%%xmm0 \n"
- "sub $0x10,%3 \n"
- "movdqa %%xmm1," MEMACCESS(2) " \n"
- "movdqa %%xmm2," MEMACCESS2(0x10,2) " \n"
- "movdqa %%xmm3," MEMACCESS2(0x20,2) " \n"
- "movdqa %%xmm0," MEMACCESS2(0x30,2) " \n"
+ "movdqu %%xmm1," MEMACCESS(2) " \n"
+ "movdqu %%xmm2," MEMACCESS2(0x10,2) " \n"
+ "movdqu %%xmm3," MEMACCESS2(0x20,2) " \n"
+ "movdqu %%xmm0," MEMACCESS2(0x30,2) " \n"
"lea " MEMLEA(0x40,2) ",%2 \n"
+ "sub $0x10,%3 \n"
"jg 1b \n"
: "+r"(src_sobelx), // %0
"+r"(src_sobely), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
:
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
}
#endif // HAS_SOBELROW_SSE2
@@ -4928,26 +4418,21 @@ void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
// 8 pixel loop.
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movdqa,0x00,0,1,1,xmm1) // movdqa (%0,%1,1),%%xmm1
+ "movdqu " MEMACCESS(0) ",%%xmm0 \n"
+ MEMOPREG(movdqu,0x00,0,1,1,xmm1) // movdqu (%0,%1,1),%%xmm1
"lea " MEMLEA(0x10,0) ",%0 \n"
"paddusb %%xmm1,%%xmm0 \n"
- "sub $0x10,%3 \n"
- "movdqa %%xmm0," MEMACCESS(2) " \n"
+ "movdqu %%xmm0," MEMACCESS(2) " \n"
"lea " MEMLEA(0x10,2) ",%2 \n"
+ "sub $0x10,%3 \n"
"jg 1b \n"
: "+r"(src_sobelx), // %0
"+r"(src_sobely), // %1
"+r"(dst_y), // %2
"+r"(width) // %3
:
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1"
-#endif
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1"
);
}
#endif // HAS_SOBELTOPLANEROW_SSE2
@@ -4967,8 +4452,8 @@ void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
// 8 pixel loop.
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movdqa,0x00,0,1,1,xmm1) // movdqa (%0,%1,1),%%xmm1
+ "movdqu " MEMACCESS(0) ",%%xmm0 \n"
+ MEMOPREG(movdqu,0x00,0,1,1,xmm1) // movdqu (%0,%1,1),%%xmm1
"lea " MEMLEA(0x10,0) ",%0 \n"
"movdqa %%xmm0,%%xmm2 \n"
"paddusb %%xmm1,%%xmm2 \n"
@@ -4984,25 +4469,20 @@ void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
"movdqa %%xmm1,%%xmm7 \n"
"punpcklwd %%xmm0,%%xmm7 \n"
"punpckhwd %%xmm0,%%xmm1 \n"
- "sub $0x10,%3 \n"
- "movdqa %%xmm6," MEMACCESS(2) " \n"
- "movdqa %%xmm4," MEMACCESS2(0x10,2) " \n"
- "movdqa %%xmm7," MEMACCESS2(0x20,2) " \n"
- "movdqa %%xmm1," MEMACCESS2(0x30,2) " \n"
+ "movdqu %%xmm6," MEMACCESS(2) " \n"
+ "movdqu %%xmm4," MEMACCESS2(0x10,2) " \n"
+ "movdqu %%xmm7," MEMACCESS2(0x20,2) " \n"
+ "movdqu %%xmm1," MEMACCESS2(0x30,2) " \n"
"lea " MEMLEA(0x40,2) ",%2 \n"
+ "sub $0x10,%3 \n"
"jg 1b \n"
: "+r"(src_sobelx), // %0
"+r"(src_sobely), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
:
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
);
}
#endif // HAS_SOBELXYROW_SSE2
@@ -5035,22 +4515,22 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
"punpcklwd %%xmm1,%%xmm4 \n"
"punpckhwd %%xmm1,%%xmm5 \n"
"paddd %%xmm2,%%xmm0 \n"
- "movdqa " MEMACCESS(2) ",%%xmm2 \n"
+ "movdqu " MEMACCESS(2) ",%%xmm2 \n"
"paddd %%xmm0,%%xmm2 \n"
"paddd %%xmm3,%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,2) ",%%xmm3 \n"
+ "movdqu " MEMACCESS2(0x10,2) ",%%xmm3 \n"
"paddd %%xmm0,%%xmm3 \n"
"paddd %%xmm4,%%xmm0 \n"
- "movdqa " MEMACCESS2(0x20,2) ",%%xmm4 \n"
+ "movdqu " MEMACCESS2(0x20,2) ",%%xmm4 \n"
"paddd %%xmm0,%%xmm4 \n"
"paddd %%xmm5,%%xmm0 \n"
- "movdqa " MEMACCESS2(0x30,2) ",%%xmm5 \n"
+ "movdqu " MEMACCESS2(0x30,2) ",%%xmm5 \n"
"lea " MEMLEA(0x40,2) ",%2 \n"
"paddd %%xmm0,%%xmm5 \n"
- "movdqa %%xmm2," MEMACCESS(1) " \n"
- "movdqa %%xmm3," MEMACCESS2(0x10,1) " \n"
- "movdqa %%xmm4," MEMACCESS2(0x20,1) " \n"
- "movdqa %%xmm5," MEMACCESS2(0x30,1) " \n"
+ "movdqu %%xmm2," MEMACCESS(1) " \n"
+ "movdqu %%xmm3," MEMACCESS2(0x10,1) " \n"
+ "movdqu %%xmm4," MEMACCESS2(0x20,1) " \n"
+ "movdqu %%xmm5," MEMACCESS2(0x30,1) " \n"
"lea " MEMLEA(0x40,1) ",%1 \n"
"sub $0x4,%3 \n"
"jge 40b \n"
@@ -5082,9 +4562,7 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
"+r"(width) // %3
:
: "memory", "cc"
-#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
);
}
#endif // HAS_COMPUTECUMULATIVESUMROW_SSE2
@@ -5115,11 +4593,10 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
// 4 pixel small loop.
LABELALIGN
"4: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- BUNDLEALIGN
+ "movdqu " MEMACCESS(0) ",%%xmm0 \n"
+ "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
+ "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
+ "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n"
MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0
MEMOPREG(psubd,0x10,0,4,4,xmm1) // psubd 0x10(%0,%4,4),%%xmm1
MEMOPREG(psubd,0x20,0,4,4,xmm2) // psubd 0x20(%0,%4,4),%%xmm2
@@ -5129,7 +4606,6 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
"psubd " MEMACCESS2(0x10,1) ",%%xmm1 \n"
"psubd " MEMACCESS2(0x20,1) ",%%xmm2 \n"
"psubd " MEMACCESS2(0x30,1) ",%%xmm3 \n"
- BUNDLEALIGN
MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0
MEMOPREG(paddd,0x10,1,4,4,xmm1) // paddd 0x10(%1,%4,4),%%xmm1
MEMOPREG(paddd,0x20,1,4,4,xmm2) // paddd 0x20(%1,%4,4),%%xmm2
@@ -5149,11 +4625,10 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
// 4 pixel loop.
LABELALIGN
"40: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- BUNDLEALIGN
+ "movdqu " MEMACCESS(0) ",%%xmm0 \n"
+ "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
+ "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
+ "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n"
MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0
MEMOPREG(psubd,0x10,0,4,4,xmm1) // psubd 0x10(%0,%4,4),%%xmm1
MEMOPREG(psubd,0x20,0,4,4,xmm2) // psubd 0x20(%0,%4,4),%%xmm2
@@ -5163,7 +4638,6 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
"psubd " MEMACCESS2(0x10,1) ",%%xmm1 \n"
"psubd " MEMACCESS2(0x20,1) ",%%xmm2 \n"
"psubd " MEMACCESS2(0x30,1) ",%%xmm3 \n"
- BUNDLEALIGN
MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0
MEMOPREG(paddd,0x10,1,4,4,xmm1) // paddd 0x10(%1,%4,4),%%xmm1
MEMOPREG(paddd,0x20,1,4,4,xmm2) // paddd 0x20(%1,%4,4),%%xmm2
@@ -5196,11 +4670,10 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
// 1 pixel loop.
LABELALIGN
"10: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
+ "movdqu " MEMACCESS(0) ",%%xmm0 \n"
MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0
"lea " MEMLEA(0x10,0) ",%0 \n"
"psubd " MEMACCESS(1) ",%%xmm0 \n"
- BUNDLEALIGN
MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0
"lea " MEMLEA(0x10,1) ",%1 \n"
"cvtdq2ps %%xmm0,%%xmm0 \n"
@@ -5219,13 +4692,8 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
"+rm"(count) // %3
: "r"((intptr_t)(width)), // %4
"rm"(area) // %5
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
-#endif
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
);
}
#endif // HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
@@ -5268,7 +4736,6 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
"pshufd $0x39,%%xmm0,%%xmm0 \n"
"movd %%xmm0,%k5 \n"
"pshufd $0x39,%%xmm0,%%xmm0 \n"
- BUNDLEALIGN
MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1
MEMOPREG(movd,0x00,0,5,1,xmm6) // movd (%0,%5,1),%%xmm6
"punpckldq %%xmm6,%%xmm1 \n"
@@ -5277,14 +4744,13 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
"movd %%xmm0,%k1 \n"
"pshufd $0x39,%%xmm0,%%xmm0 \n"
"movd %%xmm0,%k5 \n"
- BUNDLEALIGN
MEMOPREG(movd,0x00,0,1,1,xmm0) // movd (%0,%1,1),%%xmm0
MEMOPREG(movd,0x00,0,5,1,xmm6) // movd (%0,%5,1),%%xmm6
"punpckldq %%xmm6,%%xmm0 \n"
"addps %%xmm4,%%xmm3 \n"
- "sub $0x4,%4 \n"
"movq %%xmm0," MEMACCESS2(0x08,2) " \n"
"lea " MEMLEA(0x10,2) ",%2 \n"
+ "sub $0x4,%4 \n"
"jge 40b \n"
"49: \n"
@@ -5299,11 +4765,10 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
"pmaddwd %%xmm5,%%xmm0 \n"
"addps %%xmm7,%%xmm2 \n"
"movd %%xmm0,%k1 \n"
- BUNDLEALIGN
MEMOPREG(movd,0x00,0,1,1,xmm0) // movd (%0,%1,1),%%xmm0
- "sub $0x1,%4 \n"
"movd %%xmm0," MEMACCESS(2) " \n"
"lea " MEMLEA(0x04,2) ",%2 \n"
+ "sub $0x1,%4 \n"
"jge 10b \n"
"19: \n"
: "+r"(src_argb), // %0
@@ -5313,13 +4778,8 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
"+rm"(width), // %4
"+r"(temp) // %5
:
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
);
}
#endif // HAS_ARGBAFFINEROW_SSE2
@@ -5331,324 +4791,66 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
int source_y_fraction) {
asm volatile (
"sub %1,%0 \n"
- "shr %3 \n"
"cmp $0x0,%3 \n"
"je 100f \n"
- "cmp $0x20,%3 \n"
- "je 75f \n"
- "cmp $0x40,%3 \n"
+ "cmp $0x80,%3 \n"
"je 50f \n"
- "cmp $0x60,%3 \n"
- "je 25f \n"
"movd %3,%%xmm0 \n"
"neg %3 \n"
- "add $0x80,%3 \n"
- "movd %3,%%xmm5 \n"
- "punpcklbw %%xmm0,%%xmm5 \n"
- "punpcklwd %%xmm5,%%xmm5 \n"
- "pshufd $0x0,%%xmm5,%%xmm5 \n"
-
- // General purpose row blend.
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPREG(movdqa,0x00,1,4,1,xmm2)
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm2,%%xmm0 \n"
- "punpckhbw %%xmm2,%%xmm1 \n"
- "pmaddubsw %%xmm5,%%xmm0 \n"
- "pmaddubsw %%xmm5,%%xmm1 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqa,xmm0,0x00,1,0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- "jmp 99f \n"
-
- // Blend 25 / 75.
- LABELALIGN
- "25: \n"
- "movdqa " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPREG(movdqa,0x00,1,4,1,xmm1)
- "pavgb %%xmm1,%%xmm0 \n"
- "pavgb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqa,xmm0,0x00,1,0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 25b \n"
- "jmp 99f \n"
-
- // Blend 50 / 50.
- LABELALIGN
- "50: \n"
- "movdqa " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPREG(movdqa,0x00,1,4,1,xmm1)
- "pavgb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqa,xmm0,0x00,1,0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 50b \n"
- "jmp 99f \n"
-
- // Blend 75 / 25.
- LABELALIGN
- "75: \n"
- "movdqa " MEMACCESS(1) ",%%xmm1 \n"
- MEMOPREG(movdqa,0x00,1,4,1,xmm0)
- "pavgb %%xmm1,%%xmm0 \n"
- "pavgb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqa,xmm0,0x00,1,0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 75b \n"
- "jmp 99f \n"
-
- // Blend 100 / 0 - Copy row unchanged.
- LABELALIGN
- "100: \n"
- "movdqa " MEMACCESS(1) ",%%xmm0 \n"
- "sub $0x10,%2 \n"
- MEMOPMEM(movdqa,xmm0,0x00,1,0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 100b \n"
-
- "99: \n"
- : "+r"(dst_ptr), // %0
- "+r"(src_ptr), // %1
- "+r"(dst_width), // %2
- "+r"(source_y_fraction) // %3
- : "r"((intptr_t)(src_stride)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm5"
-#endif
- );
-}
-#endif // HAS_INTERPOLATEROW_SSSE3
-
-#ifdef HAS_INTERPOLATEROW_SSE2
-// Bilinear filter 16x2 -> 16x1
-void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) {
- asm volatile (
- "sub %1,%0 \n"
- "shr %3 \n"
- "cmp $0x0,%3 \n"
- "je 100f \n"
- "cmp $0x20,%3 \n"
- "je 75f \n"
- "cmp $0x40,%3 \n"
- "je 50f \n"
- "cmp $0x60,%3 \n"
- "je 25f \n"
-
- "movd %3,%%xmm0 \n"
- "neg %3 \n"
- "add $0x80,%3 \n"
- "movd %3,%%xmm5 \n"
- "punpcklbw %%xmm0,%%xmm5 \n"
- "punpcklwd %%xmm5,%%xmm5 \n"
- "pshufd $0x0,%%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
-
- // General purpose row blend.
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPREG(movdqa,0x00,1,4,1,xmm2) // movdqa (%1,%4,1),%%xmm2
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm2,%%xmm3 \n"
- "punpcklbw %%xmm4,%%xmm2 \n"
- "punpckhbw %%xmm4,%%xmm3 \n"
- "punpcklbw %%xmm4,%%xmm0 \n"
- "punpckhbw %%xmm4,%%xmm1 \n"
- "psubw %%xmm0,%%xmm2 \n"
- "psubw %%xmm1,%%xmm3 \n"
- "paddw %%xmm2,%%xmm2 \n"
- "paddw %%xmm3,%%xmm3 \n"
- "pmulhw %%xmm5,%%xmm2 \n"
- "pmulhw %%xmm5,%%xmm3 \n"
- "paddw %%xmm2,%%xmm0 \n"
- "paddw %%xmm3,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqa,xmm0,0x00,1,0,1) // movdqa %%xmm0,(%1,%0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- "jmp 99f \n"
-
- // Blend 25 / 75.
- LABELALIGN
- "25: \n"
- "movdqa " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPREG(movdqa,0x00,1,4,1,xmm1) // movdqa (%1,%4,1),%%xmm1
- "pavgb %%xmm1,%%xmm0 \n"
- "pavgb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqa,xmm0,0x00,1,0,1) // movdqa %%xmm0,(%1,%0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 25b \n"
- "jmp 99f \n"
-
- // Blend 50 / 50.
- LABELALIGN
- "50: \n"
- "movdqa " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPREG(movdqa,0x00,1,4,1,xmm1) // movdqa (%1,%4,1),%%xmm1
- "pavgb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqa,xmm0,0x00,1,0,1) // movdqa %%xmm0,(%1,%0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 50b \n"
- "jmp 99f \n"
-
- // Blend 75 / 25.
- LABELALIGN
- "75: \n"
- "movdqa " MEMACCESS(1) ",%%xmm1 \n"
- MEMOPREG(movdqa,0x00,1,4,1,xmm0) // movdqa (%1,%4,1),%%xmm0
- "pavgb %%xmm1,%%xmm0 \n"
- "pavgb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqa,xmm0,0x00,1,0,1) // movdqa %%xmm0,(%1,%0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 75b \n"
- "jmp 99f \n"
-
- // Blend 100 / 0 - Copy row unchanged.
- LABELALIGN
- "100: \n"
- "movdqa " MEMACCESS(1) ",%%xmm0 \n"
- "sub $0x10,%2 \n"
- MEMOPMEM(movdqa,xmm0,0x00,1,0,1) // movdqa %%xmm0,(%1,%0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 100b \n"
-
- "99: \n"
- : "+r"(dst_ptr), // %0
- "+r"(src_ptr), // %1
- "+r"(dst_width), // %2
- "+r"(source_y_fraction) // %3
- : "r"((intptr_t)(src_stride)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-#endif // HAS_INTERPOLATEROW_SSE2
-
-#ifdef HAS_INTERPOLATEROW_SSSE3
-// Bilinear filter 16x2 -> 16x1
-void InterpolateRow_Unaligned_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) {
- asm volatile (
- "sub %1,%0 \n"
- "shr %3 \n"
- "cmp $0x0,%3 \n"
- "je 100f \n"
- "cmp $0x20,%3 \n"
- "je 75f \n"
- "cmp $0x40,%3 \n"
- "je 50f \n"
- "cmp $0x60,%3 \n"
- "je 25f \n"
-
- "movd %3,%%xmm0 \n"
- "neg %3 \n"
- "add $0x80,%3 \n"
+ "add $0x100,%3 \n"
"movd %3,%%xmm5 \n"
"punpcklbw %%xmm0,%%xmm5 \n"
"punpcklwd %%xmm5,%%xmm5 \n"
"pshufd $0x0,%%xmm5,%%xmm5 \n"
+ "mov $0x80808080,%%eax \n"
+ "movd %%eax,%%xmm4 \n"
+ "pshufd $0x0,%%xmm4,%%xmm4 \n"
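+    // xmm5 holds (256 - f, f) byte pairs and xmm4 holds 0x80 in every
+    // byte: psubb biases the pixels so pmaddubsw can multiply-add them
+    // as signed values, and paddw restores the bias, giving roughly
+    //   dst = (y0 * (256 - f) + y1 * f + 128) >> 8.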
// General purpose row blend.
LABELALIGN
"1: \n"
"movdqu " MEMACCESS(1) ",%%xmm0 \n"
MEMOPREG(movdqu,0x00,1,4,1,xmm2)
- "movdqu %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm2,%%xmm0 \n"
- "punpckhbw %%xmm2,%%xmm1 \n"
- "pmaddubsw %%xmm5,%%xmm0 \n"
- "pmaddubsw %%xmm5,%%xmm1 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
+ "movdqa %%xmm0,%%xmm1 \n"
+ "punpcklbw %%xmm2,%%xmm0 \n"
+ "punpckhbw %%xmm2,%%xmm1 \n"
+ "psubb %%xmm4,%%xmm0 \n"
+ "psubb %%xmm4,%%xmm1 \n"
+ "movdqa %%xmm5,%%xmm2 \n"
+ "movdqa %%xmm5,%%xmm3 \n"
+ "pmaddubsw %%xmm0,%%xmm2 \n"
+ "pmaddubsw %%xmm1,%%xmm3 \n"
+ "paddw %%xmm4,%%xmm2 \n"
+ "paddw %%xmm4,%%xmm3 \n"
+ "psrlw $0x8,%%xmm2 \n"
+ "psrlw $0x8,%%xmm3 \n"
+ "packuswb %%xmm3,%%xmm2 \n"
+ MEMOPMEM(movdqu,xmm2,0x00,1,0,1)
"lea " MEMLEA(0x10,1) ",%1 \n"
+ "sub $0x10,%2 \n"
"jg 1b \n"
"jmp 99f \n"
- // Blend 25 / 75.
- LABELALIGN
- "25: \n"
- "movdqu " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,1,4,1,xmm1)
- "pavgb %%xmm1,%%xmm0 \n"
- "pavgb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 25b \n"
- "jmp 99f \n"
-
// Blend 50 / 50.
LABELALIGN
"50: \n"
"movdqu " MEMACCESS(1) ",%%xmm0 \n"
MEMOPREG(movdqu,0x00,1,4,1,xmm1)
"pavgb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- BUNDLEALIGN
MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
"lea " MEMLEA(0x10,1) ",%1 \n"
+ "sub $0x10,%2 \n"
"jg 50b \n"
"jmp 99f \n"
- // Blend 75 / 25.
- LABELALIGN
- "75: \n"
- "movdqu " MEMACCESS(1) ",%%xmm1 \n"
- MEMOPREG(movdqu,0x00,1,4,1,xmm0)
- "pavgb %%xmm1,%%xmm0 \n"
- "pavgb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 75b \n"
- "jmp 99f \n"
-
// Blend 100 / 0 - Copy row unchanged.
LABELALIGN
"100: \n"
"movdqu " MEMACCESS(1) ",%%xmm0 \n"
- "sub $0x10,%2 \n"
MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
"lea " MEMLEA(0x10,1) ",%1 \n"
+ "sub $0x10,%2 \n"
"jg 100b \n"
"99: \n"
@@ -5657,259 +4859,94 @@ void InterpolateRow_Unaligned_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
"+r"(dst_width), // %2
"+r"(source_y_fraction) // %3
: "r"((intptr_t)(src_stride)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm5"
-#endif
+ : "memory", "cc", "eax", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
-#endif // HAS_INTERPOLATEROW_SSSE3
+#endif // HAS_INTERPOLATEROW_SSSE3
-#ifdef HAS_INTERPOLATEROW_SSE2
-// Bilinear filter 16x2 -> 16x1
-void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) {
+#ifdef HAS_INTERPOLATEROW_AVX2
+// Bilinear filter 32x2 -> 32x1
+void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
+ ptrdiff_t src_stride, int dst_width,
+ int source_y_fraction) {
asm volatile (
- "sub %1,%0 \n"
- "shr %3 \n"
"cmp $0x0,%3 \n"
"je 100f \n"
- "cmp $0x20,%3 \n"
- "je 75f \n"
- "cmp $0x40,%3 \n"
+ "sub %1,%0 \n"
+ "cmp $0x80,%3 \n"
"je 50f \n"
- "cmp $0x60,%3 \n"
- "je 25f \n"
- "movd %3,%%xmm0 \n"
- "neg %3 \n"
- "add $0x80,%3 \n"
- "movd %3,%%xmm5 \n"
- "punpcklbw %%xmm0,%%xmm5 \n"
- "punpcklwd %%xmm5,%%xmm5 \n"
- "pshufd $0x0,%%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
+ "vmovd %3,%%xmm0 \n"
+ "neg %3 \n"
+ "add $0x100,%3 \n"
+ "vmovd %3,%%xmm5 \n"
+ "vpunpcklbw %%xmm0,%%xmm5,%%xmm5 \n"
+ "vpunpcklwd %%xmm5,%%xmm5,%%xmm5 \n"
+ "vbroadcastss %%xmm5,%%ymm5 \n"
+ "mov $0x80808080,%%eax \n"
+ "vmovd %%eax,%%xmm4 \n"
+ "vbroadcastss %%xmm4,%%ymm4 \n"
// General purpose row blend.
LABELALIGN
"1: \n"
- "movdqu " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,1,4,1,xmm2) // movdqu (%1,%4,1),%%xmm2
- "movdqu %%xmm0,%%xmm1 \n"
- "movdqu %%xmm2,%%xmm3 \n"
- "punpcklbw %%xmm4,%%xmm2 \n"
- "punpckhbw %%xmm4,%%xmm3 \n"
- "punpcklbw %%xmm4,%%xmm0 \n"
- "punpckhbw %%xmm4,%%xmm1 \n"
- "psubw %%xmm0,%%xmm2 \n"
- "psubw %%xmm1,%%xmm3 \n"
- "paddw %%xmm2,%%xmm2 \n"
- "paddw %%xmm3,%%xmm3 \n"
- "pmulhw %%xmm5,%%xmm2 \n"
- "pmulhw %%xmm5,%%xmm3 \n"
- "paddw %%xmm2,%%xmm0 \n"
- "paddw %%xmm3,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
+ "vmovdqu " MEMACCESS(1) ",%%ymm0 \n"
+ MEMOPREG(vmovdqu,0x00,1,4,1,ymm2)
+ "vpunpckhbw %%ymm2,%%ymm0,%%ymm1 \n"
+ "vpunpcklbw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpsubb %%ymm4,%%ymm1,%%ymm1 \n"
+ "vpsubb %%ymm4,%%ymm0,%%ymm0 \n"
+ "vpmaddubsw %%ymm1,%%ymm5,%%ymm1 \n"
+ "vpmaddubsw %%ymm0,%%ymm5,%%ymm0 \n"
+ "vpaddw %%ymm4,%%ymm1,%%ymm1 \n"
+ "vpaddw %%ymm4,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1)
+ "lea " MEMLEA(0x20,1) ",%1 \n"
+ "sub $0x20,%2 \n"
"jg 1b \n"
"jmp 99f \n"
- // Blend 25 / 75.
- LABELALIGN
- "25: \n"
- "movdqu " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,1,4,1,xmm1) // movdqu (%1,%4,1),%%xmm1
- "pavgb %%xmm1,%%xmm0 \n"
- "pavgb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 25b \n"
- "jmp 99f \n"
-
// Blend 50 / 50.
LABELALIGN
"50: \n"
- "movdqu " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,1,4,1,xmm1) // movdqu (%1,%4,1),%%xmm1
- "pavgb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
+ "vmovdqu " MEMACCESS(1) ",%%ymm0 \n"
+ VMEMOPREG(vpavgb,0x00,1,4,1,ymm0,ymm0) // vpavgb (%1,%4,1),%%ymm0,%%ymm0
+ MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1)
+ "lea " MEMLEA(0x20,1) ",%1 \n"
+ "sub $0x20,%2 \n"
"jg 50b \n"
"jmp 99f \n"
- // Blend 75 / 25.
- LABELALIGN
- "75: \n"
- "movdqu " MEMACCESS(1) ",%%xmm1 \n"
- MEMOPREG(movdqu,0x00,1,4,1,xmm0) // movdqu (%1,%4,1),%%xmm0
- "pavgb %%xmm1,%%xmm0 \n"
- "pavgb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 75b \n"
- "jmp 99f \n"
-
// Blend 100 / 0 - Copy row unchanged.
LABELALIGN
"100: \n"
- "movdqu " MEMACCESS(1) ",%%xmm0 \n"
- "sub $0x10,%2 \n"
- MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 100b \n"
+ "rep movsb " MEMMOVESTRING(1,0) " \n"
+ "jmp 999f \n"
"99: \n"
- : "+r"(dst_ptr), // %0
- "+r"(src_ptr), // %1
- "+r"(dst_width), // %2
+ "vzeroupper \n"
+ "999: \n"
+ : "+D"(dst_ptr), // %0
+ "+S"(src_ptr), // %1
+ "+c"(dst_width), // %2
"+r"(source_y_fraction) // %3
: "r"((intptr_t)(src_stride)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
+ : "memory", "cc", "eax", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm4", "xmm5"
);
}
-#endif // HAS_INTERPOLATEROW_SSE2
-
-#ifdef HAS_HALFROW_SSE2
-void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
- uint8* dst_uv, int pix) {
- asm volatile (
- "sub %0,%1 \n"
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(pavgb,0x00,0,3,1,xmm0) // pavgb (%0,%3),%%xmm0
- "sub $0x10,%2 \n"
- MEMOPMEM(movdqa,xmm0,0x00,0,1,1) // movdqa %%xmm0,(%0,%1)
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "jg 1b \n"
- : "+r"(src_uv), // %0
- "+r"(dst_uv), // %1
- "+r"(pix) // %2
- : "r"((intptr_t)(src_uv_stride)) // %3
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0"
-#endif
- );
-}
-#endif // HAS_HALFROW_SSE2
-
-#ifdef HAS_ARGBTOBAYERROW_SSSE3
-void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
- uint32 selector, int pix) {
- asm volatile (
- // NaCL caveat - assumes movd is from GPR
- "movd %3,%%xmm5 \n"
- "pshufd $0x0,%%xmm5,%%xmm5 \n"
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pshufb %%xmm5,%%xmm0 \n"
- "pshufb %%xmm5,%%xmm1 \n"
- "punpckldq %%xmm1,%%xmm0 \n"
- "sub $0x8,%2 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_bayer), // %1
- "+r"(pix) // %2
- : "g"(selector) // %3
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm5"
-#endif
- );
-}
-#endif // HAS_ARGBTOBAYERROW_SSSE3
-
-#ifdef HAS_ARGBTOBAYERGGROW_SSE2
-void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer,
- uint32 selector, int pix) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrld $0x18,%%xmm5 \n"
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "psrld $0x8,%%xmm0 \n"
- "psrld $0x8,%%xmm1 \n"
- "pand %%xmm5,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "packssdw %%xmm1,%%xmm0 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "sub $0x8,%2 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_bayer), // %1
- "+r"(pix) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm5"
-#endif
- );
-}
-#endif // HAS_ARGBTOBAYERGGROW_SSE2
+#endif // HAS_INTERPOLATEROW_AVX2
#ifdef HAS_ARGBSHUFFLEROW_SSSE3
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix) {
+ const uint8* shuffler, int width) {
asm volatile (
- "movdqa " MEMACCESS(3) ",%%xmm5 \n"
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pshufb %%xmm5,%%xmm0 \n"
- "pshufb %%xmm5,%%xmm1 \n"
- "sub $0x8,%2 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "movdqa %%xmm1," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- : "r"(shuffler) // %3
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm5"
-#endif
- );
-}
-
-void ARGBShuffleRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix) {
- asm volatile (
- "movdqa " MEMACCESS(3) ",%%xmm5 \n"
+ "movdqu " MEMACCESS(3) ",%%xmm5 \n"
LABELALIGN
"1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
@@ -5917,19 +4954,17 @@ void ARGBShuffleRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_argb,
"lea " MEMLEA(0x20,0) ",%0 \n"
"pshufb %%xmm5,%%xmm0 \n"
"pshufb %%xmm5,%%xmm1 \n"
- "sub $0x8,%2 \n"
"movdqu %%xmm0," MEMACCESS(1) " \n"
"movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
"lea " MEMLEA(0x20,1) ",%1 \n"
+ "sub $0x8,%2 \n"
"jg 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
: "r"(shuffler) // %3
: "memory", "cc"
-#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm5"
-#endif
);
}
#endif // HAS_ARGBSHUFFLEROW_SSSE3
@@ -5937,7 +4972,7 @@ void ARGBShuffleRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_argb,
#ifdef HAS_ARGBSHUFFLEROW_AVX2
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix) {
+ const uint8* shuffler, int width) {
asm volatile (
"vbroadcastf128 " MEMACCESS(3) ",%%ymm5 \n"
LABELALIGN
@@ -5947,19 +4982,18 @@ void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
"lea " MEMLEA(0x40,0) ",%0 \n"
"vpshufb %%ymm5,%%ymm0,%%ymm0 \n"
"vpshufb %%ymm5,%%ymm1,%%ymm1 \n"
- "sub $0x10,%2 \n"
"vmovdqu %%ymm0," MEMACCESS(1) " \n"
"vmovdqu %%ymm1," MEMACCESS2(0x20,1) " \n"
"lea " MEMLEA(0x40,1) ",%1 \n"
+ "sub $0x10,%2 \n"
"jg 1b \n"
+ "vzeroupper \n"
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
: "r"(shuffler) // %3
: "memory", "cc"
-#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm5"
-#endif
);
}
#endif // HAS_ARGBSHUFFLEROW_AVX2
@@ -5967,7 +5001,7 @@ void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
#ifdef HAS_ARGBSHUFFLEROW_SSE2
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix) {
+ const uint8* shuffler, int width) {
uintptr_t pixel_temp = 0u;
asm volatile (
"pxor %%xmm5,%%xmm5 \n"
@@ -5989,7 +5023,6 @@ void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
"movzb " MEMACCESS2(0x1,4) ",%2 \n"
MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2
"mov %b2," MEMACCESS2(0x1,1) " \n"
- BUNDLEALIGN
"movzb " MEMACCESS2(0x2,4) ",%2 \n"
MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2
"mov %b2," MEMACCESS2(0x2,1) " \n"
@@ -6014,9 +5047,9 @@ void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
"pshufhw $0x1b,%%xmm1,%%xmm1 \n"
"pshuflw $0x1b,%%xmm1,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n"
- "sub $0x4,%3 \n"
"movdqu %%xmm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x10,1) ",%1 \n"
+ "sub $0x4,%3 \n"
"jg 123b \n"
"jmp 99f \n"
@@ -6032,9 +5065,9 @@ void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
"pshufhw $0x39,%%xmm1,%%xmm1 \n"
"pshuflw $0x39,%%xmm1,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n"
- "sub $0x4,%3 \n"
"movdqu %%xmm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x10,1) ",%1 \n"
+ "sub $0x4,%3 \n"
"jg 321b \n"
"jmp 99f \n"
@@ -6050,9 +5083,9 @@ void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
"pshufhw $0x93,%%xmm1,%%xmm1 \n"
"pshuflw $0x93,%%xmm1,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n"
- "sub $0x4,%3 \n"
"movdqu %%xmm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x10,1) ",%1 \n"
+ "sub $0x4,%3 \n"
"jg 2103b \n"
"jmp 99f \n"
@@ -6068,24 +5101,19 @@ void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
"pshufhw $0xc6,%%xmm1,%%xmm1 \n"
"pshuflw $0xc6,%%xmm1,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n"
- "sub $0x4,%3 \n"
"movdqu %%xmm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x10,1) ",%1 \n"
+ "sub $0x4,%3 \n"
"jg 3012b \n"
"99: \n"
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
"+d"(pixel_temp), // %2
- "+r"(pix) // %3
+ "+r"(width) // %3
: "r"(shuffler) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm5"
-#endif
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm5"
);
}
#endif // HAS_ARGBSHUFFLEROW_SSE2
@@ -6119,13 +5147,8 @@ void I422ToYUY2Row_SSE2(const uint8* src_y,
"+r"(dst_frame), // %3
"+rm"(width) // %4
:
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3"
-#endif
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm3"
);
}
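+// Note: YUY2 packs each pixel pair as Y0 U0 Y1 V0; the UYVY variant below
+// emits the same data in the order U0 Y0 V0 Y1.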
#endif // HAS_I422TOYUY2ROW_SSE2
@@ -6159,13 +5182,8 @@ void I422ToUYVYRow_SSE2(const uint8* src_y,
"+r"(dst_frame), // %3
"+rm"(width) // %4
:
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3"
-#endif
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm3"
);
}
#endif // HAS_I422TOUYVYROW_SSE2
@@ -6212,18 +5230,16 @@ void ARGBPolynomialRow_SSE2(const uint8* src_argb,
"cvttps2dq %%xmm4,%%xmm4 \n"
"packuswb %%xmm4,%%xmm0 \n"
"packuswb %%xmm0,%%xmm0 \n"
- "sub $0x2,%2 \n"
"movq %%xmm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x8,1) ",%1 \n"
+ "sub $0x2,%2 \n"
"jg 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
: "r"(poly) // %3
: "memory", "cc"
-#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
-#endif
);
}
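+// Sketch (illustrative): per channel, this and the AVX2 version below
+// evaluate in float
+//   dst = clamp255(c0 + c1 * v + c2 * v * v + c3 * v * v * v)
+// with the four coefficient vectors c0..c3 loaded from 'poly'.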
#endif // HAS_ARGBPOLYNOMIALROW_SSE2
@@ -6253,20 +5269,17 @@ void ARGBPolynomialRow_AVX2(const uint8* src_argb,
"vpackusdw %%ymm0,%%ymm0,%%ymm0 \n"
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
"vpackuswb %%xmm0,%%xmm0,%%xmm0 \n"
- "sub $0x2,%2 \n"
"vmovq %%xmm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x8,1) ",%1 \n"
+ "sub $0x2,%2 \n"
"jg 1b \n"
"vzeroupper \n"
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
: "r"(poly) // %3
- : "memory", "cc"
-#if defined(__SSE2__)
-// TODO(fbarchard): declare ymm usage when applicable.
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
+ : "memory", "cc",
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
);
}
#endif // HAS_ARGBPOLYNOMIALROW_AVX2
@@ -6376,7 +5389,6 @@ void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
"movzb " MEMACCESS2(0x4,2) ",%0 \n"
MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
"mov %b0," MEMACCESS2(0x4,3) " \n"
- BUNDLEALIGN
"movzb " MEMACCESS2(0x5,2) ",%0 \n"
MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
"mov %b0," MEMACCESS2(0x5,3) " \n"
@@ -6416,9 +5428,9 @@ void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
"mov %b0," MEMACCESS2(0xe,3) " \n"
"movzb " MEMACCESS2(0xf,2) ",%0 \n"
"mov %b0," MEMACCESS2(0xf,3) " \n"
- "sub $0x4,%4 \n"
"lea " MEMLEA(0x10,2) ",%2 \n"
"lea " MEMLEA(0x10,3) ",%3 \n"
+ "sub $0x4,%4 \n"
"jg 1b \n"
: "+d"(pixel_temp), // %0
"+a"(table_temp), // %1
@@ -6427,10 +5439,7 @@ void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
"+rm"(width) // %4
: "r"(luma), // %5
"rm"(lumacoeff) // %6
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm3", "xmm4", "xmm5"
-#endif
+ : "memory", "cc", "xmm0", "xmm3", "xmm4", "xmm5"
);
}
#endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
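ARGBLumaColorTableRow computes a weighted luma from the packed lumacoeff bytes, uses it to select one 256-byte bank of the luma table, and remaps B, G and R through that bank. A hedged scalar sketch (the 0x7F00 bank mask is an assumption based on the upstream C reference):

#include <stdint.h>
static void ARGBLumaRow_Sketch(const uint8_t* src_argb, uint8_t* dst_argb,
                               int width, const uint8_t* luma, uint32_t coeff) {
  const uint32_t bc = coeff & 0xff;          // B weight
  const uint32_t gc = (coeff >> 8) & 0xff;   // G weight
  const uint32_t rc = (coeff >> 16) & 0xff;  // R weight
  for (int i = 0; i < width; ++i) {
    const uint8_t* bank =
        luma + ((src_argb[0] * bc + src_argb[1] * gc + src_argb[2] * rc) & 0x7F00u);
    dst_argb[0] = bank[src_argb[0]];
    dst_argb[1] = bank[src_argb[1]];
    dst_argb[2] = bank[src_argb[2]];
    dst_argb[3] = src_argb[3];  // alpha passes through
    src_argb += 4; dst_argb += 4;
  }
}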
diff --git a/TMessagesProj/jni/libyuv/source/row_mips.cc b/TMessagesProj/jni/libyuv/source/row_mips.cc
index da7183bc1..d12cf6ab7 100644
--- a/TMessagesProj/jni/libyuv/source/row_mips.cc
+++ b/TMessagesProj/jni/libyuv/source/row_mips.cc
@@ -389,7 +389,6 @@ void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
"blez $t4, 2f \n"
" andi %[width], %[width], 0xf \n" // residual
- ".p2align 2 \n"
"1: \n"
"addiu $t4, $t4, -1 \n"
"lw $t0, 0(%[src_uv]) \n" // V1 | U1 | V0 | U0
@@ -447,89 +446,6 @@ void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
);
}
-void SplitUVRow_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
- uint8* dst_v, int width) {
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
- "srl $t4, %[width], 4 \n" // multiplies of 16
- "blez $t4, 2f \n"
- " andi %[width], %[width], 0xf \n" // residual
-
- ".p2align 2 \n"
- "1: \n"
- "addiu $t4, $t4, -1 \n"
- "lwr $t0, 0(%[src_uv]) \n"
- "lwl $t0, 3(%[src_uv]) \n" // V1 | U1 | V0 | U0
- "lwr $t1, 4(%[src_uv]) \n"
- "lwl $t1, 7(%[src_uv]) \n" // V3 | U3 | V2 | U2
- "lwr $t2, 8(%[src_uv]) \n"
- "lwl $t2, 11(%[src_uv]) \n" // V5 | U5 | V4 | U4
- "lwr $t3, 12(%[src_uv]) \n"
- "lwl $t3, 15(%[src_uv]) \n" // V7 | U7 | V6 | U6
- "lwr $t5, 16(%[src_uv]) \n"
- "lwl $t5, 19(%[src_uv]) \n" // V9 | U9 | V8 | U8
- "lwr $t6, 20(%[src_uv]) \n"
- "lwl $t6, 23(%[src_uv]) \n" // V11 | U11 | V10 | U10
- "lwr $t7, 24(%[src_uv]) \n"
- "lwl $t7, 27(%[src_uv]) \n" // V13 | U13 | V12 | U12
- "lwr $t8, 28(%[src_uv]) \n"
- "lwl $t8, 31(%[src_uv]) \n" // V15 | U15 | V14 | U14
- "precrq.qb.ph $t9, $t1, $t0 \n" // V3 | V2 | V1 | V0
- "precr.qb.ph $t0, $t1, $t0 \n" // U3 | U2 | U1 | U0
- "precrq.qb.ph $t1, $t3, $t2 \n" // V7 | V6 | V5 | V4
- "precr.qb.ph $t2, $t3, $t2 \n" // U7 | U6 | U5 | U4
- "precrq.qb.ph $t3, $t6, $t5 \n" // V11 | V10 | V9 | V8
- "precr.qb.ph $t5, $t6, $t5 \n" // U11 | U10 | U9 | U8
- "precrq.qb.ph $t6, $t8, $t7 \n" // V15 | V14 | V13 | V12
- "precr.qb.ph $t7, $t8, $t7 \n" // U15 | U14 | U13 | U12
- "addiu %[src_uv], %[src_uv], 32 \n"
- "swr $t9, 0(%[dst_v]) \n"
- "swl $t9, 3(%[dst_v]) \n"
- "swr $t0, 0(%[dst_u]) \n"
- "swl $t0, 3(%[dst_u]) \n"
- "swr $t1, 4(%[dst_v]) \n"
- "swl $t1, 7(%[dst_v]) \n"
- "swr $t2, 4(%[dst_u]) \n"
- "swl $t2, 7(%[dst_u]) \n"
- "swr $t3, 8(%[dst_v]) \n"
- "swl $t3, 11(%[dst_v]) \n"
- "swr $t5, 8(%[dst_u]) \n"
- "swl $t5, 11(%[dst_u]) \n"
- "swr $t6, 12(%[dst_v]) \n"
- "swl $t6, 15(%[dst_v]) \n"
- "swr $t7, 12(%[dst_u]) \n"
- "swl $t7, 15(%[dst_u]) \n"
- "addiu %[dst_u], %[dst_u], 16 \n"
- "bgtz $t4, 1b \n"
- " addiu %[dst_v], %[dst_v], 16 \n"
-
- "beqz %[width], 3f \n"
- " nop \n"
-
- "2: \n"
- "lbu $t0, 0(%[src_uv]) \n"
- "lbu $t1, 1(%[src_uv]) \n"
- "addiu %[src_uv], %[src_uv], 2 \n"
- "addiu %[width], %[width], -1 \n"
- "sb $t0, 0(%[dst_u]) \n"
- "sb $t1, 0(%[dst_v]) \n"
- "addiu %[dst_u], %[dst_u], 1 \n"
- "bgtz %[width], 2b \n"
- " addiu %[dst_v], %[dst_v], 1 \n"
-
- "3: \n"
- ".set pop \n"
- : [src_uv] "+r" (src_uv),
- [width] "+r" (width),
- [dst_u] "+r" (dst_u),
- [dst_v] "+r" (dst_v)
- :
- : "t0", "t1", "t2", "t3",
- "t4", "t5", "t6", "t7", "t8", "t9"
- );
-}
-
void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) {
__asm__ __volatile__ (
".set push \n"
@@ -540,7 +456,6 @@ void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) {
"blez $t4, 2f \n"
" addu %[src], %[src], %[width] \n" // src += width
- ".p2align 2 \n"
"1: \n"
"lw $t0, -16(%[src]) \n" // |3|2|1|0|
"lw $t1, -12(%[src]) \n" // |7|6|5|4|
@@ -595,7 +510,6 @@ void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
"blez %[x], 2f \n"
" addu %[src_uv], %[src_uv], $t4 \n"
- ".p2align 2 \n"
"1: \n"
"lw $t0, -32(%[src_uv]) \n" // |3|2|1|0|
"lw $t1, -28(%[src_uv]) \n" // |7|6|5|4|
@@ -679,7 +593,7 @@ void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
// t8 = | 0 | G1 | 0 | g1 |
// t2 = | 0 | R0 | 0 | r0 |
// t1 = | 0 | R1 | 0 | r1 |
-#define I422ToTransientMipsRGB \
+#define YUVTORGB \
"lw $t0, 0(%[y_buf]) \n" \
"lhu $t1, 0(%[u_buf]) \n" \
"lhu $t2, 0(%[v_buf]) \n" \
@@ -738,10 +652,12 @@ void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
"addu.ph $t2, $t2, $s5 \n" \
"addu.ph $t1, $t1, $s5 \n"
+// TODO(fbarchard): accept yuv conversion constants.
void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
+ const struct YuvConstants* yuvconstants,
int width) {
__asm__ __volatile__ (
".set push \n"
@@ -756,9 +672,8 @@ void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
"lui $s6, 0xff00 \n"
"ori $s6, 0xff00 \n" // |ff|00|ff|00|ff|
- ".p2align 2 \n"
"1: \n"
- I422ToTransientMipsRGB
+ YUVTORGB
// Arranging into argb format
"precr.qb.ph $t4, $t8, $t4 \n" // |G1|g1|B1|b1|
"precr.qb.ph $t5, $t9, $t5 \n" // |G0|g0|B0|b0|
@@ -800,136 +715,10 @@ void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
);
}
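The DSPR2 constants loaded above (74, -25, -52, 102, biases 16 and 128) are halved BT.601 coefficients. In full 8.8 fixed point, the conversion these kernels approximate is the standard integer form (helper names hypothetical):

#include <stdint.h>
static uint8_t Clamp255(int v) { return (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v)); }
// BT.601 limited-range YUV -> RGB, rounded 8.8 fixed point.
static void YuvPixel601(uint8_t y, uint8_t u, uint8_t v,
                        uint8_t* b, uint8_t* g, uint8_t* r) {
  int c = 298 * (y - 16);
  *r = Clamp255((c + 409 * (v - 128) + 128) >> 8);
  *g = Clamp255((c - 100 * (u - 128) - 208 * (v - 128) + 128) >> 8);
  *b = Clamp255((c + 516 * (u - 128) + 128) >> 8);
}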
-void I422ToABGRRow_MIPS_DSPR2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) {
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
- "beqz %[width], 2f \n"
- " repl.ph $s0, 74 \n" // |YG|YG| = |74|74|
- "repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
- "repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
- "repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
- "repl.ph $s4, 16 \n" // |0|16|0|16|
- "repl.ph $s5, 128 \n" // |128|128|
- "lui $s6, 0xff00 \n"
- "ori $s6, 0xff00 \n" // |ff|00|ff|00|
-
- ".p2align 2 \n"
- "1: \n"
- I422ToTransientMipsRGB
-// Arranging into abgr format
- "precr.qb.ph $t0, $t8, $t1 \n" // |G1|g1|R1|r1|
- "precr.qb.ph $t3, $t9, $t2 \n" // |G0|g0|R0|r0|
- "precrq.qb.ph $t8, $t0, $t3 \n" // |G1|R1|G0|R0|
- "precr.qb.ph $t9, $t0, $t3 \n" // |g1|r1|g0|r0|
-
- "precr.qb.ph $t2, $t4, $t5 \n" // |B1|b1|B0|b0|
- "addiu %[width], -4 \n"
- "addiu %[y_buf], 4 \n"
- "preceu.ph.qbla $t1, $t2 \n" // |0 |B1|0 |B0|
- "preceu.ph.qbra $t2, $t2 \n" // |0 |b1|0 |b0|
- "or $t1, $t1, $s6 \n" // |ff|B1|ff|B0|
- "or $t2, $t2, $s6 \n" // |ff|b1|ff|b0|
- "precrq.ph.w $t0, $t2, $t9 \n" // |ff|b1|g1|r1|
- "precrq.ph.w $t3, $t1, $t8 \n" // |ff|B1|G1|R1|
- "sll $t9, $t9, 16 \n"
- "sll $t8, $t8, 16 \n"
- "packrl.ph $t2, $t2, $t9 \n" // |ff|b0|g0|r0|
- "packrl.ph $t1, $t1, $t8 \n" // |ff|B0|G0|R0|
-// Store results.
- "sw $t2, 0(%[rgb_buf]) \n"
- "sw $t0, 4(%[rgb_buf]) \n"
- "sw $t1, 8(%[rgb_buf]) \n"
- "sw $t3, 12(%[rgb_buf]) \n"
- "bnez %[width], 1b \n"
- " addiu %[rgb_buf], 16 \n"
- "2: \n"
- ".set pop \n"
- :[y_buf] "+r" (y_buf),
- [u_buf] "+r" (u_buf),
- [v_buf] "+r" (v_buf),
- [width] "+r" (width),
- [rgb_buf] "+r" (rgb_buf)
- :
- : "t0", "t1", "t2", "t3", "t4", "t5",
- "t6", "t7", "t8", "t9",
- "s0", "s1", "s2", "s3",
- "s4", "s5", "s6"
- );
-}
-
-void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) {
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
- "beqz %[width], 2f \n"
- " repl.ph $s0, 74 \n" // |YG|YG| = |74 |74 |
- "repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
- "repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
- "repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
- "repl.ph $s4, 16 \n" // |0|16|0|16|
- "repl.ph $s5, 128 \n" // |128|128|
- "lui $s6, 0xff \n"
- "ori $s6, 0xff \n" // |00|ff|00|ff|
-
- ".p2align 2 \n"
- "1: \n"
- I422ToTransientMipsRGB
- // Arranging into bgra format
- "precr.qb.ph $t4, $t4, $t8 \n" // |B1|b1|G1|g1|
- "precr.qb.ph $t5, $t5, $t9 \n" // |B0|b0|G0|g0|
- "precrq.qb.ph $t8, $t4, $t5 \n" // |B1|G1|B0|G0|
- "precr.qb.ph $t9, $t4, $t5 \n" // |b1|g1|b0|g0|
-
- "precr.qb.ph $t2, $t1, $t2 \n" // |R1|r1|R0|r0|
- "addiu %[width], -4 \n"
- "addiu %[y_buf], 4 \n"
- "preceu.ph.qbla $t1, $t2 \n" // |0 |R1|0 |R0|
- "preceu.ph.qbra $t2, $t2 \n" // |0 |r1|0 |r0|
- "sll $t1, $t1, 8 \n" // |R1|0 |R0|0 |
- "sll $t2, $t2, 8 \n" // |r1|0 |r0|0 |
- "or $t1, $t1, $s6 \n" // |R1|ff|R0|ff|
- "or $t2, $t2, $s6 \n" // |r1|ff|r0|ff|
- "precrq.ph.w $t0, $t9, $t2 \n" // |b1|g1|r1|ff|
- "precrq.ph.w $t3, $t8, $t1 \n" // |B1|G1|R1|ff|
- "sll $t1, $t1, 16 \n"
- "sll $t2, $t2, 16 \n"
- "packrl.ph $t2, $t9, $t2 \n" // |b0|g0|r0|ff|
- "packrl.ph $t1, $t8, $t1 \n" // |B0|G0|R0|ff|
-// Store results.
- "sw $t2, 0(%[rgb_buf]) \n"
- "sw $t0, 4(%[rgb_buf]) \n"
- "sw $t1, 8(%[rgb_buf]) \n"
- "sw $t3, 12(%[rgb_buf]) \n"
- "bnez %[width], 1b \n"
- " addiu %[rgb_buf], 16 \n"
- "2: \n"
- ".set pop \n"
- :[y_buf] "+r" (y_buf),
- [u_buf] "+r" (u_buf),
- [v_buf] "+r" (v_buf),
- [width] "+r" (width),
- [rgb_buf] "+r" (rgb_buf)
- :
- : "t0", "t1", "t2", "t3", "t4", "t5",
- "t6", "t7", "t8", "t9",
- "s0", "s1", "s2", "s3",
- "s4", "s5", "s6"
- );
-}
-
// Bilinear filter 8x2 -> 8x1
-void InterpolateRows_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) {
+void InterpolateRow_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
+ ptrdiff_t src_stride, int dst_width,
+ int source_y_fraction) {
int y0_fraction = 256 - source_y_fraction;
const uint8* src_ptr1 = src_ptr + src_stride;
@@ -940,7 +729,6 @@ void InterpolateRows_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
"replv.ph $t0, %[y0_fraction] \n"
"replv.ph $t1, %[source_y_fraction] \n"
- ".p2align 2 \n"
"1: \n"
"lw $t2, 0(%[src_ptr]) \n"
"lw $t3, 0(%[src_ptr1]) \n"
diff --git a/TMessagesProj/jni/libyuv/source/row_neon.cc b/TMessagesProj/jni/libyuv/source/row_neon.cc
index 1392cf5fc..5b4ff3b5a 100644
--- a/TMessagesProj/jni/libyuv/source/row_neon.cc
+++ b/TMessagesProj/jni/libyuv/source/row_neon.cc
@@ -16,7 +16,8 @@ extern "C" {
#endif
// This module is for GCC Neon
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
+ !defined(__aarch64__)
// Read 8 Y, 4 U and 4 V from 422
#define READYUV422 \
@@ -92,56 +93,60 @@ extern "C" {
"vuzp.u8 d2, d3 \n" \
"vtrn.u32 d2, d3 \n"
-#define YUV422TORGB \
- "veor.u8 d2, d26 \n"/*subtract 128 from u and v*/\
- "vmull.s8 q8, d2, d24 \n"/* u/v B/R component */\
- "vmull.s8 q9, d2, d25 \n"/* u/v G component */\
- "vmov.u8 d1, #0 \n"/* split odd/even y apart */\
- "vtrn.u8 d0, d1 \n" \
- "vsub.s16 q0, q0, q15 \n"/* offset y */\
- "vmul.s16 q0, q0, q14 \n" \
- "vadd.s16 d18, d19 \n" \
- "vqadd.s16 d20, d0, d16 \n" /* B */ \
- "vqadd.s16 d21, d1, d16 \n" \
- "vqadd.s16 d22, d0, d17 \n" /* R */ \
- "vqadd.s16 d23, d1, d17 \n" \
- "vqadd.s16 d16, d0, d18 \n" /* G */ \
- "vqadd.s16 d17, d1, d18 \n" \
- "vqshrun.s16 d0, q10, #6 \n" /* B */ \
- "vqshrun.s16 d1, q11, #6 \n" /* G */ \
- "vqshrun.s16 d2, q8, #6 \n" /* R */ \
- "vmovl.u8 q10, d0 \n"/* set up for reinterleave*/\
- "vmovl.u8 q11, d1 \n" \
- "vmovl.u8 q8, d2 \n" \
- "vtrn.u8 d20, d21 \n" \
- "vtrn.u8 d22, d23 \n" \
- "vtrn.u8 d16, d17 \n" \
- "vmov.u8 d21, d16 \n"
+#define YUVTORGB_SETUP \
+ MEMACCESS([kUVToRB]) \
+ "vld1.8 {d24}, [%[kUVToRB]] \n" \
+ MEMACCESS([kUVToG]) \
+ "vld1.8 {d25}, [%[kUVToG]] \n" \
+ MEMACCESS([kUVBiasBGR]) \
+ "vld1.16 {d26[], d27[]}, [%[kUVBiasBGR]]! \n" \
+ MEMACCESS([kUVBiasBGR]) \
+ "vld1.16 {d8[], d9[]}, [%[kUVBiasBGR]]! \n" \
+ MEMACCESS([kUVBiasBGR]) \
+ "vld1.16 {d28[], d29[]}, [%[kUVBiasBGR]] \n" \
+ MEMACCESS([kYToRgb]) \
+ "vld1.32 {d30[], d31[]}, [%[kYToRgb]] \n"
-static vec8 kUVToRB = { 127, 127, 127, 127, 102, 102, 102, 102,
- 0, 0, 0, 0, 0, 0, 0, 0 };
-static vec8 kUVToG = { -25, -25, -25, -25, -52, -52, -52, -52,
- 0, 0, 0, 0, 0, 0, 0, 0 };
+#define YUVTORGB \
+ "vmull.u8 q8, d2, d24 \n" /* u/v B/R component */\
+ "vmull.u8 q9, d2, d25 \n" /* u/v G component */\
+ "vmovl.u8 q0, d0 \n" /* Y */\
+ "vmovl.s16 q10, d1 \n" \
+ "vmovl.s16 q0, d0 \n" \
+ "vmul.s32 q10, q10, q15 \n" \
+ "vmul.s32 q0, q0, q15 \n" \
+ "vqshrun.s32 d0, q0, #16 \n" \
+ "vqshrun.s32 d1, q10, #16 \n" /* Y */\
+ "vadd.s16 d18, d19 \n" \
+ "vshll.u16 q1, d16, #16 \n" /* Replicate u * UB */\
+ "vshll.u16 q10, d17, #16 \n" /* Replicate v * VR */\
+ "vshll.u16 q3, d18, #16 \n" /* Replicate (v*VG + u*UG)*/\
+ "vaddw.u16 q1, q1, d16 \n" \
+ "vaddw.u16 q10, q10, d17 \n" \
+ "vaddw.u16 q3, q3, d18 \n" \
+ "vqadd.s16 q8, q0, q13 \n" /* B */ \
+ "vqadd.s16 q9, q0, q14 \n" /* R */ \
+ "vqadd.s16 q0, q0, q4 \n" /* G */ \
+ "vqadd.s16 q8, q8, q1 \n" /* B */ \
+ "vqadd.s16 q9, q9, q10 \n" /* R */ \
+ "vqsub.s16 q0, q0, q3 \n" /* G */ \
+ "vqshrun.s16 d20, q8, #6 \n" /* B */ \
+ "vqshrun.s16 d22, q9, #6 \n" /* R */ \
+ "vqshrun.s16 d21, q0, #6 \n" /* G */
void I444ToARGBRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- MEMACCESS(5)
- "vld1.8 {d24}, [%5] \n"
- MEMACCESS(6)
- "vld1.8 {d25}, [%6] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
+ YUVTORGB_SETUP
+ "vmov.u8 d23, #255 \n"
"1: \n"
READYUV444
- YUV422TORGB
+ YUVTORGB
"subs %4, %4, #8 \n"
- "vmov.u8 d23, #255 \n"
MEMACCESS(3)
"vst4.8 {d20, d21, d22, d23}, [%3]! \n"
"bgt 1b \n"
@@ -150,9 +155,11 @@ void I444ToARGBRow_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_argb), // %3
"+r"(width) // %4
- : "r"(&kUVToRB), // %5
- "r"(&kUVToG) // %6
- : "cc", "memory", "q0", "q1", "q2", "q3",
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
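The new yuvconstants parameter threads the colour matrix through every NEON row function instead of baking I601 constants into each kernel. The field names below come straight from this diff; the element types and counts are assumptions for illustration only:

#include <stdint.h>
// Hypothetical layout of the constants block consumed by YUVTORGB_SETUP.
// Field names match the diff; widths and array sizes are assumed.
struct YuvConstants_Sketch {
  uint8_t kUVToRB[8];     // loaded into d24: U->B / V->R multipliers
  uint8_t kUVToG[8];      // loaded into d25: U->G / V->G multipliers
  int16_t kUVBiasBGR[6];  // at least three replicated 16-bit biases (B, G, R)
  int32_t kYToRgb[2];     // replicated 32-bit Y scale factor
};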
@@ -161,21 +168,15 @@ void I422ToARGBRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- MEMACCESS(5)
- "vld1.8 {d24}, [%5] \n"
- MEMACCESS(6)
- "vld1.8 {d25}, [%6] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
+ YUVTORGB_SETUP
+ "vmov.u8 d23, #255 \n"
"1: \n"
READYUV422
- YUV422TORGB
+ YUVTORGB
"subs %4, %4, #8 \n"
- "vmov.u8 d23, #255 \n"
MEMACCESS(3)
"vst4.8 {d20, d21, d22, d23}, [%3]! \n"
"bgt 1b \n"
@@ -184,9 +185,44 @@ void I422ToARGBRow_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_argb), // %3
"+r"(width) // %4
- : "r"(&kUVToRB), // %5
- "r"(&kUVToG) // %6
- : "cc", "memory", "q0", "q1", "q2", "q3",
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
+ "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ );
+}
+
+void I422AlphaToARGBRow_NEON(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ const uint8* src_a,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP
+ "1: \n"
+ READYUV422
+ YUVTORGB
+ "subs %5, %5, #8 \n"
+ MEMACCESS(3)
+ "vld1.8 {d23}, [%3]! \n"
+ MEMACCESS(4)
+ "vst4.8 {d20, d21, d22, d23}, [%4]! \n"
+ "bgt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_u), // %1
+ "+r"(src_v), // %2
+ "+r"(src_a), // %3
+ "+r"(dst_argb), // %4
+ "+r"(width) // %5
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
@@ -195,21 +231,15 @@ void I411ToARGBRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- MEMACCESS(5)
- "vld1.8 {d24}, [%5] \n"
- MEMACCESS(6)
- "vld1.8 {d25}, [%6] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
+ YUVTORGB_SETUP
+ "vmov.u8 d23, #255 \n"
"1: \n"
READYUV411
- YUV422TORGB
+ YUVTORGB
"subs %4, %4, #8 \n"
- "vmov.u8 d23, #255 \n"
MEMACCESS(3)
"vst4.8 {d20, d21, d22, d23}, [%3]! \n"
"bgt 1b \n"
@@ -218,79 +248,11 @@ void I411ToARGBRow_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_argb), // %3
"+r"(width) // %4
- : "r"(&kUVToRB), // %5
- "r"(&kUVToG) // %6
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void I422ToBGRARow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_bgra,
- int width) {
- asm volatile (
- MEMACCESS(5)
- "vld1.8 {d24}, [%5] \n"
- MEMACCESS(6)
- "vld1.8 {d25}, [%6] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
- "1: \n"
- READYUV422
- YUV422TORGB
- "subs %4, %4, #8 \n"
- "vswp.u8 d20, d22 \n"
- "vmov.u8 d19, #255 \n"
- MEMACCESS(3)
- "vst4.8 {d19, d20, d21, d22}, [%3]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_bgra), // %3
- "+r"(width) // %4
- : "r"(&kUVToRB), // %5
- "r"(&kUVToG) // %6
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void I422ToABGRRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_abgr,
- int width) {
- asm volatile (
- MEMACCESS(5)
- "vld1.8 {d24}, [%5] \n"
- MEMACCESS(6)
- "vld1.8 {d25}, [%6] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
- "1: \n"
- READYUV422
- YUV422TORGB
- "subs %4, %4, #8 \n"
- "vswp.u8 d20, d22 \n"
- "vmov.u8 d23, #255 \n"
- MEMACCESS(3)
- "vst4.8 {d20, d21, d22, d23}, [%3]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_abgr), // %3
- "+r"(width) // %4
- : "r"(&kUVToRB), // %5
- "r"(&kUVToG) // %6
- : "cc", "memory", "q0", "q1", "q2", "q3",
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
@@ -299,21 +261,15 @@ void I422ToRGBARow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgba,
+ const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- MEMACCESS(5)
- "vld1.8 {d24}, [%5] \n"
- MEMACCESS(6)
- "vld1.8 {d25}, [%6] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB
+ YUVTORGB
"subs %4, %4, #8 \n"
- "vmov.u8 d19, #255 \n"
+ "vmov.u8 d19, #255 \n" // d19 modified by YUVTORGB
MEMACCESS(3)
"vst4.8 {d19, d20, d21, d22}, [%3]! \n"
"bgt 1b \n"
@@ -322,9 +278,11 @@ void I422ToRGBARow_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_rgba), // %3
"+r"(width) // %4
- : "r"(&kUVToRB), // %5
- "r"(&kUVToG) // %6
- : "cc", "memory", "q0", "q1", "q2", "q3",
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
@@ -333,19 +291,13 @@ void I422ToRGB24Row_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- MEMACCESS(5)
- "vld1.8 {d24}, [%5] \n"
- MEMACCESS(6)
- "vld1.8 {d25}, [%6] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB
+ YUVTORGB
"subs %4, %4, #8 \n"
MEMACCESS(3)
"vst3.8 {d20, d21, d22}, [%3]! \n"
@@ -355,43 +307,11 @@ void I422ToRGB24Row_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_rgb24), // %3
"+r"(width) // %4
- : "r"(&kUVToRB), // %5
- "r"(&kUVToG) // %6
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void I422ToRAWRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_raw,
- int width) {
- asm volatile (
- MEMACCESS(5)
- "vld1.8 {d24}, [%5] \n"
- MEMACCESS(6)
- "vld1.8 {d25}, [%6] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
- "1: \n"
- READYUV422
- YUV422TORGB
- "subs %4, %4, #8 \n"
- "vswp.u8 d20, d22 \n"
- MEMACCESS(3)
- "vst3.8 {d20, d21, d22}, [%3]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_raw), // %3
- "+r"(width) // %4
- : "r"(&kUVToRB), // %5
- "r"(&kUVToG) // %6
- : "cc", "memory", "q0", "q1", "q2", "q3",
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
@@ -412,19 +332,13 @@ void I422ToRGB565Row_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- MEMACCESS(5)
- "vld1.8 {d24}, [%5] \n"
- MEMACCESS(6)
- "vld1.8 {d25}, [%6] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB
+ YUVTORGB
"subs %4, %4, #8 \n"
ARGBTORGB565
MEMACCESS(3)
@@ -435,9 +349,11 @@ void I422ToRGB565Row_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_rgb565), // %3
"+r"(width) // %4
- : "r"(&kUVToRB), // %5
- "r"(&kUVToG) // %6
- : "cc", "memory", "q0", "q1", "q2", "q3",
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
@@ -461,19 +377,13 @@ void I422ToARGB1555Row_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb1555,
+ const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- MEMACCESS(5)
- "vld1.8 {d24}, [%5] \n"
- MEMACCESS(6)
- "vld1.8 {d25}, [%6] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB
+ YUVTORGB
"subs %4, %4, #8 \n"
"vmov.u8 d23, #255 \n"
ARGBTOARGB1555
@@ -485,9 +395,11 @@ void I422ToARGB1555Row_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_argb1555), // %3
"+r"(width) // %4
- : "r"(&kUVToRB), // %5
- "r"(&kUVToG) // %6
- : "cc", "memory", "q0", "q1", "q2", "q3",
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
@@ -505,20 +417,14 @@ void I422ToARGB4444Row_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb4444,
+ const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- MEMACCESS(5)
- "vld1.8 {d24}, [%5] \n"
- MEMACCESS(6)
- "vld1.8 {d25}, [%6] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
+ YUVTORGB_SETUP
"vmov.u8 d4, #0x0f \n" // bits to clear with vbic.
- ".p2align 2 \n"
"1: \n"
READYUV422
- YUV422TORGB
+ YUVTORGB
"subs %4, %4, #8 \n"
"vmov.u8 d23, #255 \n"
ARGBTOARGB4444
@@ -530,39 +436,11 @@ void I422ToARGB4444Row_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_argb4444), // %3
"+r"(width) // %4
- : "r"(&kUVToRB), // %5
- "r"(&kUVToG) // %6
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void YToARGBRow_NEON(const uint8* src_y,
- uint8* dst_argb,
- int width) {
- asm volatile (
- MEMACCESS(3)
- "vld1.8 {d24}, [%3] \n"
- MEMACCESS(4)
- "vld1.8 {d25}, [%4] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
- "1: \n"
- READYUV400
- YUV422TORGB
- "subs %2, %2, #8 \n"
- "vmov.u8 d23, #255 \n"
- MEMACCESS(1)
- "vst4.8 {d20, d21, d22, d23}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "r"(&kUVToRB), // %3
- "r"(&kUVToG) // %4
- : "cc", "memory", "q0", "q1", "q2", "q3",
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
@@ -571,7 +449,31 @@ void I400ToARGBRow_NEON(const uint8* src_y,
uint8* dst_argb,
int width) {
asm volatile (
- ".p2align 2 \n"
+ YUVTORGB_SETUP
+ "vmov.u8 d23, #255 \n"
+ "1: \n"
+ READYUV400
+ YUVTORGB
+ "subs %2, %2, #8 \n"
+ MEMACCESS(1)
+ "vst4.8 {d20, d21, d22, d23}, [%1]! \n"
+ "bgt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : [kUVToRB]"r"(&kYuvI601Constants.kUVToRB),
+ [kUVToG]"r"(&kYuvI601Constants.kUVToG),
+ [kUVBiasBGR]"r"(&kYuvI601Constants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvI601Constants.kYToRgb)
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
+ "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ );
+}
+
+void J400ToARGBRow_NEON(const uint8* src_y,
+ uint8* dst_argb,
+ int width) {
+ asm volatile (
"vmov.u8 d23, #255 \n"
"1: \n"
MEMACCESS(0)
@@ -593,21 +495,15 @@ void I400ToARGBRow_NEON(const uint8* src_y,
void NV12ToARGBRow_NEON(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- MEMACCESS(4)
- "vld1.8 {d24}, [%4] \n"
- MEMACCESS(5)
- "vld1.8 {d25}, [%5] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
+ YUVTORGB_SETUP
+ "vmov.u8 d23, #255 \n"
"1: \n"
READNV12
- YUV422TORGB
+ YUVTORGB
"subs %3, %3, #8 \n"
- "vmov.u8 d23, #255 \n"
MEMACCESS(2)
"vst4.8 {d20, d21, d22, d23}, [%2]! \n"
"bgt 1b \n"
@@ -615,41 +511,39 @@ void NV12ToARGBRow_NEON(const uint8* src_y,
"+r"(src_uv), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
- : "r"(&kUVToRB), // %4
- "r"(&kUVToG) // %5
- : "cc", "memory", "q0", "q1", "q2", "q3",
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
void NV21ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_uv,
+ const uint8* src_vu,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- MEMACCESS(4)
- "vld1.8 {d24}, [%4] \n"
- MEMACCESS(5)
- "vld1.8 {d25}, [%5] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
+ YUVTORGB_SETUP
+ "vmov.u8 d23, #255 \n"
"1: \n"
READNV21
- YUV422TORGB
+ YUVTORGB
"subs %3, %3, #8 \n"
- "vmov.u8 d23, #255 \n"
MEMACCESS(2)
"vst4.8 {d20, d21, d22, d23}, [%2]! \n"
"bgt 1b \n"
: "+r"(src_y), // %0
- "+r"(src_uv), // %1
+ "+r"(src_vu), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
- : "r"(&kUVToRB), // %4
- "r"(&kUVToG) // %5
- : "cc", "memory", "q0", "q1", "q2", "q3",
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
@@ -657,19 +551,13 @@ void NV21ToARGBRow_NEON(const uint8* src_y,
void NV12ToRGB565Row_NEON(const uint8* src_y,
const uint8* src_uv,
uint8* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- MEMACCESS(4)
- "vld1.8 {d24}, [%4] \n"
- MEMACCESS(5)
- "vld1.8 {d25}, [%5] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
+ YUVTORGB_SETUP
"1: \n"
READNV12
- YUV422TORGB
+ YUVTORGB
"subs %3, %3, #8 \n"
ARGBTORGB565
MEMACCESS(2)
@@ -679,101 +567,63 @@ void NV12ToRGB565Row_NEON(const uint8* src_y,
"+r"(src_uv), // %1
"+r"(dst_rgb565), // %2
"+r"(width) // %3
- : "r"(&kUVToRB), // %4
- "r"(&kUVToG) // %5
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void NV21ToRGB565Row_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_rgb565,
- int width) {
- asm volatile (
- MEMACCESS(4)
- "vld1.8 {d24}, [%4] \n"
- MEMACCESS(5)
- "vld1.8 {d25}, [%5] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
- "1: \n"
- READNV21
- YUV422TORGB
- "subs %3, %3, #8 \n"
- ARGBTORGB565
- MEMACCESS(2)
- "vst1.8 {q0}, [%2]! \n" // store 8 pixels RGB565.
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_uv), // %1
- "+r"(dst_rgb565), // %2
- "+r"(width) // %3
- : "r"(&kUVToRB), // %4
- "r"(&kUVToG) // %5
- : "cc", "memory", "q0", "q1", "q2", "q3",
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- MEMACCESS(3)
- "vld1.8 {d24}, [%3] \n"
- MEMACCESS(4)
- "vld1.8 {d25}, [%4] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
+ YUVTORGB_SETUP
+ "vmov.u8 d23, #255 \n"
"1: \n"
READYUY2
- YUV422TORGB
+ YUVTORGB
"subs %2, %2, #8 \n"
- "vmov.u8 d23, #255 \n"
MEMACCESS(1)
"vst4.8 {d20, d21, d22, d23}, [%1]! \n"
"bgt 1b \n"
: "+r"(src_yuy2), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
- : "r"(&kUVToRB), // %3
- "r"(&kUVToG) // %4
- : "cc", "memory", "q0", "q1", "q2", "q3",
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
void UYVYToARGBRow_NEON(const uint8* src_uyvy,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- MEMACCESS(3)
- "vld1.8 {d24}, [%3] \n"
- MEMACCESS(4)
- "vld1.8 {d25}, [%4] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
+ YUVTORGB_SETUP
+ "vmov.u8 d23, #255 \n"
"1: \n"
READUYVY
- YUV422TORGB
+ YUVTORGB
"subs %2, %2, #8 \n"
- "vmov.u8 d23, #255 \n"
MEMACCESS(1)
"vst4.8 {d20, d21, d22, d23}, [%1]! \n"
"bgt 1b \n"
: "+r"(src_uyvy), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
- : "r"(&kUVToRB), // %3
- "r"(&kUVToG) // %4
- : "cc", "memory", "q0", "q1", "q2", "q3",
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
@@ -782,7 +632,6 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy,
void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int width) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld2.8 {q0, q1}, [%0]! \n" // load 16 pairs of UV
@@ -805,7 +654,6 @@ void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {q0}, [%0]! \n" // load U
@@ -828,7 +676,6 @@ void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
// Copy multiples of 32 bytes. vld4.8 allows unaligned access and is fastest on A15.
void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {d0, d1, d2, d3}, [%0]! \n" // load 32
@@ -844,12 +691,28 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
);
}
-// SetRow8 writes 'count' bytes using a 32 bit value repeated.
-void SetRow_NEON(uint8* dst, uint32 v32, int count) {
+// SetRow writes 'count' bytes using an 8 bit value repeated.
+void SetRow_NEON(uint8* dst, uint8 v8, int count) {
+ asm volatile (
+ "vdup.8 q0, %2 \n" // duplicate 16 bytes
+ "1: \n"
+ "subs %1, %1, #16 \n" // 16 bytes per loop
+ MEMACCESS(0)
+ "vst1.8 {q0}, [%0]! \n" // store
+ "bgt 1b \n"
+ : "+r"(dst), // %0
+ "+r"(count) // %1
+ : "r"(v8) // %2
+ : "cc", "memory", "q0"
+ );
+}
+
+// ARGBSetRow writes 'count' pixels using a 32 bit value repeated.
+void ARGBSetRow_NEON(uint8* dst, uint32 v32, int count) {
asm volatile (
"vdup.u32 q0, %2 \n" // duplicate 4 ints
- "1: \n"
- "subs %1, %1, #16 \n" // 16 bytes per loop
+ "1: \n"
+ "subs %1, %1, #4 \n" // 4 pixels per loop
MEMACCESS(0)
"vst1.8 {q0}, [%0]! \n" // store
"bgt 1b \n"
@@ -860,16 +723,6 @@ void SetRow_NEON(uint8* dst, uint32 v32, int count) {
);
}
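SetRow now takes an 8-bit fill value while the new ARGBSetRow takes a whole 32-bit pixel. The scalar equivalents are trivial (sketch, names hypothetical):

#include <stdint.h>
#include <string.h>
static void SetRow_Sketch(uint8_t* dst, uint8_t v8, int count) {
  memset(dst, v8, (size_t)count);  // fill 'count' bytes
}
static void ARGBSetRow_Sketch(uint8_t* dst, uint32_t v32, int count) {
  for (int i = 0; i < count; ++i) {
    memcpy(dst + i * 4, &v32, 4);  // repeat the 32-bit ARGB value per pixel
  }
}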
-// TODO(fbarchard): Make fully assembler
-// SetRow32 writes 'count' words using a 32 bit value repeated.
-void ARGBSetRows_NEON(uint8* dst, uint32 v32, int width,
- int dst_stride, int height) {
- for (int y = 0; y < height; ++y) {
- SetRow_NEON(dst, v32, width << 2);
- dst += dst_stride;
- }
-}
-
void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
asm volatile (
// Start at end of source row.
@@ -877,7 +730,6 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
"add %0, %0, %2 \n"
"sub %0, #16 \n"
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {q0}, [%0], r3 \n" // src -= 16
@@ -904,7 +756,6 @@ void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
"add %0, %0, %3, lsl #1 \n"
"sub %0, #16 \n"
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld2.8 {d0, d1}, [%0], r12 \n" // src -= 16
@@ -931,7 +782,6 @@ void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) {
"add %0, %0, %2, lsl #2 \n"
"sub %0, #16 \n"
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {q0}, [%0], r3 \n" // src -= 16
@@ -950,10 +800,9 @@ void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) {
);
}
-void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix) {
+void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int width) {
asm volatile (
"vmov.u8 d4, #255 \n" // Alpha
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RGB24.
@@ -963,16 +812,15 @@ void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix) {
"bgt 1b \n"
: "+r"(src_rgb24), // %0
"+r"(dst_argb), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
);
}
-void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix) {
+void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int width) {
asm volatile (
"vmov.u8 d4, #255 \n" // Alpha
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RAW.
@@ -983,12 +831,30 @@ void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix) {
"bgt 1b \n"
: "+r"(src_raw), // %0
"+r"(dst_argb), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
);
}
+void RAWToRGB24Row_NEON(const uint8* src_raw, uint8* dst_rgb24, int width) {
+ asm volatile (
+ "1: \n"
+ MEMACCESS(0)
+ "vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RAW.
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ "vswp.u8 d1, d3 \n" // swap R, B
+ MEMACCESS(1)
+ "vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of RGB24.
+ "bgt 1b \n"
+ : "+r"(src_raw), // %0
+ "+r"(dst_rgb24), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "d1", "d2", "d3" // Clobber List
+ );
+}
+
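The new RAWToRGB24Row is a pure channel swap: byte 0 and byte 2 of every 3-byte pixel trade places. A scalar sketch (name hypothetical):

#include <stdint.h>
static void RAWToRGB24Row_Sketch(const uint8_t* src_raw, uint8_t* dst_rgb24,
                                 int width) {
  for (int i = 0; i < width; ++i) {
    dst_rgb24[0] = src_raw[2];  // swap R and B
    dst_rgb24[1] = src_raw[1];
    dst_rgb24[2] = src_raw[0];
    src_raw += 3; dst_rgb24 += 3;
  }
}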
#define RGB565TOARGB \
"vshrn.u16 d6, q0, #5 \n" /* G xxGGGGGG */ \
"vuzp.u8 d0, d1 \n" /* d0 xxxBBBBB RRRRRxxx */ \
@@ -1001,10 +867,9 @@ void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix) {
"vorr.u8 d2, d1, d5 \n" /* R */ \
"vorr.u8 d1, d4, d6 \n" /* G */
-void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix) {
+void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int width) {
asm volatile (
"vmov.u8 d3, #255 \n" // Alpha
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels.
@@ -1015,7 +880,7 @@ void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix) {
"bgt 1b \n"
: "+r"(src_rgb565), // %0
"+r"(dst_argb), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List
);
@@ -1049,10 +914,9 @@ void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix) {
"vorr.u8 d1, d4, d6 \n" /* G */
void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb,
- int pix) {
+ int width) {
asm volatile (
"vmov.u8 d3, #255 \n" // Alpha
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels.
@@ -1063,7 +927,7 @@ void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb,
"bgt 1b \n"
: "+r"(src_argb1555), // %0
"+r"(dst_argb), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List
);
@@ -1080,10 +944,9 @@ void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb,
"vswp.u8 d1, d2 \n" /* B,R,G,A -> B,G,R,A */
void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb,
- int pix) {
+ int width) {
asm volatile (
"vmov.u8 d3, #255 \n" // Alpha
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels.
@@ -1094,15 +957,14 @@ void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb,
"bgt 1b \n"
: "+r"(src_argb4444), // %0
"+r"(dst_argb), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "q0", "q1", "q2" // Clobber List
);
}
-void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int pix) {
+void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int width) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB.
@@ -1112,15 +974,14 @@ void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int pix) {
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_rgb24), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
);
}
-void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int pix) {
+void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int width) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB.
@@ -1131,15 +992,14 @@ void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int pix) {
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_raw), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
);
}
-void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix) {
+void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int width) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of YUY2.
@@ -1149,15 +1009,14 @@ void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix) {
"bgt 1b \n"
: "+r"(src_yuy2), // %0
"+r"(dst_y), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "q0", "q1" // Clobber List
);
}
-void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix) {
+void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int width) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of UYVY.
@@ -1167,16 +1026,15 @@ void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix) {
"bgt 1b \n"
: "+r"(src_uyvy), // %0
"+r"(dst_y), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "q0", "q1" // Clobber List
);
}
void YUY2ToUV422Row_NEON(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,
- int pix) {
+ int width) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of YUY2.
@@ -1189,16 +1047,15 @@ void YUY2ToUV422Row_NEON(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,
: "+r"(src_yuy2), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
- "+r"(pix) // %3
+ "+r"(width) // %3
:
: "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List
);
}
void UYVYToUV422Row_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v,
- int pix) {
+ int width) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of UYVY.
@@ -1211,17 +1068,16 @@ void UYVYToUV422Row_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v,
: "+r"(src_uyvy), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
- "+r"(pix) // %3
+ "+r"(width) // %3
:
: "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List
);
}
void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) {
+ uint8* dst_u, uint8* dst_v, int width) {
asm volatile (
"add %1, %0, %1 \n" // stride + src_yuy2
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of YUY2.
@@ -1239,17 +1095,16 @@ void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
"+r"(stride_yuy2), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
- "+r"(pix) // %4
+ "+r"(width) // %4
:
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7" // Clobber List
);
}
void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) {
+ uint8* dst_u, uint8* dst_v, int width) {
asm volatile (
"add %1, %0, %1 \n" // stride + src_uyvy
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of UYVY.
@@ -1267,81 +1122,15 @@ void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
"+r"(stride_uyvy), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
- "+r"(pix) // %4
+ "+r"(width) // %4
:
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7" // Clobber List
);
}
-void HalfRow_NEON(const uint8* src_uv, int src_uv_stride,
- uint8* dst_uv, int pix) {
- asm volatile (
- // change the stride to row 2 pointer
- "add %1, %0 \n"
- "1: \n"
- MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // load row 1 16 pixels.
- "subs %3, %3, #16 \n" // 16 processed per loop
- MEMACCESS(1)
- "vld1.8 {q1}, [%1]! \n" // load row 2 16 pixels.
- "vrhadd.u8 q0, q1 \n" // average row 1 and 2
- MEMACCESS(2)
- "vst1.8 {q0}, [%2]! \n"
- "bgt 1b \n"
- : "+r"(src_uv), // %0
- "+r"(src_uv_stride), // %1
- "+r"(dst_uv), // %2
- "+r"(pix) // %3
- :
- : "cc", "memory", "q0", "q1" // Clobber List
- );
-}
-
-// Select 2 channels from ARGB on alternating pixels. e.g. BGBGBGBG
-void ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer,
- uint32 selector, int pix) {
- asm volatile (
- "vmov.u32 d6[0], %3 \n" // selector
- "1: \n"
- MEMACCESS(0)
- "vld1.8 {q0, q1}, [%0]! \n" // load row 8 pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop
- "vtbl.8 d4, {d0, d1}, d6 \n" // look up 4 pixels
- "vtbl.8 d5, {d2, d3}, d6 \n" // look up 4 pixels
- "vtrn.u32 d4, d5 \n" // combine 8 pixels
- MEMACCESS(1)
- "vst1.8 {d4}, [%1]! \n" // store 8.
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_bayer), // %1
- "+r"(pix) // %2
- : "r"(selector) // %3
- : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List
- );
-}
-
-// Select G channels from ARGB. e.g. GGGGGGGG
-void ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer,
- uint32 /*selector*/, int pix) {
- asm volatile (
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load row 8 pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop
- MEMACCESS(1)
- "vst1.8 {d1}, [%1]! \n" // store 8 G's.
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_bayer), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "q0", "q1" // Clobber List
- );
-}
-
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix) {
+ const uint8* shuffler, int width) {
asm volatile (
MEMACCESS(3)
"vld1.8 {q2}, [%3] \n" // shuffler
@@ -1356,7 +1145,7 @@ void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
: "r"(shuffler) // %3
: "cc", "memory", "q0", "q1", "q2" // Clobber List
);
@@ -1367,7 +1156,6 @@ void I422ToYUY2Row_NEON(const uint8* src_y,
const uint8* src_v,
uint8* dst_yuy2, int width) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld2.8 {d0, d2}, [%0]! \n" // load 16 Ys
@@ -1394,7 +1182,6 @@ void I422ToUYVYRow_NEON(const uint8* src_y,
const uint8* src_v,
uint8* dst_uyvy, int width) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld2.8 {d1, d3}, [%0]! \n" // load 16 Ys
@@ -1416,9 +1203,8 @@ void I422ToUYVYRow_NEON(const uint8* src_y,
);
}
-void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int pix) {
+void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int width) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB.
@@ -1429,16 +1215,38 @@ void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int pix) {
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_rgb565), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "q0", "q8", "q9", "q10", "q11"
);
}
-void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_argb1555,
- int pix) {
+void ARGBToRGB565DitherRow_NEON(const uint8* src_argb, uint8* dst_rgb,
+ const uint32 dither4, int width) {
+ asm volatile (
+ "vdup.32 d2, %2 \n" // dither4
+ "1: \n"
+ MEMACCESS(1)
+ "vld4.8 {d20, d21, d22, d23}, [%1]! \n" // load 8 pixels of ARGB.
+ "subs %3, %3, #8 \n" // 8 processed per loop.
+ "vqadd.u8 d20, d20, d2 \n"
+ "vqadd.u8 d21, d21, d2 \n"
+ "vqadd.u8 d22, d22, d2 \n"
+ ARGBTORGB565
+ MEMACCESS(0)
+ "vst1.8 {q0}, [%0]! \n" // store 8 pixels RGB565.
+ "bgt 1b \n"
+ : "+r"(dst_rgb) // %0
+ : "r"(src_argb), // %1
+ "r"(dither4), // %2
+ "r"(width) // %3
+ : "cc", "memory", "q0", "q1", "q8", "q9", "q10", "q11"
+ );
+}
+
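The dither variant saturating-adds one of four per-column dither bytes to B, G and R before truncating to 5:6:5. A scalar sketch of that rounding step, assuming little-endian extraction of the four dither bytes (helper name hypothetical):

#include <stdint.h>
#include <string.h>
static void ARGBToRGB565DitherRow_Sketch(const uint8_t* src_argb,
                                         uint8_t* dst_rgb, uint32_t dither4,
                                         int width) {
  for (int i = 0; i < width; ++i) {
    int d = (int)((dither4 >> ((i & 3) * 8)) & 0xff);  // cycle 4 dither bytes
    int b = src_argb[0] + d; if (b > 255) b = 255;
    int g = src_argb[1] + d; if (g > 255) g = 255;
    int r = src_argb[2] + d; if (r > 255) r = 255;
    uint16_t v = (uint16_t)((b >> 3) | ((g >> 2) << 5) | ((r >> 3) << 11));
    memcpy(dst_rgb, &v, 2);  // store one RGB565 pixel
    src_argb += 4; dst_rgb += 2;
  }
}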
+void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_argb1555,
+ int width) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB.
@@ -1449,17 +1257,16 @@ void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_argb1555,
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb1555), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "q0", "q8", "q9", "q10", "q11"
);
}
void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444,
- int pix) {
+ int width) {
asm volatile (
"vmov.u8 d4, #0x0f \n" // bits to clear with vbic.
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB.
@@ -1470,19 +1277,18 @@ void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444,
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb4444), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "q0", "q8", "q9", "q10", "q11"
);
}
-void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
+void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width) {
asm volatile (
"vmov.u8 d24, #13 \n" // B * 0.1016 coefficient
"vmov.u8 d25, #65 \n" // G * 0.5078 coefficient
"vmov.u8 d26, #33 \n" // R * 0.2578 coefficient
"vmov.u8 d27, #16 \n" // Add 16 constant
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
@@ -1497,18 +1303,17 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_y), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "q0", "q1", "q2", "q12", "q13"
);
}
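The 13/65/33 weights above are BT.601 luma coefficients in 7-bit fixed point, with the +16 limited-range offset added afterwards. Assuming round-to-nearest (as vqrshrun provides), the scalar form is roughly:

#include <stdint.h>
// Y = ((13*B + 65*G + 33*R + 64) >> 7) + 16, saturated to 8 bits.
static void ARGBToYRow_Sketch(const uint8_t* src_argb, uint8_t* dst_y,
                              int width) {
  for (int i = 0; i < width; ++i) {
    int y = ((13 * src_argb[0] + 65 * src_argb[1] + 33 * src_argb[2] + 64) >> 7)
            + 16;
    dst_y[i] = (uint8_t)(y > 255 ? 255 : y);
    src_argb += 4;  // B, G, R, A
  }
}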
-void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
+void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width) {
asm volatile (
"vmov.u8 d24, #15 \n" // B * 0.11400 coefficient
"vmov.u8 d25, #75 \n" // G * 0.58700 coefficient
"vmov.u8 d26, #38 \n" // R * 0.29900 coefficient
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
@@ -1522,7 +1327,7 @@ void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_y), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "q0", "q1", "q2", "q12", "q13"
);
@@ -1530,7 +1335,7 @@ void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
// 8x1 pixels.
void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix) {
+ int width) {
asm volatile (
"vmov.u8 d24, #112 \n" // UB / VR 0.875 coefficient
"vmov.u8 d25, #74 \n" // UG -0.5781 coefficient
@@ -1538,7 +1343,6 @@ void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
"vmov.u8 d27, #18 \n" // VB -0.1406 coefficient
"vmov.u8 d28, #94 \n" // VG -0.7344 coefficient
"vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
@@ -1564,15 +1368,15 @@ void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
: "+r"(src_argb), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
- "+r"(pix) // %3
+ "+r"(width) // %3
:
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q12", "q13", "q14", "q15"
);
}
-// 16x1 pixels -> 8x1. pix is number of argb pixels. e.g. 16.
+// 16x1 pixels -> 8x1. width is number of argb pixels. e.g. 16.
void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix) {
+ int width) {
asm volatile (
"vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
"vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
@@ -1580,7 +1384,6 @@ void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
"vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
"vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
"vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
@@ -1613,16 +1416,16 @@ void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
: "+r"(src_argb), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
- "+r"(pix) // %3
+ "+r"(width) // %3
:
: "cc", "memory", "q0", "q1", "q2", "q3",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
-// 32x1 pixels -> 8x1. pix is number of argb pixels. e.g. 32.
+// 32x1 pixels -> 8x1. width is number of argb pixels. e.g. 32.
void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix) {
+ int width) {
asm volatile (
"vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
"vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
@@ -1630,7 +1433,6 @@ void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
"vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
"vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
"vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
@@ -1677,14 +1479,14 @@ void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
: "+r"(src_argb), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
- "+r"(pix) // %3
+ "+r"(width) // %3
:
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
-// 16x2 pixels -> 8x1. pix is number of argb pixels. e.g. 16.
+// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
#define RGBTOUV(QB, QG, QR) \
"vmul.s16 q8, " #QB ", q10 \n" /* B */ \
"vmls.s16 q8, " #QG ", q11 \n" /* G */ \
@@ -1699,7 +1501,7 @@ void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
// TODO(fbarchard): Consider vhadd vertical, then vpaddl horizontal, avoid shr.
void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int pix) {
+ uint8* dst_u, uint8* dst_v, int width) {
asm volatile (
"add %1, %0, %1 \n" // src_stride + src_argb
"vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
@@ -1708,7 +1510,6 @@ void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
"vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
"vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
"vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
@@ -1740,7 +1541,7 @@ void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
"+r"(src_stride_argb), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
- "+r"(pix) // %4
+ "+r"(width) // %4
:
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
@@ -1749,7 +1550,7 @@ void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
// TODO(fbarchard): Subsample match C code.
void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int pix) {
+ uint8* dst_u, uint8* dst_v, int width) {
asm volatile (
"add %1, %0, %1 \n" // src_stride + src_argb
"vmov.s16 q10, #127 / 2 \n" // UB / VR 0.500 coefficient
@@ -1758,7 +1559,6 @@ void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
"vmov.s16 q13, #20 / 2 \n" // VB -0.08131 coefficient
"vmov.s16 q14, #107 / 2 \n" // VG -0.41869 coefficient
"vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
@@ -1790,7 +1590,7 @@ void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
"+r"(src_stride_argb), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
- "+r"(pix) // %4
+ "+r"(width) // %4
:
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
@@ -1798,7 +1598,7 @@ void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
}
void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
- uint8* dst_u, uint8* dst_v, int pix) {
+ uint8* dst_u, uint8* dst_v, int width) {
asm volatile (
"add %1, %0, %1 \n" // src_stride + src_bgra
"vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
@@ -1807,7 +1607,6 @@ void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
"vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
"vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
"vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 BGRA pixels.
@@ -1839,7 +1638,7 @@ void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
"+r"(src_stride_bgra), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
- "+r"(pix) // %4
+ "+r"(width) // %4
:
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
@@ -1847,7 +1646,7 @@ void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
}
void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
- uint8* dst_u, uint8* dst_v, int pix) {
+ uint8* dst_u, uint8* dst_v, int width) {
asm volatile (
"add %1, %0, %1 \n" // src_stride + src_abgr
"vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
@@ -1856,7 +1655,6 @@ void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
"vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
"vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
"vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ABGR pixels.
@@ -1888,7 +1686,7 @@ void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
"+r"(src_stride_abgr), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
- "+r"(pix) // %4
+ "+r"(width) // %4
:
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
@@ -1896,7 +1694,7 @@ void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
}
void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba,
- uint8* dst_u, uint8* dst_v, int pix) {
+ uint8* dst_u, uint8* dst_v, int width) {
asm volatile (
"add %1, %0, %1 \n" // src_stride + src_rgba
"vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
@@ -1905,7 +1703,6 @@ void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba,
"vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
"vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
"vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 RGBA pixels.
@@ -1937,7 +1734,7 @@ void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba,
"+r"(src_stride_rgba), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
- "+r"(pix) // %4
+ "+r"(width) // %4
:
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
@@ -1945,7 +1742,7 @@ void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba,
}
void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24,
- uint8* dst_u, uint8* dst_v, int pix) {
+ uint8* dst_u, uint8* dst_v, int width) {
asm volatile (
"add %1, %0, %1 \n" // src_stride + src_rgb24
"vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
@@ -1954,7 +1751,6 @@ void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24,
"vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
"vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
"vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RGB24 pixels.
@@ -1986,7 +1782,7 @@ void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24,
"+r"(src_stride_rgb24), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
- "+r"(pix) // %4
+ "+r"(width) // %4
:
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
@@ -1994,7 +1790,7 @@ void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24,
}
void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw,
- uint8* dst_u, uint8* dst_v, int pix) {
+ uint8* dst_u, uint8* dst_v, int width) {
asm volatile (
"add %1, %0, %1 \n" // src_stride + src_raw
"vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
@@ -2003,7 +1799,6 @@ void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw,
"vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
"vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
"vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RAW pixels.
@@ -2035,16 +1830,16 @@ void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw,
"+r"(src_stride_raw), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
- "+r"(pix) // %4
+ "+r"(width) // %4
:
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
-// 16x2 pixels -> 8x1. pix is number of argb pixels. e.g. 16.
+// 16x2 pixels -> 8x1. width is number of RGB565 pixels. e.g. 16.
void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
- uint8* dst_u, uint8* dst_v, int pix) {
+ uint8* dst_u, uint8* dst_v, int width) {
asm volatile (
"add %1, %0, %1 \n" // src_stride + src_argb
"vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
@@ -2053,7 +1848,6 @@ void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
"vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
"vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
"vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels.
@@ -2105,16 +1899,16 @@ void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
"+r"(src_stride_rgb565), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
- "+r"(pix) // %4
+ "+r"(width) // %4
:
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
-// 16x2 pixels -> 8x1. pix is number of argb pixels. e.g. 16.
+// 16x2 pixels -> 8x1. width is number of ARGB1555 pixels. e.g. 16.
void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
- uint8* dst_u, uint8* dst_v, int pix) {
+ uint8* dst_u, uint8* dst_v, int width) {
asm volatile (
"add %1, %0, %1 \n" // src_stride + src_argb
"vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
@@ -2123,7 +1917,6 @@ void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
"vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
"vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
"vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels.
@@ -2175,16 +1968,16 @@ void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
"+r"(src_stride_argb1555), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
- "+r"(pix) // %4
+ "+r"(width) // %4
:
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
-// 16x2 pixels -> 8x1. pix is number of argb pixels. e.g. 16.
+// 16x2 pixels -> 8x1. width is number of ARGB4444 pixels. e.g. 16.
void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444,
- uint8* dst_u, uint8* dst_v, int pix) {
+ uint8* dst_u, uint8* dst_v, int width) {
asm volatile (
"add %1, %0, %1 \n" // src_stride + src_argb
"vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
@@ -2193,7 +1986,6 @@ void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444,
"vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
"vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
"vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels.
@@ -2245,20 +2037,19 @@ void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444,
"+r"(src_stride_argb4444), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
- "+r"(pix) // %4
+ "+r"(width) // %4
:
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
-void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix) {
+void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int width) {
asm volatile (
"vmov.u8 d24, #13 \n" // B * 0.1016 coefficient
"vmov.u8 d25, #65 \n" // G * 0.5078 coefficient
"vmov.u8 d26, #33 \n" // R * 0.2578 coefficient
"vmov.u8 d27, #16 \n" // Add 16 constant
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels.
@@ -2274,19 +2065,18 @@ void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix) {
"bgt 1b \n"
: "+r"(src_rgb565), // %0
"+r"(dst_y), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"
);
}
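This and the following ToY kernels weight B/G/R with 13/65/33, a 7-bit fixed-point approximation of the BT.601 luma weights (0.1016, 0.5078, 0.2578), then add the studio-range offset of 16. A scalar sketch of the same arithmetic (the +64 rounding term mirrors the NEON rounding shift; this is an illustration, not the patch's code):

#include <stdint.h>

static uint8_t RGBToY(uint8_t r, uint8_t g, uint8_t b) {
  /* 13 + 65 + 33 = 111, so the sum fits comfortably in 16 bits. */
  return (uint8_t)(((13 * b + 65 * g + 33 * r + 64) >> 7) + 16);
}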
-void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int pix) {
+void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int width) {
asm volatile (
"vmov.u8 d24, #13 \n" // B * 0.1016 coefficient
"vmov.u8 d25, #65 \n" // G * 0.5078 coefficient
"vmov.u8 d26, #33 \n" // R * 0.2578 coefficient
"vmov.u8 d27, #16 \n" // Add 16 constant
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels.
@@ -2302,19 +2092,18 @@ void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int pix) {
"bgt 1b \n"
: "+r"(src_argb1555), // %0
"+r"(dst_y), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"
);
}
-void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int pix) {
+void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int width) {
asm volatile (
"vmov.u8 d24, #13 \n" // B * 0.1016 coefficient
"vmov.u8 d25, #65 \n" // G * 0.5078 coefficient
"vmov.u8 d26, #33 \n" // R * 0.2578 coefficient
"vmov.u8 d27, #16 \n" // Add 16 constant
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels.
@@ -2330,19 +2119,18 @@ void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int pix) {
"bgt 1b \n"
: "+r"(src_argb4444), // %0
"+r"(dst_y), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"
);
}
-void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix) {
+void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int width) {
asm volatile (
"vmov.u8 d4, #33 \n" // R * 0.2578 coefficient
"vmov.u8 d5, #65 \n" // G * 0.5078 coefficient
"vmov.u8 d6, #13 \n" // B * 0.1016 coefficient
"vmov.u8 d7, #16 \n" // Add 16 constant
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of BGRA.
@@ -2357,19 +2145,18 @@ void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix) {
"bgt 1b \n"
: "+r"(src_bgra), // %0
"+r"(dst_y), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
);
}
-void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix) {
+void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int width) {
asm volatile (
"vmov.u8 d4, #33 \n" // R * 0.2578 coefficient
"vmov.u8 d5, #65 \n" // G * 0.5078 coefficient
"vmov.u8 d6, #13 \n" // B * 0.1016 coefficient
"vmov.u8 d7, #16 \n" // Add 16 constant
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ABGR.
@@ -2384,19 +2171,18 @@ void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix) {
"bgt 1b \n"
: "+r"(src_abgr), // %0
"+r"(dst_y), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
);
}
-void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix) {
+void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int width) {
asm volatile (
"vmov.u8 d4, #13 \n" // B * 0.1016 coefficient
"vmov.u8 d5, #65 \n" // G * 0.5078 coefficient
"vmov.u8 d6, #33 \n" // R * 0.2578 coefficient
"vmov.u8 d7, #16 \n" // Add 16 constant
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of RGBA.
@@ -2411,19 +2197,18 @@ void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix) {
"bgt 1b \n"
: "+r"(src_rgba), // %0
"+r"(dst_y), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
);
}
-void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix) {
+void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int width) {
asm volatile (
"vmov.u8 d4, #13 \n" // B * 0.1016 coefficient
"vmov.u8 d5, #65 \n" // G * 0.5078 coefficient
"vmov.u8 d6, #33 \n" // R * 0.2578 coefficient
"vmov.u8 d7, #16 \n" // Add 16 constant
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RGB24.
@@ -2438,19 +2223,18 @@ void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix) {
"bgt 1b \n"
: "+r"(src_rgb24), // %0
"+r"(dst_y), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
);
}
-void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix) {
+void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int width) {
asm volatile (
"vmov.u8 d4, #33 \n" // R * 0.2578 coefficient
"vmov.u8 d5, #65 \n" // G * 0.5078 coefficient
"vmov.u8 d6, #13 \n" // B * 0.1016 coefficient
"vmov.u8 d7, #16 \n" // Add 16 constant
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RAW.
@@ -2465,7 +2249,7 @@ void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix) {
"bgt 1b \n"
: "+r"(src_raw), // %0
"+r"(dst_y), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
);
@@ -2475,16 +2259,13 @@ void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix) {
void InterpolateRow_NEON(uint8* dst_ptr,
const uint8* src_ptr, ptrdiff_t src_stride,
int dst_width, int source_y_fraction) {
+ int y1_fraction = source_y_fraction;
asm volatile (
"cmp %4, #0 \n"
"beq 100f \n"
"add %2, %1 \n"
- "cmp %4, #64 \n"
- "beq 75f \n"
"cmp %4, #128 \n"
"beq 50f \n"
- "cmp %4, #192 \n"
- "beq 25f \n"
"vdup.8 d5, %4 \n"
"rsb %4, #256 \n"
@@ -2507,20 +2288,6 @@ void InterpolateRow_NEON(uint8* dst_ptr,
"bgt 1b \n"
"b 99f \n"
- // Blend 25 / 75.
- "25: \n"
- MEMACCESS(1)
- "vld1.8 {q0}, [%1]! \n"
- MEMACCESS(2)
- "vld1.8 {q1}, [%2]! \n"
- "subs %3, %3, #16 \n"
- "vrhadd.u8 q0, q1 \n"
- "vrhadd.u8 q0, q1 \n"
- MEMACCESS(0)
- "vst1.8 {q0}, [%0]! \n"
- "bgt 25b \n"
- "b 99f \n"
-
// Blend 50 / 50.
"50: \n"
MEMACCESS(1)
@@ -2534,20 +2301,6 @@ void InterpolateRow_NEON(uint8* dst_ptr,
"bgt 50b \n"
"b 99f \n"
- // Blend 75 / 25.
- "75: \n"
- MEMACCESS(1)
- "vld1.8 {q1}, [%1]! \n"
- MEMACCESS(2)
- "vld1.8 {q0}, [%2]! \n"
- "subs %3, %3, #16 \n"
- "vrhadd.u8 q0, q1 \n"
- "vrhadd.u8 q0, q1 \n"
- MEMACCESS(0)
- "vst1.8 {q0}, [%0]! \n"
- "bgt 75b \n"
- "b 99f \n"
-
// Blend 100 / 0 - Copy row unchanged.
"100: \n"
MEMACCESS(1)
@@ -2562,7 +2315,7 @@ void InterpolateRow_NEON(uint8* dst_ptr,
"+r"(src_ptr), // %1
"+r"(src_stride), // %2
"+r"(dst_width), // %3
- "+r"(source_y_fraction) // %4
+ "+r"(y1_fraction) // %4
:
: "cc", "memory", "q0", "q1", "d4", "d5", "q13", "q14"
);
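The surviving general path blends the two rows as dst = (src0*(256-f) + src1*f) >> 8, with f == 0 copying the source row and f == 128 taking the rounding-average fast path; the deleted 25/75 shortcuts appear to have approximated the blend with two chained rounding-halving averages. A scalar model of the general path (a sketch, assuming f in [0, 256)):

#include <stdint.h>

static void InterpolateRow_C(uint8_t* dst, const uint8_t* src0,
                             const uint8_t* src1, int width, int f) {
  int f0 = 256 - f;  /* weight of the first row */
  for (int x = 0; x < width; ++x) {
    dst[x] = (uint8_t)((src0[x] * f0 + src1[x] * f) >> 8);
  }
}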
@@ -2669,7 +2422,6 @@ void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size,
"vdup.u16 q10, %4 \n" // interval add
// 8 pixel loop.
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d2, d4, d6}, [%0] \n" // load 8 pixels of ARGB.
@@ -2712,7 +2464,6 @@ void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width,
"vshr.u16 q0, q0, #1 \n" // scale / 2.
// 8 pixel loop.
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d20, d22, d24, d26}, [%0]! \n" // load 8 pixels of ARGB.
@@ -2748,7 +2499,6 @@ void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
"vmov.u8 d24, #15 \n" // B * 0.11400 coefficient
"vmov.u8 d25, #75 \n" // G * 0.58700 coefficient
"vmov.u8 d26, #38 \n" // R * 0.29900 coefficient
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
@@ -2785,7 +2535,6 @@ void ARGBSepiaRow_NEON(uint8* dst_argb, int width) {
"vmov.u8 d28, #24 \n" // BB coefficient
"vmov.u8 d29, #98 \n" // BG coefficient
"vmov.u8 d30, #50 \n" // BR coefficient
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0] \n" // load 8 ARGB pixels.
@@ -2824,7 +2573,6 @@ void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb,
"vmovl.s8 q0, d4 \n" // B,G coefficients s16.
"vmovl.s8 q1, d5 \n" // R,A coefficients s16.
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d16, d18, d20, d22}, [%0]! \n" // load 8 ARGB pixels.
@@ -2832,7 +2580,7 @@ void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb,
"vmovl.u8 q8, d16 \n" // b (0 .. 255) 16 bit
"vmovl.u8 q9, d18 \n" // g
"vmovl.u8 q10, d20 \n" // r
- "vmovl.u8 q15, d22 \n" // a
+ "vmovl.u8 q11, d22 \n" // a
"vmul.s16 q12, q8, d0[0] \n" // B = B * Matrix B
"vmul.s16 q13, q8, d1[0] \n" // G = B * Matrix G
"vmul.s16 q14, q8, d2[0] \n" // R = B * Matrix R
@@ -2853,10 +2601,10 @@ void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb,
"vqadd.s16 q13, q13, q5 \n" // Accumulate G
"vqadd.s16 q14, q14, q6 \n" // Accumulate R
"vqadd.s16 q15, q15, q7 \n" // Accumulate A
- "vmul.s16 q4, q15, d0[3] \n" // B += A * Matrix B
- "vmul.s16 q5, q15, d1[3] \n" // G += A * Matrix G
- "vmul.s16 q6, q15, d2[3] \n" // R += A * Matrix R
- "vmul.s16 q7, q15, d3[3] \n" // A += A * Matrix A
+ "vmul.s16 q4, q11, d0[3] \n" // B += A * Matrix B
+ "vmul.s16 q5, q11, d1[3] \n" // G += A * Matrix G
+ "vmul.s16 q6, q11, d2[3] \n" // R += A * Matrix R
+ "vmul.s16 q7, q11, d3[3] \n" // A += A * Matrix A
"vqadd.s16 q12, q12, q4 \n" // Accumulate B
"vqadd.s16 q13, q13, q5 \n" // Accumulate G
"vqadd.s16 q14, q14, q6 \n" // Accumulate R
@@ -2872,7 +2620,7 @@ void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb,
"+r"(dst_argb), // %1
"+r"(width) // %2
: "r"(matrix_argb) // %3
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9",
+ : "cc", "memory", "q0", "q1", "q2", "q4", "q5", "q6", "q7", "q8", "q9",
"q10", "q11", "q12", "q13", "q14", "q15"
);
}
@@ -2884,7 +2632,6 @@ void ARGBMultiplyRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
uint8* dst_argb, int width) {
asm volatile (
// 8 pixel loop.
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
@@ -2918,7 +2665,6 @@ void ARGBAddRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
uint8* dst_argb, int width) {
asm volatile (
// 8 pixel loop.
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
@@ -2945,7 +2691,6 @@ void ARGBSubtractRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
uint8* dst_argb, int width) {
asm volatile (
// 8 pixel loop.
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
@@ -2977,7 +2722,6 @@ void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
asm volatile (
"vmov.u8 d3, #255 \n" // alpha
// 8 pixel loop.
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {d0}, [%0]! \n" // load 8 sobelx.
@@ -3004,7 +2748,6 @@ void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_y, int width) {
asm volatile (
// 16 pixel loop.
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {q0}, [%0]! \n" // load 16 sobelx.
@@ -3034,7 +2777,6 @@ void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
asm volatile (
"vmov.u8 d3, #255 \n" // alpha
// 8 pixel loop.
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {d2}, [%0]! \n" // load 8 sobelx.
@@ -3061,7 +2803,6 @@ void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
const uint8* src_y2, uint8* dst_sobelx, int width) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {d0}, [%0],%5 \n" // top
@@ -3105,7 +2846,6 @@ void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
uint8* dst_sobely, int width) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {d0}, [%0],%4 \n" // left
@@ -3140,7 +2880,7 @@ void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
: "cc", "memory", "q0", "q1" // Clobber List
);
}
-#endif // __ARM_NEON__
+#endif // defined(__ARM_NEON__) && !defined(__aarch64__)
#ifdef __cplusplus
} // extern "C"
diff --git a/TMessagesProj/jni/libyuv/source/row_neon64.cc b/TMessagesProj/jni/libyuv/source/row_neon64.cc
index 952e10d73..6fe5a1080 100644
--- a/TMessagesProj/jni/libyuv/source/row_neon64.cc
+++ b/TMessagesProj/jni/libyuv/source/row_neon64.cc
@@ -15,146 +15,146 @@ namespace libyuv {
extern "C" {
#endif
-// This module is for GCC Neon
+// This module is for GCC Neon armv8 64 bit.
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
// Read 8 Y, 4 U and 4 V from 422
#define READYUV422 \
MEMACCESS(0) \
- "vld1.8 {d0}, [%0]! \n" \
+ "ld1 {v0.8b}, [%0], #8 \n" \
MEMACCESS(1) \
- "vld1.32 {d2[0]}, [%1]! \n" \
+ "ld1 {v1.s}[0], [%1], #4 \n" \
MEMACCESS(2) \
- "vld1.32 {d2[1]}, [%2]! \n"
+ "ld1 {v1.s}[1], [%2], #4 \n"
// Read 8 Y, 2 U and 2 V from 411
#define READYUV411 \
MEMACCESS(0) \
- "vld1.8 {d0}, [%0]! \n" \
+ "ld1 {v0.8b}, [%0], #8 \n" \
MEMACCESS(1) \
- "vld1.16 {d2[0]}, [%1]! \n" \
+ "ld1 {v2.h}[0], [%1], #2 \n" \
MEMACCESS(2) \
- "vld1.16 {d2[1]}, [%2]! \n" \
- "vmov.u8 d3, d2 \n" \
- "vzip.u8 d2, d3 \n"
+ "ld1 {v2.h}[1], [%2], #2 \n" \
+ "zip1 v1.8b, v2.8b, v2.8b \n"
// Read 8 Y, 8 U and 8 V from 444
#define READYUV444 \
MEMACCESS(0) \
- "vld1.8 {d0}, [%0]! \n" \
+ "ld1 {v0.8b}, [%0], #8 \n" \
MEMACCESS(1) \
- "vld1.8 {d2}, [%1]! \n" \
+ "ld1 {v1.d}[0], [%1], #8 \n" \
MEMACCESS(2) \
- "vld1.8 {d3}, [%2]! \n" \
- "vpaddl.u8 q1, q1 \n" \
- "vrshrn.u16 d2, q1, #1 \n"
+ "ld1 {v1.d}[1], [%2], #8 \n" \
+ "uaddlp v1.8h, v1.16b \n" \
+ "rshrn v1.8b, v1.8h, #1 \n"
// Read 8 Y, and set 4 U and 4 V to 128
#define READYUV400 \
MEMACCESS(0) \
- "vld1.8 {d0}, [%0]! \n" \
- "vmov.u8 d2, #128 \n"
+ "ld1 {v0.8b}, [%0], #8 \n" \
+ "movi v1.8b , #128 \n"
// Read 8 Y and 4 UV from NV12
#define READNV12 \
MEMACCESS(0) \
- "vld1.8 {d0}, [%0]! \n" \
+ "ld1 {v0.8b}, [%0], #8 \n" \
MEMACCESS(1) \
- "vld1.8 {d2}, [%1]! \n" \
- "vmov.u8 d3, d2 \n"/* split odd/even uv apart */\
- "vuzp.u8 d2, d3 \n" \
- "vtrn.u32 d2, d3 \n"
+ "ld1 {v2.8b}, [%1], #8 \n" \
+ "uzp1 v1.8b, v2.8b, v2.8b \n" \
+ "uzp2 v3.8b, v2.8b, v2.8b \n" \
+ "ins v1.s[1], v3.s[0] \n"
// Read 8 Y and 4 VU from NV21
#define READNV21 \
MEMACCESS(0) \
- "vld1.8 {d0}, [%0]! \n" \
+ "ld1 {v0.8b}, [%0], #8 \n" \
MEMACCESS(1) \
- "vld1.8 {d2}, [%1]! \n" \
- "vmov.u8 d3, d2 \n"/* split odd/even uv apart */\
- "vuzp.u8 d3, d2 \n" \
- "vtrn.u32 d2, d3 \n"
+ "ld1 {v2.8b}, [%1], #8 \n" \
+ "uzp1 v3.8b, v2.8b, v2.8b \n" \
+ "uzp2 v1.8b, v2.8b, v2.8b \n" \
+ "ins v1.s[1], v3.s[0] \n"
// Read 8 YUY2
#define READYUY2 \
MEMACCESS(0) \
- "vld2.8 {d0, d2}, [%0]! \n" \
- "vmov.u8 d3, d2 \n" \
- "vuzp.u8 d2, d3 \n" \
- "vtrn.u32 d2, d3 \n"
+ "ld2 {v0.8b, v1.8b}, [%0], #16 \n" \
+ "uzp2 v3.8b, v1.8b, v1.8b \n" \
+ "uzp1 v1.8b, v1.8b, v1.8b \n" \
+ "ins v1.s[1], v3.s[0] \n"
// Read 8 UYVY
#define READUYVY \
MEMACCESS(0) \
- "vld2.8 {d2, d3}, [%0]! \n" \
- "vmov.u8 d0, d3 \n" \
- "vmov.u8 d3, d2 \n" \
- "vuzp.u8 d2, d3 \n" \
- "vtrn.u32 d2, d3 \n"
+ "ld2 {v2.8b, v3.8b}, [%0], #16 \n" \
+ "orr v0.8b, v3.8b, v3.8b \n" \
+ "uzp1 v1.8b, v2.8b, v2.8b \n" \
+ "uzp2 v3.8b, v2.8b, v2.8b \n" \
+ "ins v1.s[1], v3.s[0] \n"
-#define YUV422TORGB \
- "veor.u8 d2, d26 \n"/*subtract 128 from u and v*/\
- "vmull.s8 q8, d2, d24 \n"/* u/v B/R component */\
- "vmull.s8 q9, d2, d25 \n"/* u/v G component */\
- "vmov.u8 d1, #0 \n"/* split odd/even y apart */\
- "vtrn.u8 d0, d1 \n" \
- "vsub.s16 q0, q0, q15 \n"/* offset y */\
- "vmul.s16 q0, q0, q14 \n" \
- "vadd.s16 d18, d19 \n" \
- "vqadd.s16 d20, d0, d16 \n" /* B */ \
- "vqadd.s16 d21, d1, d16 \n" \
- "vqadd.s16 d22, d0, d17 \n" /* R */ \
- "vqadd.s16 d23, d1, d17 \n" \
- "vqadd.s16 d16, d0, d18 \n" /* G */ \
- "vqadd.s16 d17, d1, d18 \n" \
- "vqshrun.s16 d0, q10, #6 \n" /* B */ \
- "vqshrun.s16 d1, q11, #6 \n" /* G */ \
- "vqshrun.s16 d2, q8, #6 \n" /* R */ \
- "vmovl.u8 q10, d0 \n"/* set up for reinterleave*/\
- "vmovl.u8 q11, d1 \n" \
- "vmovl.u8 q8, d2 \n" \
- "vtrn.u8 d20, d21 \n" \
- "vtrn.u8 d22, d23 \n" \
- "vtrn.u8 d16, d17 \n" \
- "vmov.u8 d21, d16 \n"
+#define YUVTORGB_SETUP \
+ "ld1r {v24.8h}, [%[kUVBiasBGR]], #2 \n" \
+ "ld1r {v25.8h}, [%[kUVBiasBGR]], #2 \n" \
+ "ld1r {v26.8h}, [%[kUVBiasBGR]] \n" \
+ "ld1r {v31.4s}, [%[kYToRgb]] \n" \
+ "ld2 {v27.8h, v28.8h}, [%[kUVToRB]] \n" \
+ "ld2 {v29.8h, v30.8h}, [%[kUVToG]] \n"
-static vec8 kUVToRB = { 127, 127, 127, 127, 102, 102, 102, 102,
- 0, 0, 0, 0, 0, 0, 0, 0 };
-static vec8 kUVToG = { -25, -25, -25, -25, -52, -52, -52, -52,
- 0, 0, 0, 0, 0, 0, 0, 0 };
+#define YUVTORGB(vR, vG, vB) \
+ "uxtl v0.8h, v0.8b \n" /* Extract Y */ \
+ "shll v2.8h, v1.8b, #8 \n" /* Replicate UV */ \
+ "ushll2 v3.4s, v0.8h, #0 \n" /* Y */ \
+ "ushll v0.4s, v0.4h, #0 \n" \
+ "mul v3.4s, v3.4s, v31.4s \n" \
+ "mul v0.4s, v0.4s, v31.4s \n" \
+ "sqshrun v0.4h, v0.4s, #16 \n" \
+ "sqshrun2 v0.8h, v3.4s, #16 \n" /* Y */ \
+ "uaddw v1.8h, v2.8h, v1.8b \n" /* Replicate UV */ \
+ "mov v2.d[0], v1.d[1] \n" /* Extract V */ \
+ "uxtl v2.8h, v2.8b \n" \
+ "uxtl v1.8h, v1.8b \n" /* Extract U */ \
+ "mul v3.8h, v1.8h, v27.8h \n" \
+ "mul v5.8h, v1.8h, v29.8h \n" \
+ "mul v6.8h, v2.8h, v30.8h \n" \
+ "mul v7.8h, v2.8h, v28.8h \n" \
+ "sqadd v6.8h, v6.8h, v5.8h \n" \
+ "sqadd " #vB ".8h, v24.8h, v0.8h \n" /* B */ \
+ "sqadd " #vG ".8h, v25.8h, v0.8h \n" /* G */ \
+ "sqadd " #vR ".8h, v26.8h, v0.8h \n" /* R */ \
+ "sqadd " #vB ".8h, " #vB ".8h, v3.8h \n" /* B */ \
+ "sqsub " #vG ".8h, " #vG ".8h, v6.8h \n" /* G */ \
+ "sqadd " #vR ".8h, " #vR ".8h, v7.8h \n" /* R */ \
+ "sqshrun " #vB ".8b, " #vB ".8h, #6 \n" /* B */ \
+ "sqshrun " #vG ".8b, " #vG ".8h, #6 \n" /* G */ \
+ "sqshrun " #vR ".8b, " #vR ".8h, #6 \n" /* R */ \
#ifdef HAS_I444TOARGBROW_NEON
void I444ToARGBRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- MEMACCESS(5)
- "vld1.8 {d24}, [%5] \n"
- MEMACCESS(6)
- "vld1.8 {d25}, [%6] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
+ YUVTORGB_SETUP
+ "movi v23.8b, #255 \n" /* A */
"1: \n"
READYUV444
- YUV422TORGB
- "subs %4, %4, #8 \n"
- "vmov.u8 d23, #255 \n"
+ YUVTORGB(v22, v21, v20)
+ "subs %w4, %w4, #8 \n"
MEMACCESS(3)
- "vst4.8 {d20, d21, d22, d23}, [%3]! \n"
- "bgt 1b \n"
+ "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
+ "b.gt 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
"+r"(dst_argb), // %3
"+r"(width) // %4
- : "r"(&kUVToRB), // %5
- "r"(&kUVToG) // %6
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
+ "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
}
#endif // HAS_I444TOARGBROW_NEON
@@ -164,179 +164,128 @@ void I422ToARGBRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- MEMACCESS(5)
- "vld1.8 {d24}, [%5] \n"
- MEMACCESS(6)
- "vld1.8 {d25}, [%6] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
+ YUVTORGB_SETUP
+ "movi v23.8b, #255 \n" /* A */
"1: \n"
READYUV422
- YUV422TORGB
- "subs %4, %4, #8 \n"
- "vmov.u8 d23, #255 \n"
+ YUVTORGB(v22, v21, v20)
+ "subs %w4, %w4, #8 \n"
MEMACCESS(3)
- "vst4.8 {d20, d21, d22, d23}, [%3]! \n"
- "bgt 1b \n"
+ "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
+ "b.gt 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
"+r"(dst_argb), // %3
"+r"(width) // %4
- : "r"(&kUVToRB), // %5
- "r"(&kUVToG) // %6
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
+ "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
}
#endif // HAS_I422TOARGBROW_NEON
+#ifdef HAS_I422ALPHATOARGBROW_NEON
+void I422AlphaToARGBRow_NEON(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ const uint8* src_a,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP
+ "1: \n"
+ READYUV422
+ YUVTORGB(v22, v21, v20)
+ MEMACCESS(3)
+ "ld1 {v23.8b}, [%3], #8 \n"
+ "subs %w5, %w5, #8 \n"
+ MEMACCESS(4)
+ "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%4], #32 \n"
+ "b.gt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_u), // %1
+ "+r"(src_v), // %2
+ "+r"(src_a), // %3
+ "+r"(dst_argb), // %4
+ "+r"(width) // %5
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
+ "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
+ );
+}
+#endif // HAS_I422ALPHATOARGBROW_NEON
+
#ifdef HAS_I411TOARGBROW_NEON
void I411ToARGBRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- MEMACCESS(5)
- "vld1.8 {d24}, [%5] \n"
- MEMACCESS(6)
- "vld1.8 {d25}, [%6] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
+ YUVTORGB_SETUP
+ "movi v23.8b, #255 \n" /* A */
"1: \n"
READYUV411
- YUV422TORGB
- "subs %4, %4, #8 \n"
- "vmov.u8 d23, #255 \n"
+ YUVTORGB(v22, v21, v20)
+ "subs %w4, %w4, #8 \n"
MEMACCESS(3)
- "vst4.8 {d20, d21, d22, d23}, [%3]! \n"
- "bgt 1b \n"
+ "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
+ "b.gt 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
"+r"(dst_argb), // %3
"+r"(width) // %4
- : "r"(&kUVToRB), // %5
- "r"(&kUVToG) // %6
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
+ "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
}
#endif // HAS_I411TOARGBROW_NEON
-#ifdef HAS_I422TOBGRAROW_NEON
-void I422ToBGRARow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_bgra,
- int width) {
- asm volatile (
- MEMACCESS(5)
- "vld1.8 {d24}, [%5] \n"
- MEMACCESS(6)
- "vld1.8 {d25}, [%6] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
- "1: \n"
- READYUV422
- YUV422TORGB
- "subs %4, %4, #8 \n"
- "vswp.u8 d20, d22 \n"
- "vmov.u8 d19, #255 \n"
- MEMACCESS(3)
- "vst4.8 {d19, d20, d21, d22}, [%3]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_bgra), // %3
- "+r"(width) // %4
- : "r"(&kUVToRB), // %5
- "r"(&kUVToG) // %6
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-#endif // HAS_I422TOBGRAROW_NEON
-
-#ifdef HAS_I422TOABGRROW_NEON
-void I422ToABGRRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_abgr,
- int width) {
- asm volatile (
- MEMACCESS(5)
- "vld1.8 {d24}, [%5] \n"
- MEMACCESS(6)
- "vld1.8 {d25}, [%6] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
- "1: \n"
- READYUV422
- YUV422TORGB
- "subs %4, %4, #8 \n"
- "vswp.u8 d20, d22 \n"
- "vmov.u8 d23, #255 \n"
- MEMACCESS(3)
- "vst4.8 {d20, d21, d22, d23}, [%3]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_abgr), // %3
- "+r"(width) // %4
- : "r"(&kUVToRB), // %5
- "r"(&kUVToG) // %6
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-#endif // HAS_I422TOABGRROW_NEON
-
#ifdef HAS_I422TORGBAROW_NEON
void I422ToRGBARow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgba,
+ const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- MEMACCESS(5)
- "vld1.8 {d24}, [%5] \n"
- MEMACCESS(6)
- "vld1.8 {d25}, [%6] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
+ YUVTORGB_SETUP
+ "movi v20.8b, #255 \n" /* A */
"1: \n"
READYUV422
- YUV422TORGB
- "subs %4, %4, #8 \n"
- "vmov.u8 d19, #255 \n"
+ YUVTORGB(v23, v22, v21)
+ "subs %w4, %w4, #8 \n"
MEMACCESS(3)
- "vst4.8 {d19, d20, d21, d22}, [%3]! \n"
- "bgt 1b \n"
+ "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
+ "b.gt 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
"+r"(dst_rgba), // %3
"+r"(width) // %4
- : "r"(&kUVToRB), // %5
- "r"(&kUVToG) // %6
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
+ "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
}
#endif // HAS_I422TORGBAROW_NEON
@@ -346,440 +295,324 @@ void I422ToRGB24Row_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- MEMACCESS(5)
- "vld1.8 {d24}, [%5] \n"
- MEMACCESS(6)
- "vld1.8 {d25}, [%6] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB
- "subs %4, %4, #8 \n"
+ YUVTORGB(v22, v21, v20)
+ "subs %w4, %w4, #8 \n"
MEMACCESS(3)
- "vst3.8 {d20, d21, d22}, [%3]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_rgb24), // %3
- "+r"(width) // %4
- : "r"(&kUVToRB), // %5
- "r"(&kUVToG) // %6
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ "st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n"
+ "b.gt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_u), // %1
+ "+r"(src_v), // %2
+ "+r"(dst_rgb24), // %3
+ "+r"(width) // %4
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
+ "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
}
#endif // HAS_I422TORGB24ROW_NEON
-#ifdef HAS_I422TORAWROW_NEON
-void I422ToRAWRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_raw,
- int width) {
- asm volatile (
- MEMACCESS(5)
- "vld1.8 {d24}, [%5] \n"
- MEMACCESS(6)
- "vld1.8 {d25}, [%6] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
- "1: \n"
- READYUV422
- YUV422TORGB
- "subs %4, %4, #8 \n"
- "vswp.u8 d20, d22 \n"
- MEMACCESS(3)
- "vst3.8 {d20, d21, d22}, [%3]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_raw), // %3
- "+r"(width) // %4
- : "r"(&kUVToRB), // %5
- "r"(&kUVToG) // %6
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-#endif // HAS_I422TORAWROW_NEON
-
#define ARGBTORGB565 \
- "vshr.u8 d20, d20, #3 \n" /* B */ \
- "vshr.u8 d21, d21, #2 \n" /* G */ \
- "vshr.u8 d22, d22, #3 \n" /* R */ \
- "vmovl.u8 q8, d20 \n" /* B */ \
- "vmovl.u8 q9, d21 \n" /* G */ \
- "vmovl.u8 q10, d22 \n" /* R */ \
- "vshl.u16 q9, q9, #5 \n" /* G */ \
- "vshl.u16 q10, q10, #11 \n" /* R */ \
- "vorr q0, q8, q9 \n" /* BG */ \
- "vorr q0, q0, q10 \n" /* BGR */
+ "shll v0.8h, v22.8b, #8 \n" /* R */ \
+ "shll v20.8h, v20.8b, #8 \n" /* B */ \
+ "shll v21.8h, v21.8b, #8 \n" /* G */ \
+ "sri v0.8h, v21.8h, #5 \n" /* RG */ \
+ "sri v0.8h, v20.8h, #11 \n" /* RGB */
#ifdef HAS_I422TORGB565ROW_NEON
void I422ToRGB565Row_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- MEMACCESS(5)
- "vld1.8 {d24}, [%5] \n"
- MEMACCESS(6)
- "vld1.8 {d25}, [%6] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB
- "subs %4, %4, #8 \n"
+ YUVTORGB(v22, v21, v20)
+ "subs %w4, %w4, #8 \n"
ARGBTORGB565
MEMACCESS(3)
- "vst1.8 {q0}, [%3]! \n" // store 8 pixels RGB565.
- "bgt 1b \n"
+ "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels RGB565.
+ "b.gt 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
"+r"(dst_rgb565), // %3
"+r"(width) // %4
- : "r"(&kUVToRB), // %5
- "r"(&kUVToG) // %6
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
+ "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
}
#endif // HAS_I422TORGB565ROW_NEON
#define ARGBTOARGB1555 \
- "vshr.u8 q10, q10, #3 \n" /* B */ \
- "vshr.u8 d22, d22, #3 \n" /* R */ \
- "vshr.u8 d23, d23, #7 \n" /* A */ \
- "vmovl.u8 q8, d20 \n" /* B */ \
- "vmovl.u8 q9, d21 \n" /* G */ \
- "vmovl.u8 q10, d22 \n" /* R */ \
- "vmovl.u8 q11, d23 \n" /* A */ \
- "vshl.u16 q9, q9, #5 \n" /* G */ \
- "vshl.u16 q10, q10, #10 \n" /* R */ \
- "vshl.u16 q11, q11, #15 \n" /* A */ \
- "vorr q0, q8, q9 \n" /* BG */ \
- "vorr q1, q10, q11 \n" /* RA */ \
- "vorr q0, q0, q1 \n" /* BGRA */
+ "shll v0.8h, v23.8b, #8 \n" /* A */ \
+ "shll v22.8h, v22.8b, #8 \n" /* R */ \
+ "shll v20.8h, v20.8b, #8 \n" /* B */ \
+ "shll v21.8h, v21.8b, #8 \n" /* G */ \
+ "sri v0.8h, v22.8h, #1 \n" /* AR */ \
+ "sri v0.8h, v21.8h, #6 \n" /* ARG */ \
+ "sri v0.8h, v20.8h, #11 \n" /* ARGB */
#ifdef HAS_I422TOARGB1555ROW_NEON
void I422ToARGB1555Row_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb1555,
+ const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- MEMACCESS(5)
- "vld1.8 {d24}, [%5] \n"
- MEMACCESS(6)
- "vld1.8 {d25}, [%6] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
+ YUVTORGB_SETUP
+ "movi v23.8b, #255 \n"
"1: \n"
READYUV422
- YUV422TORGB
- "subs %4, %4, #8 \n"
- "vmov.u8 d23, #255 \n"
+ YUVTORGB(v22, v21, v20)
+ "subs %w4, %w4, #8 \n"
ARGBTOARGB1555
MEMACCESS(3)
- "vst1.8 {q0}, [%3]! \n" // store 8 pixels ARGB1555.
- "bgt 1b \n"
+ "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels RGB565.
+ "b.gt 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
"+r"(dst_argb1555), // %3
"+r"(width) // %4
- : "r"(&kUVToRB), // %5
- "r"(&kUVToG) // %6
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
+ "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
}
#endif // HAS_I422TOARGB1555ROW_NEON
#define ARGBTOARGB4444 \
- "vshr.u8 d20, d20, #4 \n" /* B */ \
- "vbic.32 d21, d21, d4 \n" /* G */ \
- "vshr.u8 d22, d22, #4 \n" /* R */ \
- "vbic.32 d23, d23, d4 \n" /* A */ \
- "vorr d0, d20, d21 \n" /* BG */ \
- "vorr d1, d22, d23 \n" /* RA */ \
- "vzip.u8 d0, d1 \n" /* BGRA */
+ /* Input v20.8b<=B, v21.8b<=G, v22.8b<=R, v23.8b<=A, v4.8b<=0x0f */ \
+ "ushr v20.8b, v20.8b, #4 \n" /* B */ \
+ "bic v21.8b, v21.8b, v4.8b \n" /* G */ \
+ "ushr v22.8b, v22.8b, #4 \n" /* R */ \
+ "bic v23.8b, v23.8b, v4.8b \n" /* A */ \
+ "orr v0.8b, v20.8b, v21.8b \n" /* BG */ \
+ "orr v1.8b, v22.8b, v23.8b \n" /* RA */ \
+ "zip1 v0.16b, v0.16b, v1.16b \n" /* BGRA */
#ifdef HAS_I422TOARGB4444ROW_NEON
void I422ToARGB4444Row_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb4444,
+ const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- MEMACCESS(5)
- "vld1.8 {d24}, [%5] \n"
- MEMACCESS(6)
- "vld1.8 {d25}, [%6] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- "vmov.u8 d4, #0x0f \n" // bits to clear with vbic.
- ".p2align 2 \n"
+ YUVTORGB_SETUP
+ "movi v4.16b, #0x0f \n" // bits to clear with vbic.
"1: \n"
READYUV422
- YUV422TORGB
- "subs %4, %4, #8 \n"
- "vmov.u8 d23, #255 \n"
+ YUVTORGB(v22, v21, v20)
+ "subs %w4, %w4, #8 \n"
+ "movi v23.8b, #255 \n"
ARGBTOARGB4444
MEMACCESS(3)
- "vst1.8 {q0}, [%3]! \n" // store 8 pixels ARGB4444.
- "bgt 1b \n"
+ "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels ARGB4444.
+ "b.gt 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
"+r"(dst_argb4444), // %3
"+r"(width) // %4
- : "r"(&kUVToRB), // %5
- "r"(&kUVToG) // %6
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
+ "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
}
#endif // HAS_I422TOARGB4444ROW_NEON
-#ifdef HAS_YTOARGBROW_NEON
-void YToARGBRow_NEON(const uint8* src_y,
- uint8* dst_argb,
- int width) {
- asm volatile (
- MEMACCESS(3)
- "vld1.8 {d24}, [%3] \n"
- MEMACCESS(4)
- "vld1.8 {d25}, [%4] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
- "1: \n"
- READYUV400
- YUV422TORGB
- "subs %2, %2, #8 \n"
- "vmov.u8 d23, #255 \n"
- MEMACCESS(1)
- "vst4.8 {d20, d21, d22, d23}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "r"(&kUVToRB), // %3
- "r"(&kUVToG) // %4
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-#endif // HAS_YTOARGBROW_NEON
-
#ifdef HAS_I400TOARGBROW_NEON
void I400ToARGBRow_NEON(const uint8* src_y,
uint8* dst_argb,
int width) {
+ int64 width64 = (int64)(width);
asm volatile (
- ".p2align 2 \n"
- "vmov.u8 d23, #255 \n"
+ YUVTORGB_SETUP
+ "movi v23.8b, #255 \n"
+ "1: \n"
+ READYUV400
+ YUVTORGB(v22, v21, v20)
+ "subs %w2, %w2, #8 \n"
+ MEMACCESS(1)
+ "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n"
+ "b.gt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width64) // %2
+ : [kUVToRB]"r"(&kYuvI601Constants.kUVToRB),
+ [kUVToG]"r"(&kYuvI601Constants.kUVToG),
+ [kUVBiasBGR]"r"(&kYuvI601Constants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvI601Constants.kYToRgb)
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
+ "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
+ );
+}
+#endif // HAS_I400TOARGBROW_NEON
+
+#ifdef HAS_J400TOARGBROW_NEON
+void J400ToARGBRow_NEON(const uint8* src_y,
+ uint8* dst_argb,
+ int width) {
+ asm volatile (
+ "movi v23.8b, #255 \n"
"1: \n"
MEMACCESS(0)
- "vld1.8 {d20}, [%0]! \n"
- "vmov d21, d20 \n"
- "vmov d22, d20 \n"
- "subs %2, %2, #8 \n"
+ "ld1 {v20.8b}, [%0], #8 \n"
+ "orr v21.8b, v20.8b, v20.8b \n"
+ "orr v22.8b, v20.8b, v20.8b \n"
+ "subs %w2, %w2, #8 \n"
MEMACCESS(1)
- "vst4.8 {d20, d21, d22, d23}, [%1]! \n"
- "bgt 1b \n"
+ "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n"
+ "b.gt 1b \n"
: "+r"(src_y), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
:
- : "cc", "memory", "d20", "d21", "d22", "d23"
+ : "cc", "memory", "v20", "v21", "v22", "v23"
);
}
-#endif // HAS_I400TOARGBROW_NEON
+#endif // HAS_J400TOARGBROW_NEON
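J400ToARGBRow simply duplicates full-range Y into B, G and R with opaque alpha; the orr instructions above are register copies. A scalar model (sketch):

#include <stdint.h>

static void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb,
                            int width) {
  for (int x = 0; x < width; ++x) {
    uint8_t y = src_y[x];
    dst_argb[0] = y;    /* B */
    dst_argb[1] = y;    /* G */
    dst_argb[2] = y;    /* R */
    dst_argb[3] = 255;  /* A */
    dst_argb += 4;
  }
}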
#ifdef HAS_NV12TOARGBROW_NEON
void NV12ToARGBRow_NEON(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- MEMACCESS(4)
- "vld1.8 {d24}, [%4] \n"
- MEMACCESS(5)
- "vld1.8 {d25}, [%5] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
+ YUVTORGB_SETUP
+ "movi v23.8b, #255 \n"
"1: \n"
READNV12
- YUV422TORGB
- "subs %3, %3, #8 \n"
- "vmov.u8 d23, #255 \n"
+ YUVTORGB(v22, v21, v20)
+ "subs %w3, %w3, #8 \n"
MEMACCESS(2)
- "vst4.8 {d20, d21, d22, d23}, [%2]! \n"
- "bgt 1b \n"
+ "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n"
+ "b.gt 1b \n"
: "+r"(src_y), // %0
"+r"(src_uv), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
- : "r"(&kUVToRB), // %4
- "r"(&kUVToG) // %5
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
+ "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
}
#endif // HAS_NV12TOARGBROW_NEON
-#ifdef HAS_NV21TOARGBROW_NEON
+#ifdef HAS_NV12TOARGBROW_NEON
void NV21ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_uv,
+ const uint8* src_vu,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- MEMACCESS(4)
- "vld1.8 {d24}, [%4] \n"
- MEMACCESS(5)
- "vld1.8 {d25}, [%5] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
+ YUVTORGB_SETUP
+ "movi v23.8b, #255 \n"
"1: \n"
READNV21
- YUV422TORGB
- "subs %3, %3, #8 \n"
- "vmov.u8 d23, #255 \n"
+ YUVTORGB(v22, v21, v20)
+ "subs %w3, %w3, #8 \n"
MEMACCESS(2)
- "vst4.8 {d20, d21, d22, d23}, [%2]! \n"
- "bgt 1b \n"
+ "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n"
+ "b.gt 1b \n"
: "+r"(src_y), // %0
- "+r"(src_uv), // %1
+ "+r"(src_vu), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
- : "r"(&kUVToRB), // %4
- "r"(&kUVToG) // %5
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
+ "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
}
-#endif // HAS_NV21TOARGBROW_NEON
+#endif // HAS_NV12TOARGBROW_NEON
#ifdef HAS_NV12TORGB565ROW_NEON
void NV12ToRGB565Row_NEON(const uint8* src_y,
const uint8* src_uv,
uint8* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- MEMACCESS(4)
- "vld1.8 {d24}, [%4] \n"
- MEMACCESS(5)
- "vld1.8 {d25}, [%5] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
+ YUVTORGB_SETUP
"1: \n"
READNV12
- YUV422TORGB
- "subs %3, %3, #8 \n"
+ YUVTORGB(v22, v21, v20)
+ "subs %w3, %w3, #8 \n"
ARGBTORGB565
MEMACCESS(2)
- "vst1.8 {q0}, [%2]! \n" // store 8 pixels RGB565.
- "bgt 1b \n"
+ "st1 {v0.8h}, [%2], 16 \n" // store 8 pixels RGB565.
+ "b.gt 1b \n"
: "+r"(src_y), // %0
"+r"(src_uv), // %1
"+r"(dst_rgb565), // %2
"+r"(width) // %3
- : "r"(&kUVToRB), // %4
- "r"(&kUVToG) // %5
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
+ "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
}
#endif // HAS_NV12TORGB565ROW_NEON
-#ifdef HAS_NV21TORGB565ROW_NEON
-void NV21ToRGB565Row_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_rgb565,
- int width) {
- asm volatile (
- MEMACCESS(4)
- "vld1.8 {d24}, [%4] \n"
- MEMACCESS(5)
- "vld1.8 {d25}, [%5] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
- "1: \n"
- READNV21
- YUV422TORGB
- "subs %3, %3, #8 \n"
- ARGBTORGB565
- MEMACCESS(2)
- "vst1.8 {q0}, [%2]! \n" // store 8 pixels RGB565.
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_uv), // %1
- "+r"(dst_rgb565), // %2
- "+r"(width) // %3
- : "r"(&kUVToRB), // %4
- "r"(&kUVToG) // %5
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-#endif // HAS_NV21TORGB565ROW_NEON
-
#ifdef HAS_YUY2TOARGBROW_NEON
void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width) {
+ int64 width64 = (int64)(width);
asm volatile (
- MEMACCESS(3)
- "vld1.8 {d24}, [%3] \n"
- MEMACCESS(4)
- "vld1.8 {d25}, [%4] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
+ YUVTORGB_SETUP
+ "movi v23.8b, #255 \n"
"1: \n"
READYUY2
- YUV422TORGB
- "subs %2, %2, #8 \n"
- "vmov.u8 d23, #255 \n"
+ YUVTORGB(v22, v21, v20)
+ "subs %w2, %w2, #8 \n"
MEMACCESS(1)
- "vst4.8 {d20, d21, d22, d23}, [%1]! \n"
- "bgt 1b \n"
+ "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n"
+ "b.gt 1b \n"
: "+r"(src_yuy2), // %0
"+r"(dst_argb), // %1
- "+r"(width) // %2
- : "r"(&kUVToRB), // %3
- "r"(&kUVToG) // %4
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ "+r"(width64) // %2
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
+ "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
}
#endif // HAS_YUY2TOARGBROW_NEON
@@ -787,31 +620,28 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
#ifdef HAS_UYVYTOARGBROW_NEON
void UYVYToARGBRow_NEON(const uint8* src_uyvy,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width) {
+ int64 width64 = (int64)(width);
asm volatile (
- MEMACCESS(3)
- "vld1.8 {d24}, [%3] \n"
- MEMACCESS(4)
- "vld1.8 {d25}, [%4] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
+ YUVTORGB_SETUP
+ "movi v23.8b, #255 \n"
"1: \n"
READUYVY
- YUV422TORGB
- "subs %2, %2, #8 \n"
- "vmov.u8 d23, #255 \n"
+ YUVTORGB(v22, v21, v20)
+ "subs %w2, %w2, #8 \n"
MEMACCESS(1)
- "vst4.8 {d20, d21, d22, d23}, [%1]! \n"
- "bgt 1b \n"
+ "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], 32 \n"
+ "b.gt 1b \n"
: "+r"(src_uyvy), // %0
"+r"(dst_argb), // %1
- "+r"(width) // %2
- : "r"(&kUVToRB), // %3
- "r"(&kUVToG) // %4
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ "+r"(width64) // %2
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
+ "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
}
#endif // HAS_UYVYTOARGBROW_NEON
@@ -821,16 +651,15 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy,
void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int width) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "ld2 {v0.16b, v1.16b}, [%0], #32 \n" // load 16 pairs of UV
- "subs %3, %3, #16 \n" // 16 processed per loop
+ "ld2 {v0.16b,v1.16b}, [%0], #32 \n" // load 16 pairs of UV
+ "subs %w3, %w3, #16 \n" // 16 processed per loop
MEMACCESS(1)
"st1 {v0.16b}, [%1], #16 \n" // store U
MEMACCESS(2)
"st1 {v1.16b}, [%2], #16 \n" // store V
- "bgt 1b \n"
+ "b.gt 1b \n"
: "+r"(src_uv), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
@@ -846,16 +675,15 @@ void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"ld1 {v0.16b}, [%0], #16 \n" // load U
MEMACCESS(1)
"ld1 {v1.16b}, [%1], #16 \n" // load V
- "subs %3, %3, #16 \n" // 16 processed per loop
+ "subs %w3, %w3, #16 \n" // 16 processed per loop
MEMACCESS(2)
- "st2 {v0.16b, v1.16b}, [%2], #32 \n" // store 16 pairs of UV
- "bgt 1b \n"
+ "st2 {v0.16b,v1.16b}, [%2], #32 \n" // store 16 pairs of UV
+ "b.gt 1b \n"
:
"+r"(src_u), // %0
"+r"(src_v), // %1
@@ -871,14 +699,13 @@ void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
#ifdef HAS_COPYROW_NEON
void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "ld1 {v0.8b-v3.8b}, [%0], #32 \n" // load 32
- "subs %2, %2, #32 \n" // 32 processed per loop
+ "ld1 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 32
+ "subs %w2, %w2, #32 \n" // 32 processed per loop
MEMACCESS(1)
- "st1 {v0.8b-v3.8b}, [%1], #32 \n" // store 32
- "bgt 1b \n"
+ "st1 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 32
+ "b.gt 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(count) // %2 // Output registers
@@ -888,57 +715,58 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
}
#endif // HAS_COPYROW_NEON
-// SetRow8 writes 'count' bytes using a 32 bit value repeated.
-#ifdef HAS_SETROW_NEON
-void SetRow_NEON(uint8* dst, uint32 v32, int count) {
+// SetRow writes 'count' bytes using an 8 bit value repeated.
+void SetRow_NEON(uint8* dst, uint8 v8, int count) {
asm volatile (
- "dup v0.4s, %w2 \n" // duplicate 4 ints
- "1: \n"
- "subs %1, %1, #16 \n" // 16 bytes per loop
+ "dup v0.16b, %w2 \n" // duplicate 16 bytes
+ "1: \n"
+ "subs %w1, %w1, #16 \n" // 16 bytes per loop
MEMACCESS(0)
"st1 {v0.16b}, [%0], #16 \n" // store
- "bgt 1b \n"
+ "b.gt 1b \n"
+ : "+r"(dst), // %0
+ "+r"(count) // %1
+ : "r"(v8) // %2
+ : "cc", "memory", "v0"
+ );
+}
+
+void ARGBSetRow_NEON(uint8* dst, uint32 v32, int count) {
+ asm volatile (
+ "dup v0.4s, %w2 \n" // duplicate 4 ints
+ "1: \n"
+ "subs %w1, %w1, #4 \n" // 4 ints per loop
+ MEMACCESS(0)
+ "st1 {v0.16b}, [%0], #16 \n" // store
+ "b.gt 1b \n"
: "+r"(dst), // %0
"+r"(count) // %1
: "r"(v32) // %2
: "cc", "memory", "v0"
);
}
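After this change SetRow takes an 8-bit fill value with a byte count, while the new ARGBSetRow fills whole 32-bit pixels with a pixel count. Scalar models of the two contracts (illustrative, not the patch's code):

#include <stdint.h>
#include <string.h>

static void SetRow_C(uint8_t* dst, uint8_t v8, int count) {
  memset(dst, v8, count);              /* count is in bytes */
}

static void ARGBSetRow_C(uint8_t* dst_argb, uint32_t v32, int count) {
  for (int x = 0; x < count; ++x) {    /* count is in pixels */
    memcpy(dst_argb + 4 * x, &v32, 4);
  }
}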
-#endif // HAS_SETROW_NEON
-
-// TODO(fbarchard): Make fully assembler
-// SetRow32 writes 'count' words using a 32 bit value repeated.
-#ifdef HAS_ARGBSETROWS_NEON
-void ARGBSetRows_NEON(uint8* dst, uint32 v32, int width,
- int dst_stride, int height) {
- for (int y = 0; y < height; ++y) {
- SetRow_NEON(dst, v32, width << 2);
- dst += dst_stride;
- }
-}
-#endif // HAS_ARGBSETROWS_NEON
#ifdef HAS_MIRRORROW_NEON
void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
+ int64 width64 = (int64) width;
asm volatile (
// Start at end of source row.
"add %0, %0, %2 \n"
"sub %0, %0, #16 \n"
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"ld1 {v0.16b}, [%0], %3 \n" // src -= 16
- "subs %2, %2, #16 \n" // 16 pixels per loop.
+ "subs %2, %2, #16 \n" // 16 pixels per loop.
"rev64 v0.16b, v0.16b \n"
MEMACCESS(1)
"st1 {v0.D}[1], [%1], #8 \n" // dst += 16
MEMACCESS(1)
"st1 {v0.D}[0], [%1], #8 \n"
- "bgt 1b \n"
+ "b.gt 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
- "+r"(width) // %2
+ "+r"(width64) // %2
: "r"((ptrdiff_t)-16) // %3
: "cc", "memory", "v0"
);
@@ -948,12 +776,12 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
#ifdef HAS_MIRRORUVROW_NEON
void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int width) {
+ int64 width64 = (int64) width;
asm volatile (
// Start at end of source row.
"add %0, %0, %3, lsl #1 \n"
"sub %0, %0, #16 \n"
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"ld2 {v0.8b, v1.8b}, [%0], %4 \n" // src -= 16
@@ -961,14 +789,14 @@ void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
"rev64 v0.8b, v0.8b \n"
"rev64 v1.8b, v1.8b \n"
MEMACCESS(1)
- "st1 {v0.8b}, [%1], #8 \n" // dst += 8
+ "st1 {v0.8b}, [%1], #8 \n" // dst += 8
MEMACCESS(2)
- "st1 {v1.8b}, [%2], #8 \n"
- "bgt 1b \n"
+ "st1 {v1.8b}, [%2], #8 \n"
+ "b.gt 1b \n"
: "+r"(src_uv), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
- "+r"(width) // %3
+ "+r"(width64) // %3
: "r"((ptrdiff_t)-16) // %4
: "cc", "memory", "v0", "v1"
);
@@ -977,12 +805,12 @@ void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
#ifdef HAS_ARGBMIRRORROW_NEON
void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) {
+ int64 width64 = (int64) width;
asm volatile (
// Start at end of source row.
"add %0, %0, %2, lsl #2 \n"
"sub %0, %0, #16 \n"
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"ld1 {v0.16b}, [%0], %3 \n" // src -= 16
@@ -992,10 +820,10 @@ void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) {
"st1 {v0.D}[1], [%1], #8 \n" // dst += 16
MEMACCESS(1)
"st1 {v0.D}[0], [%1], #8 \n"
- "bgt 1b \n"
+ "b.gt 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
- "+r"(width) // %2
+ "+r"(width64) // %2
: "r"((ptrdiff_t)-16) // %3
: "cc", "memory", "v0"
);
@@ -1003,20 +831,19 @@ void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) {
#endif // HAS_ARGBMIRRORROW_NEON
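All three mirror kernels start at the end of the row and walk backwards 16 bytes at a time, using rev64 plus stores of the two D halves in swapped order to reverse within the vector (hence the int64 width and the ptrdiff_t -16 stride). The scalar equivalent for the plain byte case (a sketch):

#include <stdint.h>

static void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
  src += width - 1;                    /* last source byte */
  for (int x = 0; x < width; ++x) {
    dst[x] = src[-x];
  }
}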
#ifdef HAS_RGB24TOARGBROW_NEON
-void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix) {
+void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int width) {
asm volatile (
"movi v4.8b, #255 \n" // Alpha
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "ld3 {v1.8b-v3.8b}, [%0], #24 \n" // load 8 pixels of RGB24.
- "subs %2, %2, #8 \n" // 8 processed per loop.
+ "ld3 {v1.8b,v2.8b,v3.8b}, [%0], #24 \n" // load 8 pixels of RGB24.
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
MEMACCESS(1)
- "st4 {v1.8b-v4.8b}, [%1], #32 \n" // store 8 pixels of ARGB.
- "bgt 1b \n"
+ "st4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%1], #32 \n" // store 8 ARGB pixels
+ "b.gt 1b \n"
: "+r"(src_rgb24), // %0
"+r"(dst_argb), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "v1", "v2", "v3", "v4" // Clobber List
);
@@ -1024,159 +851,185 @@ void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix) {
#endif // HAS_RGB24TOARGBROW_NEON
#ifdef HAS_RAWTOARGBROW_NEON
-void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix) {
+void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int width) {
asm volatile (
"movi v5.8b, #255 \n" // Alpha
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "ld3 {v0.8b-v2.8b}, [%0], #24 \n" // read r g b
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "mov v3.8b, v1.8b \n" // move g
- "mov v4.8b, v0.8b \n" // move r
+ "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // read r g b
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
+ "orr v3.8b, v1.8b, v1.8b \n" // move g
+ "orr v4.8b, v0.8b, v0.8b \n" // move r
MEMACCESS(1)
- "st4 {v2.8b-v5.8b}, [%1], #32 \n" // store b g r a
- "bgt 1b \n"
+ "st4 {v2.8b,v3.8b,v4.8b,v5.8b}, [%1], #32 \n" // store b g r a
+ "b.gt 1b \n"
: "+r"(src_raw), // %0
"+r"(dst_argb), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5" // Clobber List
);
}
#endif // HAS_RAWTOARGBROW_NEON
-#define RGB565TOARGB \
- "vshrn.u16 d6, q0, #5 \n" /* G xxGGGGGG */ \
- "vuzp.u8 d0, d1 \n" /* d0 xxxBBBBB RRRRRxxx */ \
- "vshl.u8 d6, d6, #2 \n" /* G GGGGGG00 upper 6 */ \
- "vshr.u8 d1, d1, #3 \n" /* R 000RRRRR lower 5 */ \
- "vshl.u8 q0, q0, #3 \n" /* B,R BBBBB000 upper 5 */ \
- "vshr.u8 q2, q0, #5 \n" /* B,R 00000BBB lower 3 */ \
- "vorr.u8 d0, d0, d4 \n" /* B */ \
- "vshr.u8 d4, d6, #6 \n" /* G 000000GG lower 2 */ \
- "vorr.u8 d2, d1, d5 \n" /* R */ \
- "vorr.u8 d1, d4, d6 \n" /* G */
-
-#ifdef HAS_RGB565TOARGBROW_NEON
-void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix) {
+void RAWToRGB24Row_NEON(const uint8* src_raw, uint8* dst_rgb24, int width) {
asm volatile (
- "vmov.u8 d3, #255 \n" // Alpha
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop.
+ "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // read r g b
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
+ "orr v3.8b, v1.8b, v1.8b \n" // move g
+ "orr v4.8b, v0.8b, v0.8b \n" // move r
+ MEMACCESS(1)
+ "st3 {v2.8b,v3.8b,v4.8b}, [%1], #24 \n" // store b g r
+ "b.gt 1b \n"
+ : "+r"(src_raw), // %0
+ "+r"(dst_rgb24), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4" // Clobber List
+ );
+}
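+
+// Illustrative scalar sketch of the RAWToRGB24 kernel above: RAW is R,G,B in
+// memory and RGB24 is B,G,R, so the conversion is a per-pixel byte swap.
+// This hypothetical helper is not wired into any code path.
+static __inline void RAWToRGB24Pixel_C(const uint8* src, uint8* dst) {
+  dst[0] = src[2];  // B
+  dst[1] = src[1];  // G
+  dst[2] = src[0];  // R
+}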
+
+#define RGB565TOARGB \
+ "shrn v6.8b, v0.8h, #5 \n" /* G xxGGGGGG */ \
+ "shl v6.8b, v6.8b, #2 \n" /* G GGGGGG00 upper 6 */ \
+ "ushr v4.8b, v6.8b, #6 \n" /* G 000000GG lower 2 */ \
+ "orr v1.8b, v4.8b, v6.8b \n" /* G */ \
+ "xtn v2.8b, v0.8h \n" /* B xxxBBBBB */ \
+ "ushr v0.8h, v0.8h, #11 \n" /* R 000RRRRR */ \
+ "xtn2 v2.16b,v0.8h \n" /* R in upper part */ \
+ "shl v2.16b, v2.16b, #3 \n" /* R,B BBBBB000 upper 5 */ \
+ "ushr v0.16b, v2.16b, #5 \n" /* R,B 00000BBB lower 3 */ \
+ "orr v0.16b, v0.16b, v2.16b \n" /* R,B */ \
+ "dup v2.2D, v0.D[1] \n" /* R */
+
+#ifdef HAS_RGB565TOARGBROW_NEON
+void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int width) {
+ asm volatile (
+ "movi v3.8b, #255 \n" // Alpha
+ "1: \n"
+ MEMACCESS(0)
+ "ld1 {v0.16b}, [%0], #16 \n" // load 8 RGB565 pixels.
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
RGB565TOARGB
MEMACCESS(1)
- "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB.
- "bgt 1b \n"
+ "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB pixels
+ "b.gt 1b \n"
: "+r"(src_rgb565), // %0
"+r"(dst_argb), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
- : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v6" // Clobber List
);
}
#endif // HAS_RGB565TOARGBROW_NEON
#define ARGB1555TOARGB \
- "vshrn.u16 d7, q0, #8 \n" /* A Arrrrrxx */ \
- "vshr.u8 d6, d7, #2 \n" /* R xxxRRRRR */ \
- "vshrn.u16 d5, q0, #5 \n" /* G xxxGGGGG */ \
- "vmovn.u16 d4, q0 \n" /* B xxxBBBBB */ \
- "vshr.u8 d7, d7, #7 \n" /* A 0000000A */ \
- "vneg.s8 d7, d7 \n" /* A AAAAAAAA upper 8 */ \
- "vshl.u8 d6, d6, #3 \n" /* R RRRRR000 upper 5 */ \
- "vshr.u8 q1, q3, #5 \n" /* R,A 00000RRR lower 3 */ \
- "vshl.u8 q0, q2, #3 \n" /* B,G BBBBB000 upper 5 */ \
- "vshr.u8 q2, q0, #5 \n" /* B,G 00000BBB lower 3 */ \
- "vorr.u8 q1, q1, q3 \n" /* R,A */ \
- "vorr.u8 q0, q0, q2 \n" /* B,G */ \
+ "ushr v2.8h, v0.8h, #10 \n" /* R xxxRRRRR */ \
+ "shl v2.8h, v2.8h, #3 \n" /* R RRRRR000 upper 5 */ \
+ "xtn v3.8b, v2.8h \n" /* RRRRR000 AAAAAAAA */ \
+ \
+ "sshr v2.8h, v0.8h, #15 \n" /* A AAAAAAAA */ \
+ "xtn2 v3.16b, v2.8h \n" \
+ \
+ "xtn v2.8b, v0.8h \n" /* B xxxBBBBB */ \
+ "shrn2 v2.16b,v0.8h, #5 \n" /* G xxxGGGGG */ \
+ \
+ "ushr v1.16b, v3.16b, #5 \n" /* R,A 00000RRR lower 3 */ \
+ "shl v0.16b, v2.16b, #3 \n" /* B,G BBBBB000 upper 5 */ \
+ "ushr v2.16b, v0.16b, #5 \n" /* B,G 00000BBB lower 3 */ \
+ \
+ "orr v0.16b, v0.16b, v2.16b \n" /* B,G */ \
+ "orr v2.16b, v1.16b, v3.16b \n" /* R,A */ \
+ "dup v1.2D, v0.D[1] \n" \
+ "dup v3.2D, v2.D[1] \n"
-// RGB555TOARGB is same as ARGB1555TOARGB but ignores alpha.
+// RGB555TOARGB is the same as ARGB1555TOARGB but ignores alpha.
#define RGB555TOARGB \
- "vshrn.u16 d6, q0, #5 \n" /* G xxxGGGGG */ \
- "vuzp.u8 d0, d1 \n" /* d0 xxxBBBBB xRRRRRxx */ \
- "vshl.u8 d6, d6, #3 \n" /* G GGGGG000 upper 5 */ \
- "vshr.u8 d1, d1, #2 \n" /* R 00xRRRRR lower 5 */ \
- "vshl.u8 q0, q0, #3 \n" /* B,R BBBBB000 upper 5 */ \
- "vshr.u8 q2, q0, #5 \n" /* B,R 00000BBB lower 3 */ \
- "vorr.u8 d0, d0, d4 \n" /* B */ \
- "vshr.u8 d4, d6, #5 \n" /* G 00000GGG lower 3 */ \
- "vorr.u8 d2, d1, d5 \n" /* R */ \
- "vorr.u8 d1, d4, d6 \n" /* G */
+ "ushr v2.8h, v0.8h, #10 \n" /* R xxxRRRRR */ \
+ "shl v2.8h, v2.8h, #3 \n" /* R RRRRR000 upper 5 */ \
+ "xtn v3.8b, v2.8h \n" /* RRRRR000 */ \
+ \
+ "xtn v2.8b, v0.8h \n" /* B xxxBBBBB */ \
+ "shrn2 v2.16b,v0.8h, #5 \n" /* G xxxGGGGG */ \
+ \
+ "ushr v1.16b, v3.16b, #5 \n" /* R 00000RRR lower 3 */ \
+ "shl v0.16b, v2.16b, #3 \n" /* B,G BBBBB000 upper 5 */ \
+ "ushr v2.16b, v0.16b, #5 \n" /* B,G 00000BBB lower 3 */ \
+ \
+ "orr v0.16b, v0.16b, v2.16b \n" /* B,G */ \
+ "orr v2.16b, v1.16b, v3.16b \n" /* R */ \
+ "dup v1.2D, v0.D[1] \n" /* G */ \
#ifdef HAS_ARGB1555TOARGBROW_NEON
void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb,
- int pix) {
+ int width) {
asm volatile (
- "vmov.u8 d3, #255 \n" // Alpha
- ".p2align 2 \n"
+ "movi v3.8b, #255 \n" // Alpha
"1: \n"
MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop.
+ "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB1555 pixels.
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
ARGB1555TOARGB
MEMACCESS(1)
- "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB.
- "bgt 1b \n"
+ "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB pixels
+ "b.gt 1b \n"
: "+r"(src_argb1555), // %0
"+r"(dst_argb), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
- : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List
+ : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
);
}
#endif // HAS_ARGB1555TOARGBROW_NEON
#define ARGB4444TOARGB \
- "vuzp.u8 d0, d1 \n" /* d0 BG, d1 RA */ \
- "vshl.u8 q2, q0, #4 \n" /* B,R BBBB0000 */ \
- "vshr.u8 q1, q0, #4 \n" /* G,A 0000GGGG */ \
- "vshr.u8 q0, q2, #4 \n" /* B,R 0000BBBB */ \
- "vorr.u8 q0, q0, q2 \n" /* B,R BBBBBBBB */ \
- "vshl.u8 q2, q1, #4 \n" /* G,A GGGG0000 */ \
- "vorr.u8 q1, q1, q2 \n" /* G,A GGGGGGGG */ \
- "vswp.u8 d1, d2 \n" /* B,R,G,A -> B,G,R,A */
+ "shrn v1.8b, v0.8h, #8 \n" /* v1(l) AR */ \
+ "xtn2 v1.16b, v0.8h \n" /* v1(h) GB */ \
+ "shl v2.16b, v1.16b, #4 \n" /* B,R BBBB0000 */ \
+ "ushr v3.16b, v1.16b, #4 \n" /* G,A 0000GGGG */ \
+ "ushr v0.16b, v2.16b, #4 \n" /* B,R 0000BBBB */ \
+ "shl v1.16b, v3.16b, #4 \n" /* G,A GGGG0000 */ \
+ "orr v2.16b, v0.16b, v2.16b \n" /* B,R BBBBBBBB */ \
+ "orr v3.16b, v1.16b, v3.16b \n" /* G,A GGGGGGGG */ \
+ "dup v0.2D, v2.D[1] \n" \
+ "dup v1.2D, v3.D[1] \n"
#ifdef HAS_ARGB4444TOARGBROW_NEON
void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb,
- int pix) {
+ int width) {
asm volatile (
- "vmov.u8 d3, #255 \n" // Alpha
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop.
+ "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB4444 pixels.
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
ARGB4444TOARGB
MEMACCESS(1)
- "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB.
- "bgt 1b \n"
+ "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB pixels
+ "b.gt 1b \n"
: "+r"(src_argb4444), // %0
"+r"(dst_argb), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
- : "cc", "memory", "q0", "q1", "q2" // Clobber List
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4" // Clobber List
);
}
#endif // HAS_ARGB4444TOARGBROW_NEON
#ifdef HAS_ARGBTORGB24ROW_NEON
-void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int pix) {
+void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int width) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "ld4 {v1.8b-v4.8b}, [%0], #32 \n" // load 8 pixels of ARGB.
- "subs %2, %2, #8 \n" // 8 processed per loop.
+ "ld4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%0], #32 \n" // load 8 ARGB pixels
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
MEMACCESS(1)
- "st3 {v1.8b-v3.8b}, [%1], #24 \n" // store 8 pixels of RGB24.
- "bgt 1b \n"
+ "st3 {v1.8b,v2.8b,v3.8b}, [%1], #24 \n" // store 8 pixels of RGB24.
+ "b.gt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_rgb24), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "v1", "v2", "v3", "v4" // Clobber List
);
@@ -1184,21 +1037,20 @@ void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int pix) {
#endif // HAS_ARGBTORGB24ROW_NEON
#ifdef HAS_ARGBTORAWROW_NEON
-void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int pix) {
+void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int width) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "ld4 {v1.8b-v4.8b}, [%0], #32 \n" // load b g r a
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "mov v4.8b, v2.8b \n" // mov g
- "mov v5.8b, v1.8b \n" // mov b
+ "ld4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%0], #32 \n" // load b g r a
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
+ "orr v4.8b, v2.8b, v2.8b \n" // mov g
+ "orr v5.8b, v1.8b, v1.8b \n" // mov b
MEMACCESS(1)
- "st3 {v3.8b-v5.8b}, [%1], #24 \n" // store r g b
- "bgt 1b \n"
+ "st3 {v3.8b,v4.8b,v5.8b}, [%1], #24 \n" // store r g b
+ "b.gt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_raw), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "v1", "v2", "v3", "v4", "v5" // Clobber List
);
@@ -1206,19 +1058,18 @@ void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int pix) {
#endif // HAS_ARGBTORAWROW_NEON
#ifdef HAS_YUY2TOYROW_NEON
-void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix) {
+void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int width) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "ld2 {v0.16b, v1.16b}, [%0], #32 \n" // load 16 pixels of YUY2.
- "subs %2, %2, #16 \n" // 16 processed per loop.
+ "ld2 {v0.16b,v1.16b}, [%0], #32 \n" // load 16 pixels of YUY2.
+ "subs %w2, %w2, #16 \n" // 16 processed per loop.
MEMACCESS(1)
"st1 {v0.16b}, [%1], #16 \n" // store 16 pixels of Y.
- "bgt 1b \n"
+ "b.gt 1b \n"
: "+r"(src_yuy2), // %0
"+r"(dst_y), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "v0", "v1" // Clobber List
);
@@ -1226,19 +1077,18 @@ void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix) {
#endif // HAS_YUY2TOYROW_NEON
#ifdef HAS_UYVYTOYROW_NEON
-void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix) {
+void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int width) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "ld2 {v0.16b, v1.16b}, [%0], #32 \n" // load 16 pixels of UYVY.
- "subs %2, %2, #16 \n" // 16 processed per loop.
+ "ld2 {v0.16b,v1.16b}, [%0], #32 \n" // load 16 pixels of UYVY.
+ "subs %w2, %w2, #16 \n" // 16 processed per loop.
MEMACCESS(1)
"st1 {v1.16b}, [%1], #16 \n" // store 16 pixels of Y.
- "bgt 1b \n"
+ "b.gt 1b \n"
: "+r"(src_uyvy), // %0
"+r"(dst_y), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "v0", "v1" // Clobber List
);
@@ -1247,22 +1097,21 @@ void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix) {
#ifdef HAS_YUY2TOUV422ROW_NEON
void YUY2ToUV422Row_NEON(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,
- int pix) {
+ int width) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 16 pixels of YUY2.
- "subs %3, %3, #16 \n" // 16 pixels = 8 UVs.
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 16 YUY2 pixels
+ "subs %w3, %w3, #16 \n" // 16 pixels = 8 UVs.
MEMACCESS(1)
"st1 {v1.8b}, [%1], #8 \n" // store 8 U.
MEMACCESS(2)
"st1 {v3.8b}, [%2], #8 \n" // store 8 V.
- "bgt 1b \n"
+ "b.gt 1b \n"
: "+r"(src_yuy2), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
- "+r"(pix) // %3
+ "+r"(width) // %3
:
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
);
@@ -1271,22 +1120,21 @@ void YUY2ToUV422Row_NEON(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,
#ifdef HAS_UYVYTOUV422ROW_NEON
void UYVYToUV422Row_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v,
- int pix) {
+ int width) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 16 pixels of UYVY.
- "subs %3, %3, #16 \n" // 16 pixels = 8 UVs.
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 16 UYVY pixels
+ "subs %w3, %w3, #16 \n" // 16 pixels = 8 UVs.
MEMACCESS(1)
"st1 {v0.8b}, [%1], #8 \n" // store 8 U.
MEMACCESS(2)
"st1 {v2.8b}, [%2], #8 \n" // store 8 V.
- "bgt 1b \n"
+ "b.gt 1b \n"
: "+r"(src_uyvy), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
- "+r"(pix) // %3
+ "+r"(width) // %3
:
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
);
@@ -1295,154 +1143,82 @@ void UYVYToUV422Row_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v,
#ifdef HAS_YUY2TOUVROW_NEON
void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) {
+ uint8* dst_u, uint8* dst_v, int width) {
+ const uint8* src_yuy2b = src_yuy2 + stride_yuy2;
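+ // Computing the second-row pointer in C (replacing the asm-side
+ // "add %x1, %x0, %w1, sxtw") hands the constraint a genuine 64-bit pointer
+ // instead of a 32-bit stride; the *ToUVRow functions below hoist their
+ // second-row pointers the same way.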
asm volatile (
- "add %x1, %x0, %w1, sxtw \n" // stride + src_yuy2
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 16 pixels of YUY2.
- "subs %4, %4, #16 \n" // 16 pixels = 8 UVs.
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 16 pixels
+ "subs %w4, %w4, #16 \n" // 16 pixels = 8 UVs.
MEMACCESS(1)
- "ld4 {v4.8b-v7.8b}, [%1], #32 \n" // load next row YUY2.
+ "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load next row
"urhadd v1.8b, v1.8b, v5.8b \n" // average rows of U
"urhadd v3.8b, v3.8b, v7.8b \n" // average rows of V
MEMACCESS(2)
"st1 {v1.8b}, [%2], #8 \n" // store 8 U.
MEMACCESS(3)
"st1 {v3.8b}, [%3], #8 \n" // store 8 V.
- "bgt 1b \n"
+ "b.gt 1b \n"
: "+r"(src_yuy2), // %0
- "+r"(stride_yuy2), // %1
+ "+r"(src_yuy2b), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
- "+r"(pix) // %4
+ "+r"(width) // %4
:
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" // Clobber List
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4",
+ "v5", "v6", "v7" // Clobber List
);
}
#endif // HAS_YUY2TOUVROW_NEON
#ifdef HAS_UYVYTOUVROW_NEON
void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) {
+ uint8* dst_u, uint8* dst_v, int width) {
+ const uint8* src_uyvyb = src_uyvy + stride_uyvy;
asm volatile (
- "add %x1, %x0, %w1, sxtw \n" // stride + src_uyvy
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 16 pixels of UYVY.
- "subs %4, %4, #16 \n" // 16 pixels = 8 UVs.
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 16 pixels
+ "subs %w4, %w4, #16 \n" // 16 pixels = 8 UVs.
MEMACCESS(1)
- "ld4 {v4.8b-v7.8b}, [%1], #32 \n" // load next row UYVY.
+ "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load next row
"urhadd v0.8b, v0.8b, v4.8b \n" // average rows of U
"urhadd v2.8b, v2.8b, v6.8b \n" // average rows of V
MEMACCESS(2)
"st1 {v0.8b}, [%2], #8 \n" // store 8 U.
MEMACCESS(3)
"st1 {v2.8b}, [%3], #8 \n" // store 8 V.
- "bgt 1b \n"
+ "b.gt 1b \n"
: "+r"(src_uyvy), // %0
- "+r"(stride_uyvy), // %1
+ "+r"(src_uyvyb), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
- "+r"(pix) // %4
+ "+r"(width) // %4
:
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" // Clobber List
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4",
+ "v5", "v6", "v7" // Clobber List
);
}
#endif // HAS_UYVYTOUVROW_NEON
-#ifdef HAS_HALFROW_NEON
-void HalfRow_NEON(const uint8* src_uv, int src_uv_stride,
- uint8* dst_uv, int pix) {
- asm volatile (
- // change the stride to row 2 pointer
- "add %x1, %x0, %w1, sxtw \n"
- "1: \n"
- MEMACCESS(0)
- "ld1 {v0.16b}, [%0], #16 \n" // load row 1 16 pixels.
- "subs %3, %3, #16 \n" // 16 processed per loop
- MEMACCESS(1)
- "ld1 {v1.16b}, [%1], #16 \n" // load row 2 16 pixels.
- "urhadd v0.16b, v0.16b, v1.16b \n" // average row 1 and 2
- MEMACCESS(2)
- "st1 {v0.16b}, [%2], #16 \n"
- "bgt 1b \n"
- : "+r"(src_uv), // %0
- "+r"(src_uv_stride), // %1
- "+r"(dst_uv), // %2
- "+r"(pix) // %3
- :
- : "cc", "memory", "v0", "v1" // Clobber List
- );
-}
-#endif // HAS_HALFROW_NEON
-
-// Select 2 channels from ARGB on alternating pixels. e.g. BGBGBGBG
-#ifdef HAS_ARGBTOBAYERROW_NEON
-void ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer,
- uint32 selector, int pix) {
- asm volatile (
- "mov v2.s[0], %w3 \n" // selector
- "1: \n"
- MEMACCESS(0)
- "ld1 {v0.16b, v1.16b}, [%0], 32 \n" // load row 8 pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop
- "tbl v4.8b, {v0.16b}, v2.8b \n" // look up 4 pixels
- "tbl v5.8b, {v1.16b}, v2.8b \n" // look up 4 pixels
- "trn1 v4.4s, v4.4s, v5.4s \n" // combine 8 pixels
- MEMACCESS(1)
- "st1 {v4.8b}, [%1], #8 \n" // store 8.
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_bayer), // %1
- "+r"(pix) // %2
- : "r"(selector) // %3
- : "cc", "memory", "v0", "v1", "v2", "v4", "v5" // Clobber List
- );
-}
-#endif // HAS_ARGBTOBAYERROW_NEON
-
-// Select G channels from ARGB. e.g. GGGGGGGG
-#ifdef HAS_ARGBTOBAYERGGROW_NEON
-void ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer,
- uint32 /*selector*/, int pix) {
- asm volatile (
- "1: \n"
- MEMACCESS(0)
- "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load row 8 pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop
- MEMACCESS(1)
- "st1 {v1.8b}, [%1], #8 \n" // store 8 G's.
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_bayer), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
- );
-}
-#endif // HAS_ARGBTOBAYERGGROW_NEON
-
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
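+// ARGBShuffleRow_NEON below relies on "tbl": every byte of the shuffler
+// vector selects a source byte by index, so a single table lookup permutes
+// the channels of 4 ARGB pixels at once (out-of-range indices read as zero).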
#ifdef HAS_ARGBSHUFFLEROW_NEON
void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix) {
+ const uint8* shuffler, int width) {
asm volatile (
MEMACCESS(3)
"ld1 {v2.16b}, [%3] \n" // shuffler
"1: \n"
MEMACCESS(0)
"ld1 {v0.16b}, [%0], #16 \n" // load 4 pixels.
- "subs %2, %2, #4 \n" // 4 processed per loop
+ "subs %w2, %w2, #4 \n" // 4 processed per loop
"tbl v1.16b, {v0.16b}, v2.16b \n" // look up 4 pixels
MEMACCESS(1)
"st1 {v1.16b}, [%1], #16 \n" // store 4.
- "bgt 1b \n"
+ "b.gt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
: "r"(shuffler) // %3
: "cc", "memory", "v0", "v1", "v2" // Clobber List
);
@@ -1455,19 +1231,18 @@ void I422ToYUY2Row_NEON(const uint8* src_y,
const uint8* src_v,
uint8* dst_yuy2, int width) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"ld2 {v0.8b, v1.8b}, [%0], #16 \n" // load 16 Ys
- "mov v2.8b, v1.8b \n"
+ "orr v2.8b, v1.8b, v1.8b \n"
MEMACCESS(1)
"ld1 {v1.8b}, [%1], #8 \n" // load 8 Us
MEMACCESS(2)
"ld1 {v3.8b}, [%2], #8 \n" // load 8 Vs
- "subs %4, %4, #16 \n" // 16 pixels
+ "subs %w4, %w4, #16 \n" // 16 pixels
MEMACCESS(3)
- "st4 {v0.8b-v3.8b}, [%3], #32 \n" // Store 8 YUY2/16 pixels.
- "bgt 1b \n"
+ "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%3], #32 \n" // Store 16 pixels.
+ "b.gt 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
@@ -1485,19 +1260,18 @@ void I422ToUYVYRow_NEON(const uint8* src_y,
const uint8* src_v,
uint8* dst_uyvy, int width) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "ld2 {v1.8b, v2.8b}, [%0], #16 \n" // load 16 Ys
- "mov v3.8b, v2.8b \n"
+ "ld2 {v1.8b,v2.8b}, [%0], #16 \n" // load 16 Ys
+ "orr v3.8b, v2.8b, v2.8b \n"
MEMACCESS(1)
"ld1 {v0.8b}, [%1], #8 \n" // load 8 Us
MEMACCESS(2)
"ld1 {v2.8b}, [%2], #8 \n" // load 8 Vs
- "subs %4, %4, #16 \n" // 16 pixels
+ "subs %w4, %w4, #16 \n" // 16 pixels
MEMACCESS(3)
- "st4 {v0.8b-v3.8b}, [%3], #32 \n" // Store 8 UYVY/16 pixels.
- "bgt 1b \n"
+ "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%3], #32 \n" // Store 16 pixels.
+ "b.gt 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
@@ -1510,83 +1284,104 @@ void I422ToUYVYRow_NEON(const uint8* src_y,
#endif // HAS_I422TOUYVYROW_NEON
#ifdef HAS_ARGBTORGB565ROW_NEON
-void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int pix) {
+void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int width) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB.
- "subs %2, %2, #8 \n" // 8 processed per loop.
+ "ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8 pixels
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
ARGBTORGB565
MEMACCESS(1)
- "vst1.8 {q0}, [%1]! \n" // store 8 pixels RGB565.
- "bgt 1b \n"
+ "st1 {v0.16b}, [%1], #16 \n" // store 8 pixels RGB565.
+ "b.gt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_rgb565), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
- : "cc", "memory", "q0", "q8", "q9", "q10", "q11"
+ : "cc", "memory", "v0", "v20", "v21", "v22", "v23"
+ );
+}
+#endif // HAS_ARGBTORGB565ROW_NEON
+
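+// The dither variant below saturating-adds a repeating 4-byte dither pattern
+// (dither4, broadcast across the vector by "dup") to B, G and R before the
+// 565 truncation, trading banding for high-frequency noise. Per channel this
+// is roughly: v = min(255, v + ((dither4 >> ((x & 3) * 8)) & 0xff)).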
+#ifdef HAS_ARGBTORGB565DITHERROW_NEON
+void ARGBToRGB565DitherRow_NEON(const uint8* src_argb, uint8* dst_rgb,
+ const uint32 dither4, int width) {
+ asm volatile (
+ "dup v1.4s, %w2 \n" // dither4
+ "1: \n"
+ MEMACCESS(1)
+ "ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" // load 8 pixels
+ "subs %w3, %w3, #8 \n" // 8 processed per loop.
+ "uqadd v20.8b, v20.8b, v1.8b \n"
+ "uqadd v21.8b, v21.8b, v1.8b \n"
+ "uqadd v22.8b, v22.8b, v1.8b \n"
+ ARGBTORGB565
+ MEMACCESS(0)
+ "st1 {v0.16b}, [%0], #16 \n" // store 8 pixels RGB565.
+ "b.gt 1b \n"
+ : "+r"(dst_rgb) // %0
+ : "r"(src_argb), // %1
+ "r"(dither4), // %2
+ "r"(width) // %3
+ : "cc", "memory", "v0", "v1", "v20", "v21", "v22", "v23"
);
}
-#endif // HAS_ARGBTORGB565ROW_NEON
+#endif // HAS_ARGBTORGB565DITHERROW_NEON
#ifdef HAS_ARGBTOARGB1555ROW_NEON
void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_argb1555,
- int pix) {
+ int width) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB.
- "subs %2, %2, #8 \n" // 8 processed per loop.
+ "ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8 pixels
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
ARGBTOARGB1555
MEMACCESS(1)
- "vst1.8 {q0}, [%1]! \n" // store 8 pixels ARGB1555.
- "bgt 1b \n"
+ "st1 {v0.16b}, [%1], #16 \n" // store 8 pixels ARGB1555.
+ "b.gt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb1555), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
- : "cc", "memory", "q0", "q8", "q9", "q10", "q11"
+ : "cc", "memory", "v0", "v20", "v21", "v22", "v23"
);
}
#endif // HAS_ARGBTOARGB1555ROW_NEON
#ifdef HAS_ARGBTOARGB4444ROW_NEON
void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444,
- int pix) {
+ int width) {
asm volatile (
- "vmov.u8 d4, #0x0f \n" // bits to clear with vbic.
- ".p2align 2 \n"
+ "movi v4.16b, #0x0f \n" // bits to clear with vbic.
"1: \n"
MEMACCESS(0)
- "vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB.
- "subs %2, %2, #8 \n" // 8 processed per loop.
+ "ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8 pixels
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
ARGBTOARGB4444
MEMACCESS(1)
- "vst1.8 {q0}, [%1]! \n" // store 8 pixels ARGB4444.
- "bgt 1b \n"
+ "st1 {v0.16b}, [%1], #16 \n" // store 8 pixels ARGB4444.
+ "b.gt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb4444), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
- : "cc", "memory", "q0", "q8", "q9", "q10", "q11"
+ : "cc", "memory", "v0", "v1", "v4", "v20", "v21", "v22", "v23"
);
}
#endif // HAS_ARGBTOARGB4444ROW_NEON
#ifdef HAS_ARGBTOYROW_NEON
-void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
+void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width) {
asm volatile (
"movi v4.8b, #13 \n" // B * 0.1016 coefficient
"movi v5.8b, #65 \n" // G * 0.5078 coefficient
"movi v6.8b, #33 \n" // R * 0.2578 coefficient
"movi v7.8b, #16 \n" // Add 16 constant
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 8 ARGB pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop.
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB pixels.
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
"umull v3.8h, v0.8b, v4.8b \n" // B
"umlal v3.8h, v1.8b, v5.8b \n" // G
"umlal v3.8h, v2.8b, v6.8b \n" // R
@@ -1594,10 +1389,10 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
"uqadd v0.8b, v0.8b, v7.8b \n"
MEMACCESS(1)
"st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
- "bgt 1b \n"
+ "b.gt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_y), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"
);
@@ -1605,26 +1400,25 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
#endif // HAS_ARGBTOYROW_NEON
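+
+// Per pixel, the coefficients above compute
+//   Y = ((13 * B + 65 * G + 33 * R + 64) >> 7) + 16
+// i.e. the 0.1016/0.5078/0.2578 weights scaled by 128, with sqrshrun doing
+// the rounding shift and uqadd the saturating +16 bias.
+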
#ifdef HAS_ARGBTOYJROW_NEON
-void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
+void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width) {
asm volatile (
"movi v4.8b, #15 \n" // B * 0.11400 coefficient
"movi v5.8b, #75 \n" // G * 0.58700 coefficient
"movi v6.8b, #38 \n" // R * 0.29900 coefficient
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 8 ARGB pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop.
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB pixels.
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
"umull v3.8h, v0.8b, v4.8b \n" // B
"umlal v3.8h, v1.8b, v5.8b \n" // G
"umlal v3.8h, v2.8b, v6.8b \n" // R
"sqrshrun v0.8b, v3.8h, #7 \n" // 15 bit to 8 bit Y
MEMACCESS(1)
"st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
- "bgt 1b \n"
+ "b.gt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_y), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6"
);
@@ -1634,7 +1428,7 @@ void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
// 8x1 pixels.
#ifdef HAS_ARGBTOUV444ROW_NEON
void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix) {
+ int width) {
asm volatile (
"movi v24.8b, #112 \n" // UB / VR 0.875 coefficient
"movi v25.8b, #74 \n" // UG -0.5781 coefficient
@@ -1642,11 +1436,10 @@ void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
"movi v27.8b, #18 \n" // VB -0.1406 coefficient
"movi v28.8b, #94 \n" // VG -0.7344 coefficient
"movi v29.16b,#0x80 \n" // 128.5
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 8 ARGB pixels.
- "subs %3, %3, #8 \n" // 8 processed per loop.
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB pixels.
+ "subs %w3, %w3, #8 \n" // 8 processed per loop.
"umull v4.8h, v0.8b, v24.8b \n" // B
"umlsl v4.8h, v1.8b, v25.8b \n" // G
"umlsl v4.8h, v2.8b, v26.8b \n" // R
@@ -1664,11 +1457,11 @@ void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
"st1 {v0.8b}, [%1], #8 \n" // store 8 pixels U.
MEMACCESS(2)
"st1 {v1.8b}, [%2], #8 \n" // store 8 pixels V.
- "bgt 1b \n"
+ "b.gt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
- "+r"(pix) // %3
+ "+r"(width) // %3
:
: "cc", "memory", "v0", "v1", "v2", "v3", "v4",
"v24", "v25", "v26", "v27", "v28", "v29"
@@ -1676,27 +1469,29 @@ void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
}
#endif // HAS_ARGBTOUV444ROW_NEON
-// 16x1 pixels -> 8x1. pix is number of argb pixels. e.g. 16.
+#define RGBTOUV_SETUP_REG \
+ "movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \
+ "movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \
+ "movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2 */ \
+ "movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2 */ \
+ "movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \
+ "movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */
+
+// 16x1 pixels -> 8x1. width is number of argb pixels. e.g. 16.
#ifdef HAS_ARGBTOUV422ROW_NEON
void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix) {
+ int width) {
asm volatile (
- "movi v20.8h, #112 / 2 \n" // UB / VR 0.875 coefficient
- "movi v21.8h, #74 / 2 \n" // UG -0.5781 coefficient
- "movi v22.8h, #38 / 2 \n" // UR -0.2969 coefficient
- "movi v23.8h, #18 / 2 \n" // VB -0.1406 coefficient
- "movi v24.8h, #94 / 2 \n" // VG -0.7344 coefficient
- "movi v25.16b, #0x80 \n" // 128.5
- ".p2align 2 \n"
+ RGBTOUV_SETUP_REG
"1: \n"
MEMACCESS(0)
- "ld4 {v0.16b-v3.16b}, [%0], #64 \n" // load 16 ARGB pixels.
+ "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels.
"uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts.
"uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
"uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts.
- "subs %3, %3, #16 \n" // 16 processed per loop.
+ "subs %w3, %w3, #16 \n" // 16 processed per loop.
"mul v3.8h, v0.8h, v20.8h \n" // B
"mls v3.8h, v1.8h, v21.8h \n" // G
"mls v3.8h, v2.8h, v22.8h \n" // R
@@ -1714,11 +1509,11 @@ void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
"st1 {v0.8b}, [%1], #8 \n" // store 8 pixels U.
MEMACCESS(2)
"st1 {v1.8b}, [%2], #8 \n" // store 8 pixels V.
- "bgt 1b \n"
+ "b.gt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
- "+r"(pix) // %3
+ "+r"(width) // %3
:
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
"v20", "v21", "v22", "v23", "v24", "v25"
@@ -1726,26 +1521,20 @@ void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
}
#endif // HAS_ARGBTOUV422ROW_NEON
-// 32x1 pixels -> 8x1. pix is number of argb pixels. e.g. 32.
+// 32x1 pixels -> 8x1. width is number of argb pixels. e.g. 32.
#ifdef HAS_ARGBTOUV411ROW_NEON
void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix) {
+ int width) {
asm volatile (
- "movi v20.8h, #112 / 2 \n" // UB / VR 0.875 coefficient
- "movi v21.8h, #74 / 2 \n" // UG -0.5781 coefficient
- "movi v22.8h, #38 / 2 \n" // UR -0.2969 coefficient
- "movi v23.8h, #18 / 2 \n" // VB -0.1406 coefficient
- "movi v24.8h, #94 / 2 \n" // VG -0.7344 coefficient
- "movi v25.16b, #0x80 \n" // 128.5
- ".p2align 2 \n"
+ RGBTOUV_SETUP_REG
"1: \n"
MEMACCESS(0)
- "ld4 {v0.16b-v3.16b}, [%0], #64 \n" // load 16 ARGB pixels.
+ "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels.
"uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts.
"uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
"uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts.
MEMACCESS(0)
- "ld4 {v4.16b-v7.16b}, [%0], #64 \n" // load next 16 ARGB pixels.
+ "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%0], #64 \n" // load next 16.
"uaddlp v4.8h, v4.16b \n" // B 16 bytes -> 8 shorts.
"uaddlp v5.8h, v5.16b \n" // G 16 bytes -> 8 shorts.
"uaddlp v6.8h, v6.16b \n" // R 16 bytes -> 8 shorts.
@@ -1758,7 +1547,7 @@ void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
"urshr v1.8h, v1.8h, #1 \n"
"urshr v2.8h, v2.8h, #1 \n"
- "subs %3, %3, #32 \n" // 32 processed per loop.
+ "subs %w3, %w3, #32 \n" // 32 processed per loop.
"mul v3.8h, v0.8h, v20.8h \n" // B
"mls v3.8h, v1.8h, v21.8h \n" // G
"mls v3.8h, v2.8h, v22.8h \n" // R
@@ -1773,11 +1562,11 @@ void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
"st1 {v0.8b}, [%1], #8 \n" // store 8 pixels U.
MEMACCESS(2)
"st1 {v1.8b}, [%2], #8 \n" // store 8 pixels V.
- "bgt 1b \n"
+ "b.gt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
- "+r"(pix) // %3
+ "+r"(width) // %3
:
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
"v20", "v21", "v22", "v23", "v24", "v25"
@@ -1785,67 +1574,60 @@ void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
}
#endif // HAS_ARGBTOUV411ROW_NEON
-// 16x2 pixels -> 8x1. pix is number of argb pixels. e.g. 16.
+// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
#define RGBTOUV(QB, QG, QR) \
- "vmul.s16 q8, " #QB ", q10 \n" /* B */ \
- "vmls.s16 q8, " #QG ", q11 \n" /* G */ \
- "vmls.s16 q8, " #QR ", q12 \n" /* R */ \
- "vadd.u16 q8, q8, q15 \n" /* +128 -> unsigned */ \
- "vmul.s16 q9, " #QR ", q10 \n" /* R */ \
- "vmls.s16 q9, " #QG ", q14 \n" /* G */ \
- "vmls.s16 q9, " #QB ", q13 \n" /* B */ \
- "vadd.u16 q9, q9, q15 \n" /* +128 -> unsigned */ \
- "vqshrn.u16 d0, q8, #8 \n" /* 16 bit to 8 bit U */ \
- "vqshrn.u16 d1, q9, #8 \n" /* 16 bit to 8 bit V */
+ "mul v3.8h, " #QB ",v20.8h \n" /* B */ \
+ "mul v4.8h, " #QR ",v20.8h \n" /* R */ \
+ "mls v3.8h, " #QG ",v21.8h \n" /* G */ \
+ "mls v4.8h, " #QG ",v24.8h \n" /* G */ \
+ "mls v3.8h, " #QR ",v22.8h \n" /* R */ \
+ "mls v4.8h, " #QB ",v23.8h \n" /* B */ \
+ "add v3.8h, v3.8h, v25.8h \n" /* +128 -> unsigned */ \
+ "add v4.8h, v4.8h, v25.8h \n" /* +128 -> unsigned */ \
+ "uqshrn v0.8b, v3.8h, #8 \n" /* 16 bit to 8 bit U */ \
+ "uqshrn v1.8b, v4.8h, #8 \n" /* 16 bit to 8 bit V */
// TODO(fbarchard): Consider vhadd vertical, then vpaddl horizontal, avoid shr.
+// TODO(fbarchard): consider ptrdiff_t for all strides.
+
#ifdef HAS_ARGBTOUVROW_NEON
void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int pix) {
+ uint8* dst_u, uint8* dst_v, int width) {
+ const uint8* src_argb_1 = src_argb + src_stride_argb;
asm volatile (
- "add %1, %0, %1 \n" // src_stride + src_argb
- "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
- "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
- "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
- "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
- "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
+ RGBTOUV_SETUP_REG
"1: \n"
MEMACCESS(0)
- "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
- MEMACCESS(0)
- "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
- "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
- "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
- "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
- MEMACCESS(1)
- "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ARGB pixels.
- MEMACCESS(1)
- "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ARGB pixels.
- "vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts.
- "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
- "vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts.
+ "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels.
+ "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts.
+ "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
+ "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts.
- "vrshr.u16 q0, q0, #1 \n" // 2x average
- "vrshr.u16 q1, q1, #1 \n"
- "vrshr.u16 q2, q2, #1 \n"
+ MEMACCESS(1)
+ "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load next 16
+ "uadalp v0.8h, v4.16b \n" // B 16 bytes -> 8 shorts.
+ "uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts.
+ "uadalp v2.8h, v6.16b \n" // R 16 bytes -> 8 shorts.
- "subs %4, %4, #16 \n" // 32 processed per loop.
- RGBTOUV(q0, q1, q2)
+ "urshr v0.8h, v0.8h, #1 \n" // 2x average
+ "urshr v1.8h, v1.8h, #1 \n"
+ "urshr v2.8h, v2.8h, #1 \n"
+
+ "subs %w4, %w4, #16 \n" // 32 processed per loop.
+ RGBTOUV(v0.8h, v1.8h, v2.8h)
MEMACCESS(2)
- "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
+ "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
MEMACCESS(3)
- "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
- "bgt 1b \n"
+ "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
+ "b.gt 1b \n"
: "+r"(src_argb), // %0
- "+r"(src_stride_argb), // %1
+ "+r"(src_argb_1), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
- "+r"(pix) // %4
+ "+r"(width) // %4
:
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+ "v20", "v21", "v22", "v23", "v24", "v25"
);
}
#endif // HAS_ARGBTOUVROW_NEON
@@ -1853,628 +1635,573 @@ void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
// TODO(fbarchard): Subsample match C code.
#ifdef HAS_ARGBTOUVJROW_NEON
void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int pix) {
+ uint8* dst_u, uint8* dst_v, int width) {
+ const uint8* src_argb_1 = src_argb + src_stride_argb;
asm volatile (
- "add %1, %0, %1 \n" // src_stride + src_argb
- "vmov.s16 q10, #127 / 2 \n" // UB / VR 0.500 coefficient
- "vmov.s16 q11, #84 / 2 \n" // UG -0.33126 coefficient
- "vmov.s16 q12, #43 / 2 \n" // UR -0.16874 coefficient
- "vmov.s16 q13, #20 / 2 \n" // VB -0.08131 coefficient
- "vmov.s16 q14, #107 / 2 \n" // VG -0.41869 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
+ "movi v20.8h, #63, lsl #0 \n" // UB/VR coeff (0.500) / 2
+ "movi v21.8h, #42, lsl #0 \n" // UG coeff (-0.33126) / 2
+ "movi v22.8h, #21, lsl #0 \n" // UR coeff (-0.16874) / 2
+ "movi v23.8h, #10, lsl #0 \n" // VB coeff (-0.08131) / 2
+ "movi v24.8h, #53, lsl #0 \n" // VG coeff (-0.41869) / 2
+ "movi v25.16b, #0x80 \n" // 128.5 (0x8080 in 16-bit)
"1: \n"
MEMACCESS(0)
- "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
- MEMACCESS(0)
- "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
- "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
- "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
- "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
+ "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels.
+ "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts.
+ "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
+ "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts.
MEMACCESS(1)
- "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ARGB pixels.
- MEMACCESS(1)
- "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ARGB pixels.
- "vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts.
- "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
- "vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts.
+ "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load next 16
+ "uadalp v0.8h, v4.16b \n" // B 16 bytes -> 8 shorts.
+ "uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts.
+ "uadalp v2.8h, v6.16b \n" // R 16 bytes -> 8 shorts.
- "vrshr.u16 q0, q0, #1 \n" // 2x average
- "vrshr.u16 q1, q1, #1 \n"
- "vrshr.u16 q2, q2, #1 \n"
+ "urshr v0.8h, v0.8h, #1 \n" // 2x average
+ "urshr v1.8h, v1.8h, #1 \n"
+ "urshr v2.8h, v2.8h, #1 \n"
- "subs %4, %4, #16 \n" // 32 processed per loop.
- RGBTOUV(q0, q1, q2)
+ "subs %w4, %w4, #16 \n" // 32 processed per loop.
+ RGBTOUV(v0.8h, v1.8h, v2.8h)
MEMACCESS(2)
- "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
+ "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
MEMACCESS(3)
- "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
- "bgt 1b \n"
+ "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
+ "b.gt 1b \n"
: "+r"(src_argb), // %0
- "+r"(src_stride_argb), // %1
+ "+r"(src_argb_1), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
- "+r"(pix) // %4
+ "+r"(width) // %4
:
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+ "v20", "v21", "v22", "v23", "v24", "v25"
);
}
#endif // HAS_ARGBTOUVJROW_NEON
#ifdef HAS_BGRATOUVROW_NEON
void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
- uint8* dst_u, uint8* dst_v, int pix) {
+ uint8* dst_u, uint8* dst_v, int width) {
+ const uint8* src_bgra_1 = src_bgra + src_stride_bgra;
asm volatile (
- "add %1, %0, %1 \n" // src_stride + src_bgra
- "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
- "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
- "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
- "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
- "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
+ RGBTOUV_SETUP_REG
"1: \n"
MEMACCESS(0)
- "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 BGRA pixels.
- MEMACCESS(0)
- "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 BGRA pixels.
- "vpaddl.u8 q3, q3 \n" // B 16 bytes -> 8 shorts.
- "vpaddl.u8 q2, q2 \n" // G 16 bytes -> 8 shorts.
- "vpaddl.u8 q1, q1 \n" // R 16 bytes -> 8 shorts.
+ "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels.
+ "uaddlp v0.8h, v3.16b \n" // B 16 bytes -> 8 shorts.
+ "uaddlp v3.8h, v2.16b \n" // G 16 bytes -> 8 shorts.
+ "uaddlp v2.8h, v1.16b \n" // R 16 bytes -> 8 shorts.
MEMACCESS(1)
- "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more BGRA pixels.
- MEMACCESS(1)
- "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 BGRA pixels.
- "vpadal.u8 q3, q7 \n" // B 16 bytes -> 8 shorts.
- "vpadal.u8 q2, q6 \n" // G 16 bytes -> 8 shorts.
- "vpadal.u8 q1, q5 \n" // R 16 bytes -> 8 shorts.
+ "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load 16 more
+ "uadalp v0.8h, v7.16b \n" // B 16 bytes -> 8 shorts.
+ "uadalp v3.8h, v6.16b \n" // G 16 bytes -> 8 shorts.
+ "uadalp v2.8h, v5.16b \n" // R 16 bytes -> 8 shorts.
- "vrshr.u16 q1, q1, #1 \n" // 2x average
- "vrshr.u16 q2, q2, #1 \n"
- "vrshr.u16 q3, q3, #1 \n"
+ "urshr v0.8h, v0.8h, #1 \n" // 2x average
+ "urshr v1.8h, v3.8h, #1 \n"
+ "urshr v2.8h, v2.8h, #1 \n"
- "subs %4, %4, #16 \n" // 32 processed per loop.
- RGBTOUV(q3, q2, q1)
+ "subs %w4, %w4, #16 \n" // 32 processed per loop.
+ RGBTOUV(v0.8h, v1.8h, v2.8h)
MEMACCESS(2)
- "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
+ "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
MEMACCESS(3)
- "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
- "bgt 1b \n"
+ "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
+ "b.gt 1b \n"
: "+r"(src_bgra), // %0
- "+r"(src_stride_bgra), // %1
+ "+r"(src_bgra_1), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
- "+r"(pix) // %4
+ "+r"(width) // %4
:
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+ "v20", "v21", "v22", "v23", "v24", "v25"
);
}
#endif // HAS_BGRATOUVROW_NEON
#ifdef HAS_ABGRTOUVROW_NEON
void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
- uint8* dst_u, uint8* dst_v, int pix) {
+ uint8* dst_u, uint8* dst_v, int width) {
+ const uint8* src_abgr_1 = src_abgr + src_stride_abgr;
asm volatile (
- "add %1, %0, %1 \n" // src_stride + src_abgr
- "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
- "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
- "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
- "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
- "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
+ RGBTOUV_SETUP_REG
"1: \n"
MEMACCESS(0)
- "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ABGR pixels.
- MEMACCESS(0)
- "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ABGR pixels.
- "vpaddl.u8 q2, q2 \n" // B 16 bytes -> 8 shorts.
- "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
- "vpaddl.u8 q0, q0 \n" // R 16 bytes -> 8 shorts.
+ "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels.
+ "uaddlp v3.8h, v2.16b \n" // B 16 bytes -> 8 shorts.
+ "uaddlp v2.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
+ "uaddlp v1.8h, v0.16b \n" // R 16 bytes -> 8 shorts.
MEMACCESS(1)
- "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ABGR pixels.
- MEMACCESS(1)
- "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ABGR pixels.
- "vpadal.u8 q2, q6 \n" // B 16 bytes -> 8 shorts.
- "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
- "vpadal.u8 q0, q4 \n" // R 16 bytes -> 8 shorts.
+ "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load 16 more.
+ "uadalp v3.8h, v6.16b \n" // B 16 bytes -> 8 shorts.
+ "uadalp v2.8h, v5.16b \n" // G 16 bytes -> 8 shorts.
+ "uadalp v1.8h, v4.16b \n" // R 16 bytes -> 8 shorts.
- "vrshr.u16 q0, q0, #1 \n" // 2x average
- "vrshr.u16 q1, q1, #1 \n"
- "vrshr.u16 q2, q2, #1 \n"
+ "urshr v0.8h, v3.8h, #1 \n" // 2x average
+ "urshr v2.8h, v2.8h, #1 \n"
+ "urshr v1.8h, v1.8h, #1 \n"
- "subs %4, %4, #16 \n" // 32 processed per loop.
- RGBTOUV(q2, q1, q0)
+ "subs %w4, %w4, #16 \n" // 32 processed per loop.
+ RGBTOUV(v0.8h, v2.8h, v1.8h)
MEMACCESS(2)
- "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
+ "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
MEMACCESS(3)
- "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
- "bgt 1b \n"
+ "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
+ "b.gt 1b \n"
: "+r"(src_abgr), // %0
- "+r"(src_stride_abgr), // %1
+ "+r"(src_abgr_1), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
- "+r"(pix) // %4
+ "+r"(width) // %4
:
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+ "v20", "v21", "v22", "v23", "v24", "v25"
);
}
#endif // HAS_ABGRTOUVROW_NEON
#ifdef HAS_RGBATOUVROW_NEON
void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba,
- uint8* dst_u, uint8* dst_v, int pix) {
+ uint8* dst_u, uint8* dst_v, int width) {
+ const uint8* src_rgba_1 = src_rgba + src_stride_rgba;
asm volatile (
- "add %1, %0, %1 \n" // src_stride + src_rgba
- "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
- "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
- "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
- "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
- "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
+ RGBTOUV_SETUP_REG
"1: \n"
MEMACCESS(0)
- "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 RGBA pixels.
- MEMACCESS(0)
- "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 RGBA pixels.
- "vpaddl.u8 q0, q1 \n" // B 16 bytes -> 8 shorts.
- "vpaddl.u8 q1, q2 \n" // G 16 bytes -> 8 shorts.
- "vpaddl.u8 q2, q3 \n" // R 16 bytes -> 8 shorts.
+ "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels.
+ "uaddlp v0.8h, v1.16b \n" // B 16 bytes -> 8 shorts.
+ "uaddlp v1.8h, v2.16b \n" // G 16 bytes -> 8 shorts.
+ "uaddlp v2.8h, v3.16b \n" // R 16 bytes -> 8 shorts.
MEMACCESS(1)
- "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more RGBA pixels.
- MEMACCESS(1)
- "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 RGBA pixels.
- "vpadal.u8 q0, q5 \n" // B 16 bytes -> 8 shorts.
- "vpadal.u8 q1, q6 \n" // G 16 bytes -> 8 shorts.
- "vpadal.u8 q2, q7 \n" // R 16 bytes -> 8 shorts.
+ "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load 16 more.
+ "uadalp v0.8h, v5.16b \n" // B 16 bytes -> 8 shorts.
+ "uadalp v1.8h, v6.16b \n" // G 16 bytes -> 8 shorts.
+ "uadalp v2.8h, v7.16b \n" // R 16 bytes -> 8 shorts.
- "vrshr.u16 q0, q0, #1 \n" // 2x average
- "vrshr.u16 q1, q1, #1 \n"
- "vrshr.u16 q2, q2, #1 \n"
+ "urshr v0.8h, v0.8h, #1 \n" // 2x average
+ "urshr v1.8h, v1.8h, #1 \n"
+ "urshr v2.8h, v2.8h, #1 \n"
- "subs %4, %4, #16 \n" // 32 processed per loop.
- RGBTOUV(q0, q1, q2)
+ "subs %w4, %w4, #16 \n" // 32 processed per loop.
+ RGBTOUV(v0.8h, v1.8h, v2.8h)
MEMACCESS(2)
- "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
+ "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
MEMACCESS(3)
- "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
- "bgt 1b \n"
+ "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
+ "b.gt 1b \n"
: "+r"(src_rgba), // %0
- "+r"(src_stride_rgba), // %1
+ "+r"(src_rgba_1), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
- "+r"(pix) // %4
+ "+r"(width) // %4
:
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+ "v20", "v21", "v22", "v23", "v24", "v25"
);
}
#endif // HAS_RGBATOUVROW_NEON
#ifdef HAS_RGB24TOUVROW_NEON
void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24,
- uint8* dst_u, uint8* dst_v, int pix) {
+ uint8* dst_u, uint8* dst_v, int width) {
+ const uint8* src_rgb24_1 = src_rgb24 + src_stride_rgb24;
asm volatile (
- "add %1, %0, %1 \n" // src_stride + src_rgb24
- "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
- "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
- "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
- "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
- "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
+ RGBTOUV_SETUP_REG
"1: \n"
MEMACCESS(0)
- "vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RGB24 pixels.
- MEMACCESS(0)
- "vld3.8 {d1, d3, d5}, [%0]! \n" // load next 8 RGB24 pixels.
- "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
- "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
- "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
+ "ld3 {v0.16b,v1.16b,v2.16b}, [%0], #48 \n" // load 16 pixels.
+ "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts.
+ "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
+ "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts.
MEMACCESS(1)
- "vld3.8 {d8, d10, d12}, [%1]! \n" // load 8 more RGB24 pixels.
- MEMACCESS(1)
- "vld3.8 {d9, d11, d13}, [%1]! \n" // load last 8 RGB24 pixels.
- "vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts.
- "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
- "vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts.
+ "ld3 {v4.16b,v5.16b,v6.16b}, [%1], #48 \n" // load 16 more.
+ "uadalp v0.8h, v4.16b \n" // B 16 bytes -> 8 shorts.
+ "uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts.
+ "uadalp v2.8h, v6.16b \n" // R 16 bytes -> 8 shorts.
- "vrshr.u16 q0, q0, #1 \n" // 2x average
- "vrshr.u16 q1, q1, #1 \n"
- "vrshr.u16 q2, q2, #1 \n"
+ "urshr v0.8h, v0.8h, #1 \n" // 2x average
+ "urshr v1.8h, v1.8h, #1 \n"
+ "urshr v2.8h, v2.8h, #1 \n"
- "subs %4, %4, #16 \n" // 32 processed per loop.
- RGBTOUV(q0, q1, q2)
+ "subs %w4, %w4, #16 \n" // 32 processed per loop.
+ RGBTOUV(v0.8h, v1.8h, v2.8h)
MEMACCESS(2)
- "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
+ "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
MEMACCESS(3)
- "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
- "bgt 1b \n"
+ "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
+ "b.gt 1b \n"
: "+r"(src_rgb24), // %0
- "+r"(src_stride_rgb24), // %1
+ "+r"(src_rgb24_1), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
- "+r"(pix) // %4
+ "+r"(width) // %4
:
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+ "v20", "v21", "v22", "v23", "v24", "v25"
);
}
#endif // HAS_RGB24TOUVROW_NEON
#ifdef HAS_RAWTOUVROW_NEON
void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw,
- uint8* dst_u, uint8* dst_v, int pix) {
+ uint8* dst_u, uint8* dst_v, int width) {
+ const uint8* src_raw_1 = src_raw + src_stride_raw;
asm volatile (
- "add %1, %0, %1 \n" // src_stride + src_raw
- "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
- "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
- "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
- "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
- "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
+ RGBTOUV_SETUP_REG
"1: \n"
MEMACCESS(0)
- "vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RAW pixels.
- MEMACCESS(0)
- "vld3.8 {d1, d3, d5}, [%0]! \n" // load next 8 RAW pixels.
- "vpaddl.u8 q2, q2 \n" // B 16 bytes -> 8 shorts.
- "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
- "vpaddl.u8 q0, q0 \n" // R 16 bytes -> 8 shorts.
+ "ld3 {v0.16b,v1.16b,v2.16b}, [%0], #48 \n" // load 8 RAW pixels.
+ "uaddlp v2.8h, v2.16b \n" // B 16 bytes -> 8 shorts.
+ "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
+ "uaddlp v0.8h, v0.16b \n" // R 16 bytes -> 8 shorts.
MEMACCESS(1)
- "vld3.8 {d8, d10, d12}, [%1]! \n" // load 8 more RAW pixels.
- MEMACCESS(1)
- "vld3.8 {d9, d11, d13}, [%1]! \n" // load last 8 RAW pixels.
- "vpadal.u8 q2, q6 \n" // B 16 bytes -> 8 shorts.
- "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
- "vpadal.u8 q0, q4 \n" // R 16 bytes -> 8 shorts.
+ "ld3 {v4.16b,v5.16b,v6.16b}, [%1], #48 \n" // load 8 more RAW pixels
+ "uadalp v2.8h, v6.16b \n" // B 16 bytes -> 8 shorts.
+ "uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts.
+ "uadalp v0.8h, v4.16b \n" // R 16 bytes -> 8 shorts.
- "vrshr.u16 q0, q0, #1 \n" // 2x average
- "vrshr.u16 q1, q1, #1 \n"
- "vrshr.u16 q2, q2, #1 \n"
+ "urshr v2.8h, v2.8h, #1 \n" // 2x average
+ "urshr v1.8h, v1.8h, #1 \n"
+ "urshr v0.8h, v0.8h, #1 \n"
- "subs %4, %4, #16 \n" // 32 processed per loop.
- RGBTOUV(q2, q1, q0)
+ "subs %w4, %w4, #16 \n" // 32 processed per loop.
+ RGBTOUV(v2.8h, v1.8h, v0.8h)
MEMACCESS(2)
- "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
+ "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
MEMACCESS(3)
- "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
- "bgt 1b \n"
+ "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
+ "b.gt 1b \n"
: "+r"(src_raw), // %0
- "+r"(src_stride_raw), // %1
+ "+r"(src_raw_1), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
- "+r"(pix) // %4
+ "+r"(width) // %4
:
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+ "v20", "v21", "v22", "v23", "v24", "v25"
);
}
#endif // HAS_RAWTOUVROW_NEON
-// 16x2 pixels -> 8x1. pix is number of argb pixels. e.g. 16.
+// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
#ifdef HAS_RGB565TOUVROW_NEON
void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
- uint8* dst_u, uint8* dst_v, int pix) {
+ uint8* dst_u, uint8* dst_v, int width) {
+ const uint8* src_rgb565_1 = src_rgb565 + src_stride_rgb565;
asm volatile (
- "add %1, %0, %1 \n" // src_stride + src_argb
- "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
- "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
- "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
- "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
- "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
+ "movi v22.8h, #56, lsl #0 \n" // UB / VR coeff (0.875) / 2
+ "movi v23.8h, #37, lsl #0 \n" // UG coeff (-0.5781) / 2
+ "movi v24.8h, #19, lsl #0 \n" // UR coeff (-0.2969) / 2
+ "movi v25.8h, #9 , lsl #0 \n" // VB coeff (-0.1406) / 2
+ "movi v26.8h, #47, lsl #0 \n" // VG coeff (-0.7344) / 2
+ "movi v27.16b, #0x80 \n" // 128.5 (0x8080 in 16-bit)
"1: \n"
MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels.
+ "ld1 {v0.16b}, [%0], #16 \n" // load 8 RGB565 pixels.
RGB565TOARGB
- "vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts.
- "vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts.
- "vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts.
+ "uaddlp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts.
+ "uaddlp v18.4h, v1.8b \n" // G 8 bytes -> 4 shorts.
+ "uaddlp v20.4h, v2.8b \n" // R 8 bytes -> 4 shorts.
MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // next 8 RGB565 pixels.
+ "ld1 {v0.16b}, [%0], #16 \n" // next 8 RGB565 pixels.
RGB565TOARGB
- "vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts.
- "vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts.
- "vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts.
+ "uaddlp v17.4h, v0.8b \n" // B 8 bytes -> 4 shorts.
+ "uaddlp v19.4h, v1.8b \n" // G 8 bytes -> 4 shorts.
+ "uaddlp v21.4h, v2.8b \n" // R 8 bytes -> 4 shorts.
MEMACCESS(1)
- "vld1.8 {q0}, [%1]! \n" // load 8 RGB565 pixels.
+ "ld1 {v0.16b}, [%1], #16 \n" // load 8 RGB565 pixels.
RGB565TOARGB
- "vpadal.u8 d8, d0 \n" // B 8 bytes -> 4 shorts.
- "vpadal.u8 d10, d1 \n" // G 8 bytes -> 4 shorts.
- "vpadal.u8 d12, d2 \n" // R 8 bytes -> 4 shorts.
+ "uadalp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts.
+ "uadalp v18.4h, v1.8b \n" // G 8 bytes -> 4 shorts.
+ "uadalp v20.4h, v2.8b \n" // R 8 bytes -> 4 shorts.
MEMACCESS(1)
- "vld1.8 {q0}, [%1]! \n" // next 8 RGB565 pixels.
+ "ld1 {v0.16b}, [%1], #16 \n" // next 8 RGB565 pixels.
RGB565TOARGB
- "vpadal.u8 d9, d0 \n" // B 8 bytes -> 4 shorts.
- "vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts.
- "vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts.
+ "uadalp v17.4h, v0.8b \n" // B 8 bytes -> 4 shorts.
+ "uadalp v19.4h, v1.8b \n" // G 8 bytes -> 4 shorts.
+ "uadalp v21.4h, v2.8b \n" // R 8 bytes -> 4 shorts.
- "vrshr.u16 q4, q4, #1 \n" // 2x average
- "vrshr.u16 q5, q5, #1 \n"
- "vrshr.u16 q6, q6, #1 \n"
+ "ins v16.D[1], v17.D[0] \n"
+ "ins v18.D[1], v19.D[0] \n"
+ "ins v20.D[1], v21.D[0] \n"
- "subs %4, %4, #16 \n" // 16 processed per loop.
- "vmul.s16 q8, q4, q10 \n" // B
- "vmls.s16 q8, q5, q11 \n" // G
- "vmls.s16 q8, q6, q12 \n" // R
- "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned
- "vmul.s16 q9, q6, q10 \n" // R
- "vmls.s16 q9, q5, q14 \n" // G
- "vmls.s16 q9, q4, q13 \n" // B
- "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned
- "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U
- "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V
+ "urshr v4.8h, v16.8h, #1 \n" // 2x average
+ "urshr v5.8h, v18.8h, #1 \n"
+ "urshr v6.8h, v20.8h, #1 \n"
+
+ "subs %w4, %w4, #16 \n" // 16 processed per loop.
+ "mul v16.8h, v4.8h, v22.8h \n" // B
+ "mls v16.8h, v5.8h, v23.8h \n" // G
+ "mls v16.8h, v6.8h, v24.8h \n" // R
+ "add v16.8h, v16.8h, v27.8h \n" // +128 -> unsigned
+ "mul v17.8h, v6.8h, v22.8h \n" // R
+ "mls v17.8h, v5.8h, v26.8h \n" // G
+ "mls v17.8h, v4.8h, v25.8h \n" // B
+ "add v17.8h, v17.8h, v27.8h \n" // +128 -> unsigned
+ "uqshrn v0.8b, v16.8h, #8 \n" // 16 bit to 8 bit U
+ "uqshrn v1.8b, v17.8h, #8 \n" // 16 bit to 8 bit V
MEMACCESS(2)
- "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
+ "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
MEMACCESS(3)
- "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
- "bgt 1b \n"
+ "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
+ "b.gt 1b \n"
: "+r"(src_rgb565), // %0
- "+r"(src_stride_rgb565), // %1
+ "+r"(src_rgb565_1), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
- "+r"(pix) // %4
+ "+r"(width) // %4
:
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24",
+ "v25", "v26", "v27"
);
}
#endif // HAS_RGB565TOUVROW_NEON
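// A scalar sketch (illustrative, not part of the patch) of the fixed-point
// math the *ToUVRow_NEON kernels in this file implement. Each 2x2 box sum is
// halved with a rounding shift and the coefficients are pre-halved to match,
// so the result is the usual 8.8 fixed-point formula (uqshrn additionally
// saturates the 16-bit intermediate when narrowing):
//   U = (112*B - 74*G - 38*R + 0x8080) >> 8
//   V = (112*R - 94*G - 18*B + 0x8080) >> 8
static __inline int RGBToU_Ref(int r, int g, int b) {
  return (112 * b - 74 * g - 38 * r + 0x8080) >> 8;
}
static __inline int RGBToV_Ref(int r, int g, int b) {
  return (112 * r - 94 * g - 18 * b + 0x8080) >> 8;
}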
-// 16x2 pixels -> 8x1. pix is number of argb pixels. e.g. 16.
+// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
#ifdef HAS_ARGB1555TOUVROW_NEON
void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
- uint8* dst_u, uint8* dst_v, int pix) {
+ uint8* dst_u, uint8* dst_v, int width) {
+ const uint8* src_argb1555_1 = src_argb1555 + src_stride_argb1555;
asm volatile (
- "add %1, %0, %1 \n" // src_stride + src_argb
- "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
- "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
- "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
- "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
- "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
+ RGBTOUV_SETUP_REG
"1: \n"
MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels.
+ "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB1555 pixels.
RGB555TOARGB
- "vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts.
- "vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts.
- "vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts.
+ "uaddlp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts.
+ "uaddlp v17.4h, v1.8b \n" // G 8 bytes -> 4 shorts.
+ "uaddlp v18.4h, v2.8b \n" // R 8 bytes -> 4 shorts.
MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // next 8 ARGB1555 pixels.
+ "ld1 {v0.16b}, [%0], #16 \n" // next 8 ARGB1555 pixels.
RGB555TOARGB
- "vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts.
- "vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts.
- "vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts.
+ "uaddlp v26.4h, v0.8b \n" // B 8 bytes -> 4 shorts.
+ "uaddlp v27.4h, v1.8b \n" // G 8 bytes -> 4 shorts.
+ "uaddlp v28.4h, v2.8b \n" // R 8 bytes -> 4 shorts.
MEMACCESS(1)
- "vld1.8 {q0}, [%1]! \n" // load 8 ARGB1555 pixels.
+ "ld1 {v0.16b}, [%1], #16 \n" // load 8 ARGB1555 pixels.
RGB555TOARGB
- "vpadal.u8 d8, d0 \n" // B 8 bytes -> 4 shorts.
- "vpadal.u8 d10, d1 \n" // G 8 bytes -> 4 shorts.
- "vpadal.u8 d12, d2 \n" // R 8 bytes -> 4 shorts.
+ "uadalp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts.
+ "uadalp v17.4h, v1.8b \n" // G 8 bytes -> 4 shorts.
+ "uadalp v18.4h, v2.8b \n" // R 8 bytes -> 4 shorts.
MEMACCESS(1)
- "vld1.8 {q0}, [%1]! \n" // next 8 ARGB1555 pixels.
+ "ld1 {v0.16b}, [%1], #16 \n" // next 8 ARGB1555 pixels.
RGB555TOARGB
- "vpadal.u8 d9, d0 \n" // B 8 bytes -> 4 shorts.
- "vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts.
- "vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts.
+ "uadalp v26.4h, v0.8b \n" // B 8 bytes -> 4 shorts.
+ "uadalp v27.4h, v1.8b \n" // G 8 bytes -> 4 shorts.
+ "uadalp v28.4h, v2.8b \n" // R 8 bytes -> 4 shorts.
- "vrshr.u16 q4, q4, #1 \n" // 2x average
- "vrshr.u16 q5, q5, #1 \n"
- "vrshr.u16 q6, q6, #1 \n"
+ "ins v16.D[1], v26.D[0] \n"
+ "ins v17.D[1], v27.D[0] \n"
+ "ins v18.D[1], v28.D[0] \n"
- "subs %4, %4, #16 \n" // 16 processed per loop.
- "vmul.s16 q8, q4, q10 \n" // B
- "vmls.s16 q8, q5, q11 \n" // G
- "vmls.s16 q8, q6, q12 \n" // R
- "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned
- "vmul.s16 q9, q6, q10 \n" // R
- "vmls.s16 q9, q5, q14 \n" // G
- "vmls.s16 q9, q4, q13 \n" // B
- "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned
- "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U
- "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V
+ "urshr v4.8h, v16.8h, #1 \n" // 2x average
+ "urshr v5.8h, v17.8h, #1 \n"
+ "urshr v6.8h, v18.8h, #1 \n"
+
+ "subs %w4, %w4, #16 \n" // 16 processed per loop.
+ "mul v2.8h, v4.8h, v20.8h \n" // B
+ "mls v2.8h, v5.8h, v21.8h \n" // G
+ "mls v2.8h, v6.8h, v22.8h \n" // R
+ "add v2.8h, v2.8h, v25.8h \n" // +128 -> unsigned
+ "mul v3.8h, v6.8h, v20.8h \n" // R
+ "mls v3.8h, v5.8h, v24.8h \n" // G
+ "mls v3.8h, v4.8h, v23.8h \n" // B
+ "add v3.8h, v3.8h, v25.8h \n" // +128 -> unsigned
+ "uqshrn v0.8b, v2.8h, #8 \n" // 16 bit to 8 bit U
+ "uqshrn v1.8b, v3.8h, #8 \n" // 16 bit to 8 bit V
MEMACCESS(2)
- "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
+ "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
MEMACCESS(3)
- "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
- "bgt 1b \n"
+ "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
+ "b.gt 1b \n"
: "+r"(src_argb1555), // %0
- "+r"(src_stride_argb1555), // %1
+ "+r"(src_argb1555_1), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
- "+r"(pix) // %4
+ "+r"(width) // %4
:
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6",
+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28"
);
}
#endif // HAS_ARGB1555TOUVROW_NEON
-// 16x2 pixels -> 8x1. pix is number of argb pixels. e.g. 16.
+// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
#ifdef HAS_ARGB4444TOUVROW_NEON
void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444,
- uint8* dst_u, uint8* dst_v, int pix) {
+ uint8* dst_u, uint8* dst_v, int width) {
+ const uint8* src_argb4444_1 = src_argb4444 + src_stride_argb4444;
asm volatile (
- "add %1, %0, %1 \n" // src_stride + src_argb
- "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
- "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
- "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
- "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
- "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
+ RGBTOUV_SETUP_REG
"1: \n"
MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels.
+ "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB4444 pixels.
ARGB4444TOARGB
- "vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts.
- "vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts.
- "vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts.
+ "uaddlp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts.
+ "uaddlp v17.4h, v1.8b \n" // G 8 bytes -> 4 shorts.
+ "uaddlp v18.4h, v2.8b \n" // R 8 bytes -> 4 shorts.
MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // next 8 ARGB4444 pixels.
+ "ld1 {v0.16b}, [%0], #16 \n" // next 8 ARGB4444 pixels.
ARGB4444TOARGB
- "vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts.
- "vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts.
- "vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts.
+ "uaddlp v26.4h, v0.8b \n" // B 8 bytes -> 4 shorts.
+ "uaddlp v27.4h, v1.8b \n" // G 8 bytes -> 4 shorts.
+ "uaddlp v28.4h, v2.8b \n" // R 8 bytes -> 4 shorts.
MEMACCESS(1)
- "vld1.8 {q0}, [%1]! \n" // load 8 ARGB4444 pixels.
+ "ld1 {v0.16b}, [%1], #16 \n" // load 8 ARGB4444 pixels.
ARGB4444TOARGB
- "vpadal.u8 d8, d0 \n" // B 8 bytes -> 4 shorts.
- "vpadal.u8 d10, d1 \n" // G 8 bytes -> 4 shorts.
- "vpadal.u8 d12, d2 \n" // R 8 bytes -> 4 shorts.
+ "uadalp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts.
+ "uadalp v17.4h, v1.8b \n" // G 8 bytes -> 4 shorts.
+ "uadalp v18.4h, v2.8b \n" // R 8 bytes -> 4 shorts.
MEMACCESS(1)
- "vld1.8 {q0}, [%1]! \n" // next 8 ARGB4444 pixels.
+ "ld1 {v0.16b}, [%1], #16 \n" // next 8 ARGB4444 pixels.
ARGB4444TOARGB
- "vpadal.u8 d9, d0 \n" // B 8 bytes -> 4 shorts.
- "vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts.
- "vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts.
+ "uadalp v26.4h, v0.8b \n" // B 8 bytes -> 4 shorts.
+ "uadalp v27.4h, v1.8b \n" // G 8 bytes -> 4 shorts.
+ "uadalp v28.4h, v2.8b \n" // R 8 bytes -> 4 shorts.
- "vrshr.u16 q4, q4, #1 \n" // 2x average
- "vrshr.u16 q5, q5, #1 \n"
- "vrshr.u16 q6, q6, #1 \n"
+ "ins v16.D[1], v26.D[0] \n"
+ "ins v17.D[1], v27.D[0] \n"
+ "ins v18.D[1], v28.D[0] \n"
- "subs %4, %4, #16 \n" // 16 processed per loop.
- "vmul.s16 q8, q4, q10 \n" // B
- "vmls.s16 q8, q5, q11 \n" // G
- "vmls.s16 q8, q6, q12 \n" // R
- "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned
- "vmul.s16 q9, q6, q10 \n" // R
- "vmls.s16 q9, q5, q14 \n" // G
- "vmls.s16 q9, q4, q13 \n" // B
- "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned
- "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U
- "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V
+ "urshr v4.8h, v16.8h, #1 \n" // 2x average
+ "urshr v5.8h, v17.8h, #1 \n"
+ "urshr v6.8h, v18.8h, #1 \n"
+
+ "subs %w4, %w4, #16 \n" // 16 processed per loop.
+ "mul v2.8h, v4.8h, v20.8h \n" // B
+ "mls v2.8h, v5.8h, v21.8h \n" // G
+ "mls v2.8h, v6.8h, v22.8h \n" // R
+ "add v2.8h, v2.8h, v25.8h \n" // +128 -> unsigned
+ "mul v3.8h, v6.8h, v20.8h \n" // R
+ "mls v3.8h, v5.8h, v24.8h \n" // G
+ "mls v3.8h, v4.8h, v23.8h \n" // B
+ "add v3.8h, v3.8h, v25.8h \n" // +128 -> unsigned
+ "uqshrn v0.8b, v2.8h, #8 \n" // 16 bit to 8 bit U
+ "uqshrn v1.8b, v3.8h, #8 \n" // 16 bit to 8 bit V
MEMACCESS(2)
- "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
+ "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
MEMACCESS(3)
- "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
- "bgt 1b \n"
+ "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
+ "b.gt 1b \n"
: "+r"(src_argb4444), // %0
- "+r"(src_stride_argb4444), // %1
+ "+r"(src_argb4444_1), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
- "+r"(pix) // %4
+ "+r"(width) // %4
:
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6",
+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28"
+
);
}
#endif // HAS_ARGB4444TOUVROW_NEON
#ifdef HAS_RGB565TOYROW_NEON
-void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix) {
+void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int width) {
asm volatile (
- "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient
- "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient
- "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient
- "vmov.u8 d27, #16 \n" // Add 16 constant
- ".p2align 2 \n"
+ "movi v24.8b, #13 \n" // B * 0.1016 coefficient
+ "movi v25.8b, #65 \n" // G * 0.5078 coefficient
+ "movi v26.8b, #33 \n" // R * 0.2578 coefficient
+ "movi v27.8b, #16 \n" // Add 16 constant
"1: \n"
MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop.
+ "ld1 {v0.16b}, [%0], #16 \n" // load 8 RGB565 pixels.
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
RGB565TOARGB
- "vmull.u8 q2, d0, d24 \n" // B
- "vmlal.u8 q2, d1, d25 \n" // G
- "vmlal.u8 q2, d2, d26 \n" // R
- "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y
- "vqadd.u8 d0, d27 \n"
+ "umull v3.8h, v0.8b, v24.8b \n" // B
+ "umlal v3.8h, v1.8b, v25.8b \n" // G
+ "umlal v3.8h, v2.8b, v26.8b \n" // R
+ "sqrshrun v0.8b, v3.8h, #7 \n" // 16 bit to 8 bit Y
+ "uqadd v0.8b, v0.8b, v27.8b \n"
MEMACCESS(1)
- "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
- "bgt 1b \n"
+ "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
+ "b.gt 1b \n"
: "+r"(src_rgb565), // %0
"+r"(dst_y), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
- : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v6",
+ "v24", "v25", "v26", "v27"
);
}
#endif // HAS_RGB565TOYROW_NEON
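// A scalar sketch (illustrative, not part of the patch) of the luma math the
// RGB565/ARGB1555/ARGB4444 ToYRow kernels above and below implement: a 7-bit
// fixed-point weighted sum with rounding (sqrshrun #7), then a saturating add
// of the 16 offset (uqadd):
static __inline uint8 RGBToY_Ref(int r, int g, int b) {
  int y = ((13 * b + 65 * g + 33 * r + 64) >> 7) + 16;
  return (uint8)(y > 255 ? 255 : y);
}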
#ifdef HAS_ARGB1555TOYROW_NEON
-void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int pix) {
+void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int width) {
asm volatile (
- "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient
- "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient
- "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient
- "vmov.u8 d27, #16 \n" // Add 16 constant
- ".p2align 2 \n"
+ "movi v4.8b, #13 \n" // B * 0.1016 coefficient
+ "movi v5.8b, #65 \n" // G * 0.5078 coefficient
+ "movi v6.8b, #33 \n" // R * 0.2578 coefficient
+ "movi v7.8b, #16 \n" // Add 16 constant
"1: \n"
MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop.
+ "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB1555 pixels.
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
ARGB1555TOARGB
- "vmull.u8 q2, d0, d24 \n" // B
- "vmlal.u8 q2, d1, d25 \n" // G
- "vmlal.u8 q2, d2, d26 \n" // R
- "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y
- "vqadd.u8 d0, d27 \n"
+ "umull v3.8h, v0.8b, v4.8b \n" // B
+ "umlal v3.8h, v1.8b, v5.8b \n" // G
+ "umlal v3.8h, v2.8b, v6.8b \n" // R
+ "sqrshrun v0.8b, v3.8h, #7 \n" // 16 bit to 8 bit Y
+ "uqadd v0.8b, v0.8b, v7.8b \n"
MEMACCESS(1)
- "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
- "bgt 1b \n"
+ "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
+ "b.gt 1b \n"
: "+r"(src_argb1555), // %0
"+r"(dst_y), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
- : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"
);
}
#endif // HAS_ARGB1555TOYROW_NEON
#ifdef HAS_ARGB4444TOYROW_NEON
-void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int pix) {
+void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int width) {
asm volatile (
- "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient
- "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient
- "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient
- "vmov.u8 d27, #16 \n" // Add 16 constant
- ".p2align 2 \n"
+ "movi v24.8b, #13 \n" // B * 0.1016 coefficient
+ "movi v25.8b, #65 \n" // G * 0.5078 coefficient
+ "movi v26.8b, #33 \n" // R * 0.2578 coefficient
+ "movi v27.8b, #16 \n" // Add 16 constant
"1: \n"
MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop.
+ "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB4444 pixels.
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
ARGB4444TOARGB
- "vmull.u8 q2, d0, d24 \n" // B
- "vmlal.u8 q2, d1, d25 \n" // G
- "vmlal.u8 q2, d2, d26 \n" // R
- "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y
- "vqadd.u8 d0, d27 \n"
+ "umull v3.8h, v0.8b, v24.8b \n" // B
+ "umlal v3.8h, v1.8b, v25.8b \n" // G
+ "umlal v3.8h, v2.8b, v26.8b \n" // R
+ "sqrshrun v0.8b, v3.8h, #7 \n" // 16 bit to 8 bit Y
+ "uqadd v0.8b, v0.8b, v27.8b \n"
MEMACCESS(1)
- "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
- "bgt 1b \n"
+ "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
+ "b.gt 1b \n"
: "+r"(src_argb4444), // %0
"+r"(dst_y), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
- : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v24", "v25", "v26", "v27"
);
}
#endif // HAS_ARGB4444TOYROW_NEON
#ifdef HAS_BGRATOYROW_NEON
-void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix) {
+void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int width) {
asm volatile (
"movi v4.8b, #33 \n" // R * 0.2578 coefficient
"movi v5.8b, #65 \n" // G * 0.5078 coefficient
"movi v6.8b, #13 \n" // B * 0.1016 coefficient
"movi v7.8b, #16 \n" // Add 16 constant
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 8 pixels of BGRA.
- "subs %2, %2, #8 \n" // 8 processed per loop.
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 pixels.
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
"umull v16.8h, v1.8b, v4.8b \n" // R
"umlal v16.8h, v2.8b, v5.8b \n" // G
"umlal v16.8h, v3.8b, v6.8b \n" // B
@@ -2482,10 +2209,10 @@ void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix) {
"uqadd v0.8b, v0.8b, v7.8b \n"
MEMACCESS(1)
"st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
- "bgt 1b \n"
+ "b.gt 1b \n"
: "+r"(src_bgra), // %0
"+r"(dst_y), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"
);
@@ -2493,17 +2220,16 @@ void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix) {
#endif // HAS_BGRATOYROW_NEON
#ifdef HAS_ABGRTOYROW_NEON
-void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix) {
+void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int width) {
asm volatile (
"movi v4.8b, #33 \n" // R * 0.2578 coefficient
"movi v5.8b, #65 \n" // G * 0.5078 coefficient
"movi v6.8b, #13 \n" // B * 0.1016 coefficient
"movi v7.8b, #16 \n" // Add 16 constant
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 8 pixels of ABGR.
- "subs %2, %2, #8 \n" // 8 processed per loop.
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 pixels.
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
"umull v16.8h, v0.8b, v4.8b \n" // R
"umlal v16.8h, v1.8b, v5.8b \n" // G
"umlal v16.8h, v2.8b, v6.8b \n" // B
@@ -2511,10 +2237,10 @@ void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix) {
"uqadd v0.8b, v0.8b, v7.8b \n"
MEMACCESS(1)
"st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
- "bgt 1b \n"
+ "b.gt 1b \n"
: "+r"(src_abgr), // %0
"+r"(dst_y), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"
);
@@ -2522,17 +2248,16 @@ void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix) {
#endif // HAS_ABGRTOYROW_NEON
#ifdef HAS_RGBATOYROW_NEON
-void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix) {
+void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int width) {
asm volatile (
"movi v4.8b, #13 \n" // B * 0.1016 coefficient
"movi v5.8b, #65 \n" // G * 0.5078 coefficient
"movi v6.8b, #33 \n" // R * 0.2578 coefficient
"movi v7.8b, #16 \n" // Add 16 constant
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 8 pixels of RGBA.
- "subs %2, %2, #8 \n" // 8 processed per loop.
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 pixels.
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
"umull v16.8h, v1.8b, v4.8b \n" // B
"umlal v16.8h, v2.8b, v5.8b \n" // G
"umlal v16.8h, v3.8b, v6.8b \n" // R
@@ -2540,10 +2265,10 @@ void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix) {
"uqadd v0.8b, v0.8b, v7.8b \n"
MEMACCESS(1)
"st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
- "bgt 1b \n"
+ "b.gt 1b \n"
: "+r"(src_rgba), // %0
"+r"(dst_y), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"
);
@@ -2551,17 +2276,16 @@ void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix) {
#endif // HAS_RGBATOYROW_NEON
#ifdef HAS_RGB24TOYROW_NEON
-void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix) {
+void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int width) {
asm volatile (
"movi v4.8b, #13 \n" // B * 0.1016 coefficient
"movi v5.8b, #65 \n" // G * 0.5078 coefficient
"movi v6.8b, #33 \n" // R * 0.2578 coefficient
"movi v7.8b, #16 \n" // Add 16 constant
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "ld3 {v0.8b-v2.8b}, [%0], #24 \n" // load 8 pixels of RGB24.
- "subs %2, %2, #8 \n" // 8 processed per loop.
+ "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels.
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
"umull v16.8h, v0.8b, v4.8b \n" // B
"umlal v16.8h, v1.8b, v5.8b \n" // G
"umlal v16.8h, v2.8b, v6.8b \n" // R
@@ -2569,10 +2293,10 @@ void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix) {
"uqadd v0.8b, v0.8b, v7.8b \n"
MEMACCESS(1)
"st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
- "bgt 1b \n"
+ "b.gt 1b \n"
: "+r"(src_rgb24), // %0
"+r"(dst_y), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"
);
@@ -2580,17 +2304,16 @@ void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix) {
#endif // HAS_RGB24TOYROW_NEON
#ifdef HAS_RAWTOYROW_NEON
-void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix) {
+void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int width) {
asm volatile (
"movi v4.8b, #33 \n" // R * 0.2578 coefficient
"movi v5.8b, #65 \n" // G * 0.5078 coefficient
"movi v6.8b, #13 \n" // B * 0.1016 coefficient
"movi v7.8b, #16 \n" // Add 16 constant
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "ld3 {v0.8b-v2.8b}, [%0], #24 \n" // load 8 pixels of RAW.
- "subs %2, %2, #8 \n" // 8 processed per loop.
+ "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels.
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
"umull v16.8h, v0.8b, v4.8b \n" // B
"umlal v16.8h, v1.8b, v5.8b \n" // G
"umlal v16.8h, v2.8b, v6.8b \n" // R
@@ -2598,10 +2321,10 @@ void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix) {
"uqadd v0.8b, v0.8b, v7.8b \n"
MEMACCESS(1)
"st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
- "bgt 1b \n"
+ "b.gt 1b \n"
: "+r"(src_raw), // %0
"+r"(dst_y), // %1
- "+r"(pix) // %2
+ "+r"(width) // %2
:
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"
);
@@ -2617,14 +2340,10 @@ void InterpolateRow_NEON(uint8* dst_ptr,
int y0_fraction = 256 - y1_fraction;
const uint8* src_ptr1 = src_ptr + src_stride;
asm volatile (
- "cmp %4, #0 \n"
- "beq 100f \n"
- "cmp %4, #64 \n"
- "beq 75f \n"
- "cmp %4, #128 \n"
- "beq 50f \n"
- "cmp %4, #192 \n"
- "beq 25f \n"
+ "cmp %w4, #0 \n"
+ "b.eq 100f \n"
+ "cmp %w4, #128 \n"
+ "b.eq 50f \n"
"dup v5.16b, %w4 \n"
"dup v4.16b, %w5 \n"
@@ -2634,7 +2353,7 @@ void InterpolateRow_NEON(uint8* dst_ptr,
"ld1 {v0.16b}, [%1], #16 \n"
MEMACCESS(2)
"ld1 {v1.16b}, [%2], #16 \n"
- "subs %3, %3, #16 \n"
+ "subs %w3, %w3, #16 \n"
"umull v2.8h, v0.8b, v4.8b \n"
"umull2 v3.8h, v0.16b, v4.16b \n"
"umlal v2.8h, v1.8b, v5.8b \n"
@@ -2643,21 +2362,7 @@ void InterpolateRow_NEON(uint8* dst_ptr,
"rshrn2 v0.16b, v3.8h, #8 \n"
MEMACCESS(0)
"st1 {v0.16b}, [%0], #16 \n"
- "bgt 1b \n"
- "b 99f \n"
-
- // Blend 25 / 75.
- "25: \n"
- MEMACCESS(1)
- "ld1 {v0.16b}, [%1], #16 \n"
- MEMACCESS(2)
- "ld1 {v1.16b}, [%2], #16 \n"
- "subs %3, %3, #16 \n"
- "urhadd v0.16b, v0.16b, v1.16b \n"
- "urhadd v0.16b, v0.16b, v1.16b \n"
- MEMACCESS(0)
- "st1 {v0.16b}, [%0], #16 \n"
- "bgt 25b \n"
+ "b.gt 1b \n"
"b 99f \n"
// Blend 50 / 50.
@@ -2666,35 +2371,21 @@ void InterpolateRow_NEON(uint8* dst_ptr,
"ld1 {v0.16b}, [%1], #16 \n"
MEMACCESS(2)
"ld1 {v1.16b}, [%2], #16 \n"
- "subs %3, %3, #16 \n"
+ "subs %w3, %w3, #16 \n"
"urhadd v0.16b, v0.16b, v1.16b \n"
MEMACCESS(0)
"st1 {v0.16b}, [%0], #16 \n"
- "bgt 50b \n"
- "b 99f \n"
-
- // Blend 75 / 25.
- "75: \n"
- MEMACCESS(1)
- "ld1 {v1.16b}, [%1], #16 \n"
- MEMACCESS(2)
- "ld1 {v0.16b}, [%2], #16 \n"
- "subs %3, %3, #16 \n"
- "urhadd v0.16b, v0.16b, v1.16b \n"
- "urhadd v0.16b, v0.16b, v1.16b \n"
- MEMACCESS(0)
- "st1 {v0.16b}, [%0], #16 \n"
- "bgt 75b \n"
+ "b.gt 50b \n"
"b 99f \n"
// Blend 100 / 0 - Copy row unchanged.
"100: \n"
MEMACCESS(1)
"ld1 {v0.16b}, [%1], #16 \n"
- "subs %3, %3, #16 \n"
+ "subs %w3, %w3, #16 \n"
MEMACCESS(0)
"st1 {v0.16b}, [%0], #16 \n"
- "bgt 100b \n"
+ "b.gt 100b \n"
"99: \n"
: "+r"(dst_ptr), // %0
@@ -2714,15 +2405,15 @@ void InterpolateRow_NEON(uint8* dst_ptr,
void ARGBBlendRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
uint8* dst_argb, int width) {
asm volatile (
- "subs %3, %3, #8 \n"
- "blt 89f \n"
+ "subs %w3, %w3, #8 \n"
+ "b.lt 89f \n"
// Blend 8 pixels.
"8: \n"
MEMACCESS(0)
- "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 8 pixels of ARGB0.
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB0 pixels
MEMACCESS(1)
- "ld4 {v4.8b-v7.8b}, [%1], #32 \n" // load 8 pixels of ARGB1.
- "subs %3, %3, #8 \n" // 8 processed per loop.
+ "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load 8 ARGB1 pixels
+ "subs %w3, %w3, #8 \n" // 8 processed per loop.
"umull v16.8h, v4.8b, v3.8b \n" // db * a
"umull v17.8h, v5.8b, v3.8b \n" // dg * a
"umull v18.8h, v6.8b, v3.8b \n" // dr * a
@@ -2737,20 +2428,20 @@ void ARGBBlendRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
"uqadd v2.8b, v2.8b, v6.8b \n" // + sr
"movi v3.8b, #255 \n" // a = 255
MEMACCESS(2)
- "st4 {v0.8b-v3.8b}, [%2], #32 \n" // store 8 pixels of ARGB.
- "bge 8b \n"
+ "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB pixels
+ "b.ge 8b \n"
"89: \n"
- "adds %3, %3, #8-1 \n"
- "blt 99f \n"
+ "adds %w3, %w3, #8-1 \n"
+ "b.lt 99f \n"
// Blend 1 pixels.
"1: \n"
MEMACCESS(0)
- "ld4 {v0.b-v3.b}[0], [%0], #4 \n" // load 1 pixel ARGB0.
+ "ld4 {v0.b,v1.b,v2.b,v3.b}[0], [%0], #4 \n" // load 1 pixel ARGB0.
MEMACCESS(1)
- "ld4 {v4.b-v7.b}[0], [%1], #4 \n" // load 1 pixel ARGB1.
- "subs %3, %3, #1 \n" // 1 processed per loop.
+ "ld4 {v4.b,v5.b,v6.b,v7.b}[0], [%1], #4 \n" // load 1 pixel ARGB1.
+ "subs %w3, %w3, #1 \n" // 1 processed per loop.
"umull v16.8h, v4.8b, v3.8b \n" // db * a
"umull v17.8h, v5.8b, v3.8b \n" // dg * a
"umull v18.8h, v6.8b, v3.8b \n" // dr * a
@@ -2765,8 +2456,8 @@ void ARGBBlendRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
"uqadd v2.8b, v2.8b, v6.8b \n" // + sr
"movi v3.8b, #255 \n" // a = 255
MEMACCESS(2)
- "st4 {v0.b-v3.b}[0], [%2], #4 \n" // store 1 pixel.
- "bge 1b \n"
+ "st4 {v0.b,v1.b,v2.b,v3.b}[0], [%2], #4 \n" // store 1 pixel.
+ "b.ge 1b \n"
"99: \n"
@@ -2788,8 +2479,8 @@ void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
// Attenuate 8 pixels.
"1: \n"
MEMACCESS(0)
- "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 8 pixels of ARGB.
- "subs %2, %2, #8 \n" // 8 processed per loop.
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB pixels
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
"umull v4.8h, v0.8b, v3.8b \n" // b * a
"umull v5.8h, v1.8b, v3.8b \n" // g * a
"umull v6.8h, v2.8b, v3.8b \n" // r * a
@@ -2797,8 +2488,8 @@ void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
"uqrshrn v1.8b, v5.8h, #8 \n" // g >>= 8
"uqrshrn v2.8b, v6.8h, #8 \n" // r >>= 8
MEMACCESS(1)
- "st4 {v0.8b-v3.8b}, [%1], #32 \n" // store 8 pixels of ARGB.
- "bgt 1b \n"
+ "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB pixels
+ "b.gt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
@@ -2820,11 +2511,10 @@ void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size,
"dup v6.8h, %w4 \n" // interval add
// 8 pixel loop.
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "ld4 {v0.8b-v3.8b}, [%0] \n" // load 8 pixels of ARGB.
- "subs %1, %1, #8 \n" // 8 processed per loop.
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0] \n" // load 8 pixels of ARGB.
+ "subs %w1, %w1, #8 \n" // 8 processed per loop.
"uxtl v0.8h, v0.8b \n" // b (0 .. 255)
"uxtl v1.8h, v1.8b \n"
"uxtl v2.8h, v2.8b \n"
@@ -2841,8 +2531,8 @@ void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size,
"uqxtn v1.8b, v1.8h \n"
"uqxtn v2.8b, v2.8h \n"
MEMACCESS(0)
- "st4 {v0.8b-v3.8b}, [%0], #32 \n" // store 8 pixels of ARGB.
- "bgt 1b \n"
+ "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // store 8 ARGB pixels
+ "b.gt 1b \n"
: "+r"(dst_argb), // %0
"+r"(width) // %1
: "r"(scale), // %2
@@ -2865,11 +2555,10 @@ void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width,
"ushr v0.8h, v0.8h, #1 \n" // scale / 2.
// 8 pixel loop.
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "ld4 {v4.8b-v7.8b}, [%0], #32 \n" // load 8 pixels of ARGB.
- "subs %2, %2, #8 \n" // 8 processed per loop.
+ "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%0], #32 \n" // load 8 ARGB pixels.
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
"uxtl v4.8h, v4.8b \n" // b (0 .. 255)
"uxtl v5.8h, v5.8b \n"
"uxtl v6.8h, v6.8b \n"
@@ -2883,8 +2572,8 @@ void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width,
"uqxtn v6.8b, v6.8h \n"
"uqxtn v7.8b, v7.8h \n"
MEMACCESS(1)
- "st4 {v4.8b-v7.8b}, [%1], #32 \n" // store 8 pixels of ARGB.
- "bgt 1b \n"
+ "st4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // store 8 ARGB pixels
+ "b.gt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
@@ -2903,20 +2592,19 @@ void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
"movi v24.8b, #15 \n" // B * 0.11400 coefficient
"movi v25.8b, #75 \n" // G * 0.58700 coefficient
"movi v26.8b, #38 \n" // R * 0.29900 coefficient
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 8 ARGB pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop.
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB pixels.
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
"umull v4.8h, v0.8b, v24.8b \n" // B
"umlal v4.8h, v1.8b, v25.8b \n" // G
"umlal v4.8h, v2.8b, v26.8b \n" // R
"sqrshrun v0.8b, v4.8h, #7 \n" // 15 bit to 8 bit B
- "mov v1.8b, v0.8b \n" // G
- "mov v2.8b, v0.8b \n" // R
+ "orr v1.8b, v0.8b, v0.8b \n" // G
+ "orr v2.8b, v0.8b, v0.8b \n" // R
MEMACCESS(1)
- "st4 {v0.8b-v3.8b}, [%1], #32 \n" // store 8 ARGB pixels.
- "bgt 1b \n"
+ "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 pixels.
+ "b.gt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
@@ -2943,11 +2631,10 @@ void ARGBSepiaRow_NEON(uint8* dst_argb, int width) {
"movi v28.8b, #24 \n" // BB coefficient
"movi v29.8b, #98 \n" // BG coefficient
"movi v30.8b, #50 \n" // BR coefficient
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "ld4 {v0.8b-v3.8b}, [%0] \n" // load 8 ARGB pixels.
- "subs %1, %1, #8 \n" // 8 processed per loop.
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0] \n" // load 8 ARGB pixels.
+ "subs %w1, %w1, #8 \n" // 8 processed per loop.
"umull v4.8h, v0.8b, v20.8b \n" // B to Sepia B
"umlal v4.8h, v1.8b, v21.8b \n" // G
"umlal v4.8h, v2.8b, v22.8b \n" // R
@@ -2961,8 +2648,8 @@ void ARGBSepiaRow_NEON(uint8* dst_argb, int width) {
"uqshrn v1.8b, v5.8h, #7 \n" // 16 bit to 8 bit G
"uqshrn v2.8b, v6.8h, #7 \n" // 16 bit to 8 bit R
MEMACCESS(0)
- "st4 {v0.8b-v3.8b}, [%0], #32 \n" // store 8 ARGB pixels.
- "bgt 1b \n"
+ "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // store 8 pixels.
+ "b.gt 1b \n"
: "+r"(dst_argb), // %0
"+r"(width) // %1
:
@@ -2984,11 +2671,10 @@ void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb,
"sxtl v0.8h, v2.8b \n" // B,G coefficients s16.
"sxtl2 v1.8h, v2.16b \n" // R,A coefficients s16.
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "ld4 {v16.8b-v19.8b}, [%0], #32 \n" // load 8 ARGB pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop.
+ "ld4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%0], #32 \n" // load 8 pixels.
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
"uxtl v16.8h, v16.8b \n" // b (0 .. 255) 16 bit
"uxtl v17.8h, v17.8b \n" // g
"uxtl v18.8h, v18.8b \n" // r
@@ -3026,8 +2712,8 @@ void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb,
"sqshrun v18.8b, v24.8h, #6 \n" // 16 bit to 8 bit R
"sqshrun v19.8b, v25.8h, #6 \n" // 16 bit to 8 bit A
MEMACCESS(1)
- "st4 {v16.8b-v19.8b}, [%1], #32 \n" // store 8 ARGB pixels.
- "bgt 1b \n"
+ "st4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%1], #32 \n" // store 8 pixels.
+ "b.gt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
@@ -3045,13 +2731,12 @@ void ARGBMultiplyRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
uint8* dst_argb, int width) {
asm volatile (
// 8 pixel loop.
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 8 ARGB pixels.
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB pixels.
MEMACCESS(1)
- "ld4 {v4.8b-v7.8b}, [%1], #32 \n" // load 8 more ARGB pixels.
- "subs %3, %3, #8 \n" // 8 processed per loop.
+ "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load 8 more pixels.
+ "subs %w3, %w3, #8 \n" // 8 processed per loop.
"umull v0.8h, v0.8b, v4.8b \n" // multiply B
"umull v1.8h, v1.8b, v5.8b \n" // multiply G
"umull v2.8h, v2.8b, v6.8b \n" // multiply R
@@ -3061,8 +2746,8 @@ void ARGBMultiplyRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
"rshrn v2.8b, v2.8h, #8 \n" // 16 bit to 8 bit R
"rshrn v3.8b, v3.8h, #8 \n" // 16 bit to 8 bit A
MEMACCESS(2)
- "st4 {v0.8b-v3.8b}, [%2], #32 \n" // store 8 ARGB pixels.
- "bgt 1b \n"
+ "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB pixels
+ "b.gt 1b \n"
: "+r"(src_argb0), // %0
"+r"(src_argb1), // %1
@@ -3080,20 +2765,19 @@ void ARGBAddRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
uint8* dst_argb, int width) {
asm volatile (
// 8 pixel loop.
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 8 ARGB pixels.
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB pixels.
MEMACCESS(1)
- "ld4 {v4.8b-v7.8b}, [%1], #32 \n" // load 8 more ARGB pixels.
- "subs %3, %3, #8 \n" // 8 processed per loop.
+ "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load 8 more pixels.
+ "subs %w3, %w3, #8 \n" // 8 processed per loop.
"uqadd v0.8b, v0.8b, v4.8b \n"
"uqadd v1.8b, v1.8b, v5.8b \n"
"uqadd v2.8b, v2.8b, v6.8b \n"
"uqadd v3.8b, v3.8b, v7.8b \n"
MEMACCESS(2)
- "st4 {v0.8b-v3.8b}, [%2], #32 \n" // store 8 ARGB pixels.
- "bgt 1b \n"
+ "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB pixels
+ "b.gt 1b \n"
: "+r"(src_argb0), // %0
"+r"(src_argb1), // %1
@@ -3111,20 +2795,19 @@ void ARGBSubtractRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
uint8* dst_argb, int width) {
asm volatile (
// 8 pixel loop.
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
- "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 8 ARGB pixels.
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB pixels.
MEMACCESS(1)
- "ld4 {v4.8b-v7.8b}, [%1], #32 \n" // load 8 more ARGB pixels.
- "subs %3, %3, #8 \n" // 8 processed per loop.
+ "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load 8 more pixels.
+ "subs %w3, %w3, #8 \n" // 8 processed per loop.
"uqsub v0.8b, v0.8b, v4.8b \n"
"uqsub v1.8b, v1.8b, v5.8b \n"
"uqsub v2.8b, v2.8b, v6.8b \n"
"uqsub v3.8b, v3.8b, v7.8b \n"
MEMACCESS(2)
- "st4 {v0.8b-v3.8b}, [%2], #32 \n" // store 8 ARGB pixels.
- "bgt 1b \n"
+ "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB pixels
+ "b.gt 1b \n"
: "+r"(src_argb0), // %0
"+r"(src_argb1), // %1
@@ -3147,19 +2830,18 @@ void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
asm volatile (
"movi v3.8b, #255 \n" // alpha
// 8 pixel loop.
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"ld1 {v0.8b}, [%0], #8 \n" // load 8 sobelx.
MEMACCESS(1)
"ld1 {v1.8b}, [%1], #8 \n" // load 8 sobely.
- "subs %3, %3, #8 \n" // 8 processed per loop.
+ "subs %w3, %w3, #8 \n" // 8 processed per loop.
"uqadd v0.8b, v0.8b, v1.8b \n" // add
- "mov v1.8b, v0.8b \n"
- "mov v2.8b, v0.8b \n"
+ "orr v1.8b, v0.8b, v0.8b \n"
+ "orr v2.8b, v0.8b, v0.8b \n"
MEMACCESS(2)
- "st4 {v0.8b-v3.8b}, [%2], #32 \n" // store 8 ARGB pixels.
- "bgt 1b \n"
+ "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB pixels
+ "b.gt 1b \n"
: "+r"(src_sobelx), // %0
"+r"(src_sobely), // %1
"+r"(dst_argb), // %2
@@ -3176,17 +2858,16 @@ void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_y, int width) {
asm volatile (
// 16 pixel loop.
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"ld1 {v0.16b}, [%0], #16 \n" // load 16 sobelx.
MEMACCESS(1)
"ld1 {v1.16b}, [%1], #16 \n" // load 16 sobely.
- "subs %3, %3, #16 \n" // 16 processed per loop.
+ "subs %w3, %w3, #16 \n" // 16 processed per loop.
"uqadd v0.16b, v0.16b, v1.16b \n" // add
MEMACCESS(2)
"st1 {v0.16b}, [%2], #16 \n" // store 16 pixels.
- "bgt 1b \n"
+ "b.gt 1b \n"
: "+r"(src_sobelx), // %0
"+r"(src_sobely), // %1
"+r"(dst_y), // %2
@@ -3208,17 +2889,16 @@ void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
asm volatile (
"movi v3.8b, #255 \n" // alpha
// 8 pixel loop.
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"ld1 {v2.8b}, [%0], #8 \n" // load 8 sobelx.
MEMACCESS(1)
"ld1 {v0.8b}, [%1], #8 \n" // load 8 sobely.
- "subs %3, %3, #8 \n" // 8 processed per loop.
+ "subs %w3, %w3, #8 \n" // 8 processed per loop.
"uqadd v1.8b, v0.8b, v2.8b \n" // add
MEMACCESS(2)
- "st4 {v0.8b-v3.8b}, [%2], #32 \n" // store 8 ARGB pixels.
- "bgt 1b \n"
+ "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB pixels
+ "b.gt 1b \n"
: "+r"(src_sobelx), // %0
"+r"(src_sobely), // %1
"+r"(dst_argb), // %2
@@ -3237,7 +2917,6 @@ void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
const uint8* src_y2, uint8* dst_sobelx, int width) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"ld1 {v0.8b}, [%0],%5 \n" // top
@@ -3255,21 +2934,21 @@ void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
"ld1 {v2.8b}, [%2],%5 \n" // bottom
MEMACCESS(2)
"ld1 {v3.8b}, [%2],%6 \n"
- "subs %4, %4, #8 \n" // 8 pixels
+ "subs %w4, %w4, #8 \n" // 8 pixels
"usubl v1.8h, v2.8b, v3.8b \n"
"add v0.8h, v0.8h, v1.8h \n"
"abs v0.8h, v0.8h \n"
"uqxtn v0.8b, v0.8h \n"
MEMACCESS(3)
"st1 {v0.8b}, [%3], #8 \n" // store 8 sobelx
- "bgt 1b \n"
+ "b.gt 1b \n"
: "+r"(src_y0), // %0
"+r"(src_y1), // %1
"+r"(src_y2), // %2
"+r"(dst_sobelx), // %3
"+r"(width) // %4
- : "r"(2), // %5
- "r"(6) // %6
+ : "r"(2LL), // %5
+ "r"(6LL) // %6
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
);
}
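// A scalar sketch of the tap pattern above (illustrative; the center-row
// lines are elided from this hunk): the 2/6 byte post-increments walk
// src[i] and src[i + 2], the middle row is weighted twice, and the result
// is the saturated absolute value. SobelYRow_NEON below is the transposed
// equivalent.
static __inline uint8 SobelX_Ref(const uint8* y0, const uint8* y1,
                                 const uint8* y2, int i) {
  int sobel = (y0[i] - y0[i + 2]) + 2 * (y1[i] - y1[i + 2]) +
              (y2[i] - y2[i + 2]);
  if (sobel < 0) sobel = -sobel;
  return (uint8)(sobel > 255 ? 255 : sobel);
}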
@@ -3283,7 +2962,6 @@ void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
uint8* dst_sobely, int width) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"ld1 {v0.8b}, [%0],%4 \n" // left
@@ -3301,20 +2979,20 @@ void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
"ld1 {v2.8b}, [%0],%5 \n" // right
MEMACCESS(1)
"ld1 {v3.8b}, [%1],%5 \n"
- "subs %3, %3, #8 \n" // 8 pixels
+ "subs %w3, %w3, #8 \n" // 8 pixels
"usubl v1.8h, v2.8b, v3.8b \n"
"add v0.8h, v0.8h, v1.8h \n"
"abs v0.8h, v0.8h \n"
"uqxtn v0.8b, v0.8h \n"
MEMACCESS(2)
"st1 {v0.8b}, [%2], #8 \n" // store 8 sobely
- "bgt 1b \n"
+ "b.gt 1b \n"
: "+r"(src_y0), // %0
"+r"(src_y1), // %1
"+r"(dst_sobely), // %2
"+r"(width) // %3
- : "r"(1), // %4
- "r"(6) // %5
+ : "r"(1LL), // %4
+ "r"(6LL) // %5
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
);
}
diff --git a/TMessagesProj/jni/libyuv/source/row_win.cc b/TMessagesProj/jni/libyuv/source/row_win.cc
index f58fc5138..5cb5d1e4f 100644
--- a/TMessagesProj/jni/libyuv/source/row_win.cc
+++ b/TMessagesProj/jni/libyuv/source/row_win.cc
@@ -10,7 +10,8 @@
#include "libyuv/row.h"
-#if defined (_M_X64) && !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER)
+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_X64) && \
+ defined(_MSC_VER) && !defined(__clang__)
#include <emmintrin.h>
#include <tmmintrin.h> // For _mm_maddubs_epi16
#endif
@@ -20,166 +21,110 @@ namespace libyuv {
extern "C" {
#endif
-// This module is for Visual C.
-#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER)
-
-#define YG 74 /* (int8)(1.164 * 64 + 0.5) */
-
-#define UB 127 /* min(127,(int8)(2.018 * 64)) */
-#define UG -25 /* (int8)(-0.391 * 64 - 0.5) */
-#define UR 0
-
-#define VB 0
-#define VG -52 /* (int8)(-0.813 * 64 - 0.5) */
-#define VR 102 /* (int8)(1.596 * 64 + 0.5) */
-
-// Bias
-#define BB UB * 128 + VB * 128
-#define BG UG * 128 + VG * 128
-#define BR UR * 128 + VR * 128
-
-static const vec8 kUVToB = {
- UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB
-};
-
-static const vec8 kUVToR = {
- UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR
-};
-
-static const vec8 kUVToG = {
- UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG
-};
-
-static const vec8 kVUToB = {
- VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB,
-};
-
-static const vec8 kVUToR = {
- VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR,
-};
-
-static const vec8 kVUToG = {
- VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
-};
-
-static const vec16 kYToRgb = { YG, YG, YG, YG, YG, YG, YG, YG };
-static const vec16 kYSub16 = { 16, 16, 16, 16, 16, 16, 16, 16 };
-static const vec16 kUVBiasB = { BB, BB, BB, BB, BB, BB, BB, BB };
-static const vec16 kUVBiasG = { BG, BG, BG, BG, BG, BG, BG, BG };
-static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
+// This module is for Visual C 32/64 bit and clangcl 32 bit
+#if !defined(LIBYUV_DISABLE_X86) && \
+ (defined(_M_IX86) || (defined(_M_X64) && !defined(__clang__)))
// 64 bit
#if defined(_M_X64)
-// Aligned destination version.
-__declspec(align(16))
+// Read 4 UV from 422, upsample to 8 UV.
+#define READYUV422 \
+ xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); \
+ xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \
+ xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \
+ xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \
+ u_buf += 4; \
+ xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \
+ xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \
+ y_buf += 8;
+
+// Read 4 UV from 422, upsample to 8 UV. With 8 Alpha.
+#define READYUVA422 \
+ xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); \
+ xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \
+ xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \
+ xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \
+ u_buf += 4; \
+ xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \
+ xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \
+ y_buf += 8; \
+ xmm5 = _mm_loadl_epi64((__m128i*)a_buf); \
+ a_buf += 8;
+
+// Convert 8 pixels: 8 UV and 8 Y.
+#define YUVTORGB(yuvconstants) \
+ xmm1 = _mm_loadu_si128(&xmm0); \
+ xmm2 = _mm_loadu_si128(&xmm0); \
+ xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)yuvconstants->kUVToB); \
+ xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)yuvconstants->kUVToG); \
+ xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)yuvconstants->kUVToR); \
+ xmm0 = _mm_sub_epi16(*(__m128i*)yuvconstants->kUVBiasB, xmm0); \
+ xmm1 = _mm_sub_epi16(*(__m128i*)yuvconstants->kUVBiasG, xmm1); \
+ xmm2 = _mm_sub_epi16(*(__m128i*)yuvconstants->kUVBiasR, xmm2); \
+ xmm4 = _mm_mulhi_epu16(xmm4, *(__m128i*)yuvconstants->kYToRgb); \
+ xmm0 = _mm_adds_epi16(xmm0, xmm4); \
+ xmm1 = _mm_adds_epi16(xmm1, xmm4); \
+ xmm2 = _mm_adds_epi16(xmm2, xmm4); \
+ xmm0 = _mm_srai_epi16(xmm0, 6); \
+ xmm1 = _mm_srai_epi16(xmm1, 6); \
+ xmm2 = _mm_srai_epi16(xmm2, 6); \
+ xmm0 = _mm_packus_epi16(xmm0, xmm0); \
+ xmm1 = _mm_packus_epi16(xmm1, xmm1); \
+ xmm2 = _mm_packus_epi16(xmm2, xmm2);
+
+// Store 8 ARGB values.
+#define STOREARGB \
+ xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \
+ xmm2 = _mm_unpacklo_epi8(xmm2, xmm5); \
+ xmm1 = _mm_loadu_si128(&xmm0); \
+ xmm0 = _mm_unpacklo_epi16(xmm0, xmm2); \
+ xmm1 = _mm_unpackhi_epi16(xmm1, xmm2); \
+ _mm_storeu_si128((__m128i *)dst_argb, xmm0); \
+ _mm_storeu_si128((__m128i *)(dst_argb + 16), xmm1); \
+ dst_argb += 32;
+
+
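+// A scalar sketch (illustrative) of what READYUV422 + YUVTORGB compute for
+// the blue channel; UB, VB, BiasB and YG stand for the per-channel entries
+// of the yuvconstants tables referenced above. unpacklo(y, y) makes each
+// 16-bit lane y * 0x0101, so pmulhuw yields (y * 0x0101 * YG) >> 16:
+static __inline uint8 YuvToB_Ref(uint8 y, uint8 u, uint8 v,
+                                 int ub, int vb, int biasb, int yg) {
+  int y1 = (y * 0x0101 * yg) >> 16;
+  int b = (biasb - (u * ub + v * vb) + y1) >> 6;
+  return (uint8)(b < 0 ? 0 : (b > 255 ? 255 : b));
+}
+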
+#if defined(HAS_I422TOARGBROW_SSSE3)
void I422ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width) {
- __m128i xmm0, xmm1, xmm2, xmm3;
+ __m128i xmm0, xmm1, xmm2, xmm4;
const __m128i xmm5 = _mm_set1_epi8(-1);
- const __m128i xmm4 = _mm_setzero_si128();
const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
-
while (width > 0) {
- xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf);
- xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset));
- xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);
- xmm0 = _mm_unpacklo_epi16(xmm0, xmm0);
- xmm1 = _mm_load_si128(&xmm0);
- xmm2 = _mm_load_si128(&xmm0);
- xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)kUVToB);
- xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)kUVToG);
- xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)kUVToR);
- xmm0 = _mm_sub_epi16(xmm0, *(__m128i*)kUVBiasB);
- xmm1 = _mm_sub_epi16(xmm1, *(__m128i*)kUVBiasG);
- xmm2 = _mm_sub_epi16(xmm2, *(__m128i*)kUVBiasR);
- xmm3 = _mm_loadl_epi64((__m128i*)y_buf);
- xmm3 = _mm_unpacklo_epi8(xmm3, xmm4);
- xmm3 = _mm_subs_epi16(xmm3, *(__m128i*)kYSub16);
- xmm3 = _mm_mullo_epi16(xmm3, *(__m128i*)kYToRgb);
- xmm0 = _mm_adds_epi16(xmm0, xmm3);
- xmm1 = _mm_adds_epi16(xmm1, xmm3);
- xmm2 = _mm_adds_epi16(xmm2, xmm3);
- xmm0 = _mm_srai_epi16(xmm0, 6);
- xmm1 = _mm_srai_epi16(xmm1, 6);
- xmm2 = _mm_srai_epi16(xmm2, 6);
- xmm0 = _mm_packus_epi16(xmm0, xmm0);
- xmm1 = _mm_packus_epi16(xmm1, xmm1);
- xmm2 = _mm_packus_epi16(xmm2, xmm2);
- xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);
- xmm2 = _mm_unpacklo_epi8(xmm2, xmm5);
- xmm1 = _mm_load_si128(&xmm0);
- xmm0 = _mm_unpacklo_epi16(xmm0, xmm2);
- xmm1 = _mm_unpackhi_epi16(xmm1, xmm2);
-
- _mm_store_si128((__m128i *)dst_argb, xmm0);
- _mm_store_si128((__m128i *)(dst_argb + 16), xmm1);
-
- y_buf += 8;
- u_buf += 4;
- dst_argb += 32;
+ READYUV422
+ YUVTORGB(yuvconstants)
+ STOREARGB
width -= 8;
}
}
+#endif
-// Unaligned destination version.
-void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- __m128i xmm0, xmm1, xmm2, xmm3;
- const __m128i xmm5 = _mm_set1_epi8(-1);
- const __m128i xmm4 = _mm_setzero_si128();
+#if defined(HAS_I422ALPHATOARGBROW_SSSE3)
+void I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ const uint8* a_buf,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ __m128i xmm0, xmm1, xmm2, xmm4, xmm5;
const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
-
while (width > 0) {
- xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf);
- xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset));
- xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);
- xmm0 = _mm_unpacklo_epi16(xmm0, xmm0);
- xmm1 = _mm_load_si128(&xmm0);
- xmm2 = _mm_load_si128(&xmm0);
- xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)kUVToB);
- xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)kUVToG);
- xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)kUVToR);
- xmm0 = _mm_sub_epi16(xmm0, *(__m128i*)kUVBiasB);
- xmm1 = _mm_sub_epi16(xmm1, *(__m128i*)kUVBiasG);
- xmm2 = _mm_sub_epi16(xmm2, *(__m128i*)kUVBiasR);
- xmm3 = _mm_loadl_epi64((__m128i*)y_buf);
- xmm3 = _mm_unpacklo_epi8(xmm3, xmm4);
- xmm3 = _mm_subs_epi16(xmm3, *(__m128i*)kYSub16);
- xmm3 = _mm_mullo_epi16(xmm3, *(__m128i*)kYToRgb);
- xmm0 = _mm_adds_epi16(xmm0, xmm3);
- xmm1 = _mm_adds_epi16(xmm1, xmm3);
- xmm2 = _mm_adds_epi16(xmm2, xmm3);
- xmm0 = _mm_srai_epi16(xmm0, 6);
- xmm1 = _mm_srai_epi16(xmm1, 6);
- xmm2 = _mm_srai_epi16(xmm2, 6);
- xmm0 = _mm_packus_epi16(xmm0, xmm0);
- xmm1 = _mm_packus_epi16(xmm1, xmm1);
- xmm2 = _mm_packus_epi16(xmm2, xmm2);
- xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);
- xmm2 = _mm_unpacklo_epi8(xmm2, xmm5);
- xmm1 = _mm_load_si128(&xmm0);
- xmm0 = _mm_unpacklo_epi16(xmm0, xmm2);
- xmm1 = _mm_unpackhi_epi16(xmm1, xmm2);
-
- _mm_storeu_si128((__m128i *)dst_argb, xmm0);
- _mm_storeu_si128((__m128i *)(dst_argb + 16), xmm1);
-
- y_buf += 8;
- u_buf += 4;
- dst_argb += 32;
+ READYUVA422
+ YUVTORGB(yuvconstants)
+ STOREARGB
width -= 8;
}
}
+#endif
+
// 32 bit
#else // defined(_M_X64)
-
#ifdef HAS_ARGBTOYROW_SSSE3
// Constants for ARGB.
@@ -208,15 +153,10 @@ static const vec8 kARGBToVJ = {
-20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0
};
-// vpermd for vphaddw + vpackuswb vpermd.
-static const lvec32 kPermdARGBToY_AVX = {
- 0, 4, 1, 5, 2, 6, 3, 7
-};
-
// vpshufb for vphaddw + vpackuswb packed to shorts.
static const lvec8 kShufARGBToUV_AVX = {
0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15,
- 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15,
+ 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15
};
// Constants for BGRA.
@@ -262,6 +202,7 @@ static const uvec8 kAddY16 = {
16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u
};
+// 7 bit fixed point 0.5.
static const vec16 kAddYJ64 = {
64, 64, 64, 64, 64, 64, 64, 64
};
@@ -285,6 +226,24 @@ static const uvec8 kShuffleMaskRAWToARGB = {
2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u
};
+// Shuffle table for converting RAW to RGB24. First 8.
+static const uvec8 kShuffleMaskRAWToRGB24_0 = {
+ 2u, 1u, 0u, 5u, 4u, 3u, 8u, 7u,
+ 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u
+};
+
+// Shuffle table for converting RAW to RGB24. Middle 8.
+static const uvec8 kShuffleMaskRAWToRGB24_1 = {
+ 2u, 7u, 6u, 5u, 10u, 9u, 8u, 13u,
+ 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u
+};
+
+// Shuffle table for converting RAW to RGB24. Last 8.
+static const uvec8 kShuffleMaskRAWToRGB24_2 = {
+ 8u, 7u, 12u, 11u, 10u, 15u, 14u, 13u,
+ 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u
+};
+
// Shuffle table for converting ARGB to RGB24.
static const uvec8 kShuffleMaskARGBToRGB24 = {
0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u, 14u, 128u, 128u, 128u, 128u
@@ -300,51 +259,46 @@ static const uvec8 kShuffleMaskARGBToRGB24_0 = {
0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 128u, 128u, 128u, 128u, 10u, 12u, 13u, 14u
};
-// Shuffle table for converting ARGB to RAW.
-static const uvec8 kShuffleMaskARGBToRAW_0 = {
- 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 128u, 128u, 128u, 128u, 8u, 14u, 13u, 12u
+// YUY2 shuf 16 Y to 32 Y.
+static const lvec8 kShuffleYUY2Y = {
+ 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14,
+ 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14
+};
+
+// YUY2 shuf 8 UV to 16 UV.
+static const lvec8 kShuffleYUY2UV = {
+ 1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9, 11, 13, 15, 13, 15,
+ 1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9, 11, 13, 15, 13, 15
+};
+
+// UYVY shuf 16 Y to 32 Y.
+static const lvec8 kShuffleUYVYY = {
+ 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15,
+ 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15
+};
+
+// UYVY shuf 8 UV to 16 UV.
+static const lvec8 kShuffleUYVYUV = {
+ 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14,
+ 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14
+};
+
+// NV21 shuf 8 VU to 16 UV.
+static const lvec8 kShuffleNV21 = {
+ 1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6,
+ 1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6,
};
// Duplicates gray value 3 times and fills in alpha opaque.
-__declspec(naked) __declspec(align(16))
-void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
+__declspec(naked)
+void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width) {
__asm {
mov eax, [esp + 4] // src_y
mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // pix
+ mov ecx, [esp + 12] // width
pcmpeqb xmm5, xmm5 // generate mask 0xff000000
pslld xmm5, 24
- align 4
- convertloop:
- movq xmm0, qword ptr [eax]
- lea eax, [eax + 8]
- punpcklbw xmm0, xmm0
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm0
- punpckhwd xmm1, xmm1
- por xmm0, xmm5
- por xmm1, xmm5
- movdqa [edx], xmm0
- movdqa [edx + 16], xmm1
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void I400ToARGBRow_Unaligned_SSE2(const uint8* src_y, uint8* dst_argb,
- int pix) {
- __asm {
- mov eax, [esp + 4] // src_y
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0xff000000
- pslld xmm5, 24
-
- align 4
convertloop:
movq xmm0, qword ptr [eax]
lea eax, [eax + 8]
@@ -363,17 +317,48 @@ void I400ToARGBRow_Unaligned_SSE2(const uint8* src_y, uint8* dst_argb,
}
}
-__declspec(naked) __declspec(align(16))
-void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) {
+#ifdef HAS_J400TOARGBROW_AVX2
+// Duplicates gray value 3 times and fills in alpha opaque.
+__declspec(naked)
+void J400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int width) {
+ __asm {
+ mov eax, [esp + 4] // src_y
+ mov edx, [esp + 8] // dst_argb
+ mov ecx, [esp + 12] // width
+ vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xff000000
+ vpslld ymm5, ymm5, 24
+
+ convertloop:
+ vmovdqu xmm0, [eax]
+ lea eax, [eax + 16]
+ vpermq ymm0, ymm0, 0xd8
+ vpunpcklbw ymm0, ymm0, ymm0
+ vpermq ymm0, ymm0, 0xd8
+ vpunpckhwd ymm1, ymm0, ymm0
+ vpunpcklwd ymm0, ymm0, ymm0
+ vpor ymm0, ymm0, ymm5
+ vpor ymm1, ymm1, ymm5
+ vmovdqu [edx], ymm0
+ vmovdqu [edx + 32], ymm1
+ lea edx, [edx + 64]
+ sub ecx, 16
+ jg convertloop
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_J400TOARGBROW_AVX2
+
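+// A scalar sketch (illustrative) of the expansion both the SSE2 and AVX2
+// versions perform: each gray byte becomes one BGRA pixel with opaque alpha.
+static __inline uint32 J400ToARGB_Ref(uint8 y) {
+  return 0xff000000u | ((uint32)y << 16) | ((uint32)y << 8) | (uint32)y;
+}
+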
+__declspec(naked)
+void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int width) {
__asm {
mov eax, [esp + 4] // src_rgb24
mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // pix
+ mov ecx, [esp + 12] // width
pcmpeqb xmm5, xmm5 // generate mask 0xff000000
pslld xmm5, 24
- movdqa xmm4, kShuffleMaskRGB24ToARGB
+ movdqa xmm4, xmmword ptr kShuffleMaskRGB24ToARGB
- align 4
convertloop:
movdqu xmm0, [eax]
movdqu xmm1, [eax + 16]
@@ -385,35 +370,34 @@ void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) {
por xmm2, xmm5
palignr xmm1, xmm0, 12 // xmm1 = { xmm3[0:7] xmm0[12:15]}
pshufb xmm0, xmm4
- movdqa [edx + 32], xmm2
+ movdqu [edx + 32], xmm2
por xmm0, xmm5
pshufb xmm1, xmm4
- movdqa [edx], xmm0
+ movdqu [edx], xmm0
por xmm1, xmm5
palignr xmm3, xmm3, 4 // xmm3 = { xmm3[4:15]}
pshufb xmm3, xmm4
- movdqa [edx + 16], xmm1
+ movdqu [edx + 16], xmm1
por xmm3, xmm5
- sub ecx, 16
- movdqa [edx + 48], xmm3
+ movdqu [edx + 48], xmm3
lea edx, [edx + 64]
+ sub ecx, 16
jg convertloop
ret
}
}
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb,
- int pix) {
+ int width) {
__asm {
mov eax, [esp + 4] // src_raw
mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // pix
+ mov ecx, [esp + 12] // width
pcmpeqb xmm5, xmm5 // generate mask 0xff000000
pslld xmm5, 24
- movdqa xmm4, kShuffleMaskRAWToARGB
+ movdqa xmm4, xmmword ptr kShuffleMaskRAWToARGB
- align 4
convertloop:
movdqu xmm0, [eax]
movdqu xmm1, [eax + 16]
@@ -425,18 +409,46 @@ void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb,
por xmm2, xmm5
palignr xmm1, xmm0, 12 // xmm1 = { xmm3[0:7] xmm0[12:15]}
pshufb xmm0, xmm4
- movdqa [edx + 32], xmm2
+ movdqu [edx + 32], xmm2
por xmm0, xmm5
pshufb xmm1, xmm4
- movdqa [edx], xmm0
+ movdqu [edx], xmm0
por xmm1, xmm5
palignr xmm3, xmm3, 4 // xmm3 = { xmm3[4:15]}
pshufb xmm3, xmm4
- movdqa [edx + 16], xmm1
+ movdqu [edx + 16], xmm1
por xmm3, xmm5
- sub ecx, 16
- movdqa [edx + 48], xmm3
+ movdqu [edx + 48], xmm3
lea edx, [edx + 64]
+ sub ecx, 16
+ jg convertloop
+ ret
+ }
+}
+
+__declspec(naked)
+void RAWToRGB24Row_SSSE3(const uint8* src_raw, uint8* dst_rgb24, int width) {
+ __asm {
+ mov eax, [esp + 4] // src_raw
+ mov edx, [esp + 8] // dst_rgb24
+ mov ecx, [esp + 12] // width
+ movdqa xmm3, xmmword ptr kShuffleMaskRAWToRGB24_0
+ movdqa xmm4, xmmword ptr kShuffleMaskRAWToRGB24_1
+ movdqa xmm5, xmmword ptr kShuffleMaskRAWToRGB24_2
+
+ convertloop:
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 4]
+ movdqu xmm2, [eax + 8]
+ lea eax, [eax + 24]
+ pshufb xmm0, xmm3
+ pshufb xmm1, xmm4
+ pshufb xmm2, xmm5
+ movq qword ptr [edx], xmm0
+ movq qword ptr [edx + 8], xmm1
+ movq qword ptr [edx + 16], xmm2
+ lea edx, [edx + 24]
+ sub ecx, 8
jg convertloop
ret
}
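// A scalar sketch (illustrative) of what the three RAWToRGB24 shuffle masks
// implement 8 pixels at a time: RAW and RGB24 differ only in byte order, so
// each 3-byte pixel swaps its first and third byte.
static __inline void RAWToRGB24Pixel_Ref(const uint8* src, uint8* dst) {
  dst[0] = src[2];
  dst[1] = src[1];
  dst[2] = src[0];
}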
@@ -449,9 +461,9 @@ void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb,
// v * (256 + 8)
// G shift of 5 is incorporated, so shift is 5 + 8 and 5 + 3
// 20 instructions.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb,
- int pix) {
+ int width) {
__asm {
mov eax, 0x01080108 // generate multiplier to repeat 5 bits
movd xmm5, eax
@@ -469,11 +481,10 @@ void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb,
mov eax, [esp + 4] // src_rgb565
mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // pix
+ mov ecx, [esp + 12] // width
sub edx, eax
sub edx, eax
- align 4
convertloop:
movdqu xmm0, [eax] // fetch 8 pixels of bgr565
movdqa xmm1, xmm0
@@ -490,8 +501,8 @@ void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb,
movdqa xmm2, xmm1
punpcklbw xmm1, xmm0
punpckhbw xmm2, xmm0
- movdqa [eax * 2 + edx], xmm1 // store 4 pixels of ARGB
- movdqa [eax * 2 + edx + 16], xmm2 // store next 4 pixels of ARGB
+ movdqu [eax * 2 + edx], xmm1 // store 4 pixels of ARGB
+ movdqu [eax * 2 + edx + 16], xmm2 // store next 4 pixels of ARGB
lea eax, [eax + 16]
sub ecx, 8
jg convertloop
@@ -499,10 +510,157 @@ void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb,
}
}
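// A scalar sketch (illustrative) of the pmul bit-replication trick shared by
// the SSE2 version above and the AVX2 versions below: with a 5-bit field in
// the top of a 16-bit lane, pmulhuw by 0x0108 (256 + 8) returns
// (v << 3) | (v >> 2), the standard 5-to-8-bit expansion.
static __inline uint8 Replicate5To8_Ref(uint8 v5) {  // v5 in 0..31
  return (uint8)((v5 << 3) | (v5 >> 2));
}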
+#ifdef HAS_RGB565TOARGBROW_AVX2
+// pmul method to replicate bits.
+// Math to replicate bits:
+// (v << 8) | (v << 3)
+// v * 256 + v * 8
+// v * (256 + 8)
+// G shift of 5 is incorporated, so shift is 5 + 8 and 5 + 3
+__declspec(naked)
+void RGB565ToARGBRow_AVX2(const uint8* src_rgb565, uint8* dst_argb,
+ int width) {
+ __asm {
+ mov eax, 0x01080108 // generate multiplier to repeat 5 bits
+ vmovd xmm5, eax
+ vbroadcastss ymm5, xmm5
+ mov eax, 0x20802080 // multiplier shift by 5 and then repeat 6 bits
+ vmovd xmm6, eax
+ vbroadcastss ymm6, xmm6
+ vpcmpeqb ymm3, ymm3, ymm3 // generate mask 0xf800f800 for Red
+ vpsllw ymm3, ymm3, 11
+ vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0x07e007e0 for Green
+ vpsllw ymm4, ymm4, 10
+ vpsrlw ymm4, ymm4, 5
+ vpcmpeqb ymm7, ymm7, ymm7 // generate mask 0xff00ff00 for Alpha
+ vpsllw ymm7, ymm7, 8
+
+ mov eax, [esp + 4] // src_rgb565
+ mov edx, [esp + 8] // dst_argb
+ mov ecx, [esp + 12] // width
+ sub edx, eax
+ sub edx, eax
+
+ convertloop:
+ vmovdqu ymm0, [eax] // fetch 16 pixels of bgr565
+ vpand ymm1, ymm0, ymm3 // R in upper 5 bits
+ vpsllw ymm2, ymm0, 11 // B in upper 5 bits
+ vpmulhuw ymm1, ymm1, ymm5 // * (256 + 8)
+ vpmulhuw ymm2, ymm2, ymm5 // * (256 + 8)
+ vpsllw ymm1, ymm1, 8
+ vpor ymm1, ymm1, ymm2 // RB
+ vpand ymm0, ymm0, ymm4 // G in middle 6 bits
+ vpmulhuw ymm0, ymm0, ymm6 // << 5 * (256 + 4)
+ vpor ymm0, ymm0, ymm7 // AG
+ vpermq ymm0, ymm0, 0xd8 // mutate for unpack
+ vpermq ymm1, ymm1, 0xd8
+ vpunpckhbw ymm2, ymm1, ymm0
+ vpunpcklbw ymm1, ymm1, ymm0
+ vmovdqu [eax * 2 + edx], ymm1 // store 8 pixels of ARGB
+ vmovdqu [eax * 2 + edx + 32], ymm2 // store next 8 pixels of ARGB
+ lea eax, [eax + 32]
+ sub ecx, 16
+ jg convertloop
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_RGB565TOARGBROW_AVX2
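+
+// For reference only: a minimal scalar sketch of the bit replication the
+// pmulhuw trick above computes, one pixel at a time. Note that
+// (v << 3) | (v >> 2) == (v * (256 + 8)) >> 5 for a 5 bit field.
+// The helper name is illustrative, not part of this file.
+static __inline void RGB565ToARGBPixel_C(uint16 rgb565, uint8* dst_argb) {
+ uint8 b = (uint8)(rgb565 & 0x1f);
+ uint8 g = (uint8)((rgb565 >> 5) & 0x3f);
+ uint8 r = (uint8)((rgb565 >> 11) & 0x1f);
+ dst_argb[0] = (uint8)((b << 3) | (b >> 2)); // 5 -> 8 bits
+ dst_argb[1] = (uint8)((g << 2) | (g >> 4)); // 6 -> 8 bits
+ dst_argb[2] = (uint8)((r << 3) | (r >> 2)); // 5 -> 8 bits
+ dst_argb[3] = 255u; // opaque alpha
+}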
+
+#ifdef HAS_ARGB1555TOARGBROW_AVX2
+__declspec(naked)
+void ARGB1555ToARGBRow_AVX2(const uint8* src_argb1555, uint8* dst_argb,
+ int width) {
+ __asm {
+ mov eax, 0x01080108 // generate multiplier to repeat 5 bits
+ vmovd xmm5, eax
+ vbroadcastss ymm5, xmm5
+ mov eax, 0x42004200 // multiplier shift by 6 and then repeat 5 bits
+ vmovd xmm6, eax
+ vbroadcastss ymm6, xmm6
+ vpcmpeqb ymm3, ymm3, ymm3 // generate mask 0xf800f800 for Red
+ vpsllw ymm3, ymm3, 11
+ vpsrlw ymm4, ymm3, 6 // generate mask 0x03e003e0 for Green
+ vpcmpeqb ymm7, ymm7, ymm7 // generate mask 0xff00ff00 for Alpha
+ vpsllw ymm7, ymm7, 8
+
+ mov eax, [esp + 4] // src_argb1555
+ mov edx, [esp + 8] // dst_argb
+ mov ecx, [esp + 12] // width
+ sub edx, eax
+ sub edx, eax
+
+ convertloop:
+ vmovdqu ymm0, [eax] // fetch 16 pixels of 1555
+ vpsllw ymm1, ymm0, 1 // R in upper 5 bits
+ vpsllw ymm2, ymm0, 11 // B in upper 5 bits
+ vpand ymm1, ymm1, ymm3
+ vpmulhuw ymm2, ymm2, ymm5 // * (256 + 8)
+ vpmulhuw ymm1, ymm1, ymm5 // * (256 + 8)
+ vpsllw ymm1, ymm1, 8
+ vpor ymm1, ymm1, ymm2 // RB
+ vpsraw ymm2, ymm0, 8 // A
+ vpand ymm0, ymm0, ymm4 // G in middle 5 bits
+ vpmulhuw ymm0, ymm0, ymm6 // << 6 * (256 + 8)
+ vpand ymm2, ymm2, ymm7
+ vpor ymm0, ymm0, ymm2 // AG
+ vpermq ymm0, ymm0, 0xd8 // mutate for unpack
+ vpermq ymm1, ymm1, 0xd8
+ vpunpckhbw ymm2, ymm1, ymm0
+ vpunpcklbw ymm1, ymm1, ymm0
+ vmovdqu [eax * 2 + edx], ymm1 // store 8 pixels of ARGB
+ vmovdqu [eax * 2 + edx + 32], ymm2 // store next 8 pixels of ARGB
+ lea eax, [eax + 32]
+ sub ecx, 16
+ jg convertloop
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_ARGB1555TOARGBROW_AVX2
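+
+// For reference only: a scalar sketch of ARGB1555 expansion. The vpsraw by 8
+// above smears the alpha bit across its whole byte, which matches the 0/255
+// select here. The helper name is illustrative.
+static __inline void ARGB1555ToARGBPixel_C(uint16 src, uint8* dst_argb) {
+ uint8 b = (uint8)(src & 0x1f);
+ uint8 g = (uint8)((src >> 5) & 0x1f);
+ uint8 r = (uint8)((src >> 10) & 0x1f);
+ dst_argb[0] = (uint8)((b << 3) | (b >> 2)); // 5 -> 8 bits
+ dst_argb[1] = (uint8)((g << 3) | (g >> 2));
+ dst_argb[2] = (uint8)((r << 3) | (r >> 2));
+ dst_argb[3] = (uint8)((src & 0x8000) ? 255 : 0); // 1 -> 8 bits
+}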
+
+#ifdef HAS_ARGB4444TOARGBROW_AVX2
+__declspec(naked)
+void ARGB4444ToARGBRow_AVX2(const uint8* src_argb4444, uint8* dst_argb,
+ int width) {
+ __asm {
+ mov eax, 0x0f0f0f0f // generate mask 0x0f0f0f0f
+ vmovd xmm4, eax
+ vbroadcastss ymm4, xmm4
+ vpslld ymm5, ymm4, 4 // 0xf0f0f0f0 for high nibbles
+ mov eax, [esp + 4] // src_argb4444
+ mov edx, [esp + 8] // dst_argb
+ mov ecx, [esp + 12] // width
+ sub edx, eax
+ sub edx, eax
+
+ convertloop:
+ vmovdqu ymm0, [eax] // fetch 16 pixels of bgra4444
+ vpand ymm2, ymm0, ymm5 // mask high nibbles
+ vpand ymm0, ymm0, ymm4 // mask low nibbles
+ vpsrlw ymm3, ymm2, 4
+ vpsllw ymm1, ymm0, 4
+ vpor ymm2, ymm2, ymm3
+ vpor ymm0, ymm0, ymm1
+ vpermq ymm0, ymm0, 0xd8 // mutate for unpack
+ vpermq ymm2, ymm2, 0xd8
+ vpunpckhbw ymm1, ymm0, ymm2
+ vpunpcklbw ymm0, ymm0, ymm2
+ vmovdqu [eax * 2 + edx], ymm0 // store 8 pixels of ARGB
+ vmovdqu [eax * 2 + edx + 32], ymm1 // store next 8 pixels of ARGB
+ lea eax, [eax + 32]
+ sub ecx, 16
+ jg convertloop
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_ARGB4444TOARGBROW_AVX2
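+
+// For reference only: a scalar sketch of nibble replication. Multiplying a
+// 4 bit value by 0x11 copies it into both halves of a byte, so 0xf expands
+// to exactly 0xff. The helper name is illustrative.
+static __inline void ARGB4444ToARGBPixel_C(uint16 src, uint8* dst_argb) {
+ dst_argb[0] = (uint8)((src & 0x0f) * 0x11); // B
+ dst_argb[1] = (uint8)(((src >> 4) & 0x0f) * 0x11); // G
+ dst_argb[2] = (uint8)(((src >> 8) & 0x0f) * 0x11); // R
+ dst_argb[3] = (uint8)(((src >> 12) & 0x0f) * 0x11); // A
+}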
+
// 24 instructions
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb,
- int pix) {
+ int width) {
__asm {
mov eax, 0x01080108 // generate multiplier to repeat 5 bits
movd xmm5, eax
@@ -519,11 +677,10 @@ void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb,
mov eax, [esp + 4] // src_argb1555
mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // pix
+ mov ecx, [esp + 12] // width
sub edx, eax
sub edx, eax
- align 4
convertloop:
movdqu xmm0, [eax] // fetch 8 pixels of 1555
movdqa xmm1, xmm0
@@ -544,8 +701,8 @@ void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb,
movdqa xmm2, xmm1
punpcklbw xmm1, xmm0
punpckhbw xmm2, xmm0
- movdqa [eax * 2 + edx], xmm1 // store 4 pixels of ARGB
- movdqa [eax * 2 + edx + 16], xmm2 // store next 4 pixels of ARGB
+ movdqu [eax * 2 + edx], xmm1 // store 4 pixels of ARGB
+ movdqu [eax * 2 + edx + 16], xmm2 // store next 4 pixels of ARGB
lea eax, [eax + 16]
sub ecx, 8
jg convertloop
@@ -554,9 +711,9 @@ void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb,
}
// 18 instructions.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,
- int pix) {
+ int width) {
__asm {
mov eax, 0x0f0f0f0f // generate mask 0x0f0f0f0f
movd xmm4, eax
@@ -565,11 +722,10 @@ void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,
pslld xmm5, 4
mov eax, [esp + 4] // src_argb4444
mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // pix
+ mov ecx, [esp + 12] // width
sub edx, eax
sub edx, eax
- align 4
convertloop:
movdqu xmm0, [eax] // fetch 8 pixels of bgra4444
movdqa xmm2, xmm0
@@ -584,8 +740,8 @@ void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,
movdqa xmm1, xmm0
punpcklbw xmm0, xmm2
punpckhbw xmm1, xmm2
- movdqa [eax * 2 + edx], xmm0 // store 4 pixels of ARGB
- movdqa [eax * 2 + edx + 16], xmm1 // store next 4 pixels of ARGB
+ movdqu [eax * 2 + edx], xmm0 // store 4 pixels of ARGB
+ movdqu [eax * 2 + edx + 16], xmm1 // store next 4 pixels of ARGB
lea eax, [eax + 16]
sub ecx, 8
jg convertloop
@@ -593,15 +749,14 @@ void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,
}
}
-__declspec(naked) __declspec(align(16))
-void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) {
+__declspec(naked)
+void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width) {
__asm {
mov eax, [esp + 4] // src_argb
mov edx, [esp + 8] // dst_rgb
- mov ecx, [esp + 12] // pix
- movdqa xmm6, kShuffleMaskARGBToRGB24
+ mov ecx, [esp + 12] // width
+ movdqa xmm6, xmmword ptr kShuffleMaskARGBToRGB24
- align 4
convertloop:
movdqu xmm0, [eax] // fetch 16 pixels of argb
movdqu xmm1, [eax + 16]
@@ -632,15 +787,14 @@ void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) {
}
}
-__declspec(naked) __declspec(align(16))
-void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) {
+__declspec(naked)
+void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width) {
__asm {
mov eax, [esp + 4] // src_argb
mov edx, [esp + 8] // dst_rgb
- mov ecx, [esp + 12] // pix
- movdqa xmm6, kShuffleMaskARGBToRAW
+ mov ecx, [esp + 12] // width
+ movdqa xmm6, xmmword ptr kShuffleMaskARGBToRAW
- align 4
convertloop:
movdqu xmm0, [eax] // fetch 16 pixels of argb
movdqu xmm1, [eax + 16]
@@ -671,12 +825,12 @@ void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) {
}
}
-__declspec(naked) __declspec(align(16))
-void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
+__declspec(naked)
+void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width) {
__asm {
mov eax, [esp + 4] // src_argb
mov edx, [esp + 8] // dst_rgb
- mov ecx, [esp + 12] // pix
+ mov ecx, [esp + 12] // width
pcmpeqb xmm3, xmm3 // generate mask 0x0000001f
psrld xmm3, 27
pcmpeqb xmm4, xmm4 // generate mask 0x000007e0
@@ -685,9 +839,8 @@ void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
pcmpeqb xmm5, xmm5 // generate mask 0xfffff800
pslld xmm5, 11
- align 4
convertloop:
- movdqa xmm0, [eax] // fetch 4 pixels of argb
+ movdqu xmm0, [eax] // fetch 4 pixels of argb
movdqa xmm1, xmm0 // B
movdqa xmm2, xmm0 // G
pslld xmm0, 8 // R
@@ -709,13 +862,101 @@ void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
}
}
+__declspec(naked)
+void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb,
+ const uint32 dither4, int width) {
+ __asm {
+ mov eax, [esp + 4] // src_argb
+ mov edx, [esp + 8] // dst_rgb
+ movd xmm6, [esp + 12] // dither4
+ mov ecx, [esp + 16] // width
+ punpcklbw xmm6, xmm6 // make dither 16 bytes
+ movdqa xmm7, xmm6
+ punpcklwd xmm6, xmm6
+ punpckhwd xmm7, xmm7
+ pcmpeqb xmm3, xmm3 // generate mask 0x0000001f
+ psrld xmm3, 27
+ pcmpeqb xmm4, xmm4 // generate mask 0x000007e0
+ psrld xmm4, 26
+ pslld xmm4, 5
+ pcmpeqb xmm5, xmm5 // generate mask 0xfffff800
+ pslld xmm5, 11
+
+ convertloop:
+ movdqu xmm0, [eax] // fetch 4 pixels of argb
+ paddusb xmm0, xmm6 // add dither
+ movdqa xmm1, xmm0 // B
+ movdqa xmm2, xmm0 // G
+ pslld xmm0, 8 // R
+ psrld xmm1, 3 // B
+ psrld xmm2, 5 // G
+ psrad xmm0, 16 // R
+ pand xmm1, xmm3 // B
+ pand xmm2, xmm4 // G
+ pand xmm0, xmm5 // R
+ por xmm1, xmm2 // BG
+ por xmm0, xmm1 // BGR
+ packssdw xmm0, xmm0
+ lea eax, [eax + 16]
+ movq qword ptr [edx], xmm0 // store 4 pixels of RGB565
+ lea edx, [edx + 8]
+ sub ecx, 4
+ jg convertloop
+ ret
+ }
+}
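+
+// For reference only: a scalar sketch of the dithered pack above. dither4
+// holds one dither byte per pixel of a repeating 4 pixel pattern; it is
+// added with unsigned saturation (paddusb) before the 565 truncation.
+// The helper name is illustrative.
+static __inline uint16 ARGBToRGB565DitherPixel_C(const uint8* argb,
+ uint32 dither4, int x) {
+ int d = (dither4 >> ((x & 3) * 8)) & 0xff;
+ int b = argb[0] + d; if (b > 255) b = 255;
+ int g = argb[1] + d; if (g > 255) g = 255;
+ int r = argb[2] + d; if (r > 255) r = 255;
+ return (uint16)(((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3));
+}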
+
+#ifdef HAS_ARGBTORGB565DITHERROW_AVX2
+__declspec(naked)
+void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb, uint8* dst_rgb,
+ const uint32 dither4, int width) {
+ __asm {
+ mov eax, [esp + 4] // src_argb
+ mov edx, [esp + 8] // dst_rgb
+ vbroadcastss xmm6, [esp + 12] // dither4
+ mov ecx, [esp + 16] // width
+ vpunpcklbw xmm6, xmm6, xmm6 // make dither 32 bytes
+ vpermq ymm6, ymm6, 0xd8
+ vpunpcklwd ymm6, ymm6, ymm6
+ vpcmpeqb ymm3, ymm3, ymm3 // generate mask 0x0000001f
+ vpsrld ymm3, ymm3, 27
+ vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0x000007e0
+ vpsrld ymm4, ymm4, 26
+ vpslld ymm4, ymm4, 5
+ vpslld ymm5, ymm3, 11 // generate mask 0x0000f800
+
+ convertloop:
+ vmovdqu ymm0, [eax] // fetch 8 pixels of argb
+ vpaddusb ymm0, ymm0, ymm6 // add dither
+ vpsrld ymm2, ymm0, 5 // G
+ vpsrld ymm1, ymm0, 3 // B
+ vpsrld ymm0, ymm0, 8 // R
+ vpand ymm2, ymm2, ymm4 // G
+ vpand ymm1, ymm1, ymm3 // B
+ vpand ymm0, ymm0, ymm5 // R
+ vpor ymm1, ymm1, ymm2 // BG
+ vpor ymm0, ymm0, ymm1 // BGR
+ vpackusdw ymm0, ymm0, ymm0
+ vpermq ymm0, ymm0, 0xd8
+ lea eax, [eax + 32]
+ vmovdqu [edx], xmm0 // store 8 pixels of RGB565
+ lea edx, [edx + 16]
+ sub ecx, 8
+ jg convertloop
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_ARGBTORGB565DITHERROW_AVX2
+
// TODO(fbarchard): Improve sign extension/packing.
-__declspec(naked) __declspec(align(16))
-void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
+__declspec(naked)
+void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width) {
__asm {
mov eax, [esp + 4] // src_argb
mov edx, [esp + 8] // dst_rgb
- mov ecx, [esp + 12] // pix
+ mov ecx, [esp + 12] // width
pcmpeqb xmm4, xmm4 // generate mask 0x0000001f
psrld xmm4, 27
movdqa xmm5, xmm4 // generate mask 0x000003e0
@@ -725,9 +966,8 @@ void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
pcmpeqb xmm7, xmm7 // generate mask 0xffff8000
pslld xmm7, 15
- align 4
convertloop:
- movdqa xmm0, [eax] // fetch 4 pixels of argb
+ movdqu xmm0, [eax] // fetch 4 pixels of argb
movdqa xmm1, xmm0 // B
movdqa xmm2, xmm0 // G
movdqa xmm3, xmm0 // R
@@ -752,25 +992,24 @@ void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
}
}
-__declspec(naked) __declspec(align(16))
-void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
+__declspec(naked)
+void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width) {
__asm {
mov eax, [esp + 4] // src_argb
mov edx, [esp + 8] // dst_rgb
- mov ecx, [esp + 12] // pix
+ mov ecx, [esp + 12] // width
pcmpeqb xmm4, xmm4 // generate mask 0xf000f000
psllw xmm4, 12
movdqa xmm3, xmm4 // generate mask 0x00f000f0
psrlw xmm3, 8
- align 4
convertloop:
- movdqa xmm0, [eax] // fetch 4 pixels of argb
+ movdqu xmm0, [eax] // fetch 4 pixels of argb
movdqa xmm1, xmm0
pand xmm0, xmm3 // low nibble
pand xmm1, xmm4 // high nibble
- psrl xmm0, 4
- psrl xmm1, 8
+ psrld xmm0, 4
+ psrld xmm1, 8
por xmm0, xmm1
packuswb xmm0, xmm0
lea eax, [eax + 16]
@@ -782,22 +1021,129 @@ void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
}
}
+#ifdef HAS_ARGBTORGB565ROW_AVX2
+__declspec(naked)
+void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width) {
+ __asm {
+ mov eax, [esp + 4] // src_argb
+ mov edx, [esp + 8] // dst_rgb
+ mov ecx, [esp + 12] // width
+ vpcmpeqb ymm3, ymm3, ymm3 // generate mask 0x0000001f
+ vpsrld ymm3, ymm3, 27
+ vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0x000007e0
+ vpsrld ymm4, ymm4, 26
+ vpslld ymm4, ymm4, 5
+ vpslld ymm5, ymm3, 11 // generate mask 0x0000f800
+
+ convertloop:
+ vmovdqu ymm0, [eax] // fetch 8 pixels of argb
+ vpsrld ymm2, ymm0, 5 // G
+ vpsrld ymm1, ymm0, 3 // B
+ vpsrld ymm0, ymm0, 8 // R
+ vpand ymm2, ymm2, ymm4 // G
+ vpand ymm1, ymm1, ymm3 // B
+ vpand ymm0, ymm0, ymm5 // R
+ vpor ymm1, ymm1, ymm2 // BG
+ vpor ymm0, ymm0, ymm1 // BGR
+ vpackusdw ymm0, ymm0, ymm0
+ vpermq ymm0, ymm0, 0xd8
+ lea eax, [eax + 32]
+ vmovdqu [edx], xmm0 // store 8 pixels of RGB565
+ lea edx, [edx + 16]
+ sub ecx, 8
+ jg convertloop
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_ARGBTORGB565ROW_AVX2
+
+#ifdef HAS_ARGBTOARGB1555ROW_AVX2
+__declspec(naked)
+void ARGBToARGB1555Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width) {
+ __asm {
+ mov eax, [esp + 4] // src_argb
+ mov edx, [esp + 8] // dst_rgb
+ mov ecx, [esp + 12] // width
+ vpcmpeqb ymm4, ymm4, ymm4
+ vpsrld ymm4, ymm4, 27 // generate mask 0x0000001f
+ vpslld ymm5, ymm4, 5 // generate mask 0x000003e0
+ vpslld ymm6, ymm4, 10 // generate mask 0x00007c00
+ vpcmpeqb ymm7, ymm7, ymm7 // generate mask 0xffff8000
+ vpslld ymm7, ymm7, 15
+
+ convertloop:
+ vmovdqu ymm0, [eax] // fetch 8 pixels of argb
+ vpsrld ymm3, ymm0, 9 // R
+ vpsrld ymm2, ymm0, 6 // G
+ vpsrld ymm1, ymm0, 3 // B
+ vpsrad ymm0, ymm0, 16 // A
+ vpand ymm3, ymm3, ymm6 // R
+ vpand ymm2, ymm2, ymm5 // G
+ vpand ymm1, ymm1, ymm4 // B
+ vpand ymm0, ymm0, ymm7 // A
+ vpor ymm0, ymm0, ymm1 // BA
+ vpor ymm2, ymm2, ymm3 // GR
+ vpor ymm0, ymm0, ymm2 // BGRA
+ vpackssdw ymm0, ymm0, ymm0
+ vpermq ymm0, ymm0, 0xd8
+ lea eax, [eax + 32]
+ vmovdqu [edx], xmm0 // store 8 pixels of ARGB1555
+ lea edx, [edx + 16]
+ sub ecx, 8
+ jg convertloop
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_ARGBTOARGB1555ROW_AVX2
+
+#ifdef HAS_ARGBTOARGB4444ROW_AVX2
+__declspec(naked)
+void ARGBToARGB4444Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width) {
+ __asm {
+ mov eax, [esp + 4] // src_argb
+ mov edx, [esp + 8] // dst_rgb
+ mov ecx, [esp + 12] // width
+ vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0xf000f000
+ vpsllw ymm4, ymm4, 12
+ vpsrlw ymm3, ymm4, 8 // generate mask 0x00f000f0
+
+ convertloop:
+ vmovdqu ymm0, [eax] // fetch 8 pixels of argb
+ vpand ymm1, ymm0, ymm4 // high nibble
+ vpand ymm0, ymm0, ymm3 // low nibble
+ vpsrld ymm1, ymm1, 8
+ vpsrld ymm0, ymm0, 4
+ vpor ymm0, ymm0, ymm1
+ vpackuswb ymm0, ymm0, ymm0
+ vpermq ymm0, ymm0, 0xd8
+ lea eax, [eax + 32]
+ vmovdqu [edx], xmm0 // store 8 pixels of ARGB4444
+ lea edx, [edx + 16]
+ sub ecx, 8
+ jg convertloop
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_ARGBTOARGB4444ROW_AVX2
+
// Convert 16 ARGB pixels (64 bytes) to 16 Y values.
-__declspec(naked) __declspec(align(16))
-void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
+__declspec(naked)
+void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) {
__asm {
mov eax, [esp + 4] /* src_argb */
mov edx, [esp + 8] /* dst_y */
- mov ecx, [esp + 12] /* pix */
- movdqa xmm5, kAddY16
- movdqa xmm4, kARGBToY
+ mov ecx, [esp + 12] /* width */
+ movdqa xmm4, xmmword ptr kARGBToY
+ movdqa xmm5, xmmword ptr kAddY16
- align 4
convertloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + 32]
- movdqa xmm3, [eax + 48]
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ movdqu xmm2, [eax + 32]
+ movdqu xmm3, [eax + 48]
pmaddubsw xmm0, xmm4
pmaddubsw xmm1, xmm4
pmaddubsw xmm2, xmm4
@@ -809,30 +1155,30 @@ void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
psrlw xmm2, 7
packuswb xmm0, xmm2
paddb xmm0, xmm5
- sub ecx, 16
- movdqa [edx], xmm0
+ movdqu [edx], xmm0
lea edx, [edx + 16]
+ sub ecx, 16
jg convertloop
ret
}
}
-// Convert 16 ARGB pixels (64 bytes) to 16 Y values.
-__declspec(naked) __declspec(align(16))
-void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
+// Convert 16 ARGB pixels (64 bytes) to 16 YJ values.
+// Same as ARGBToYRow but with different coefficients: no +16 bias, and it rounds.
+__declspec(naked)
+void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) {
__asm {
mov eax, [esp + 4] /* src_argb */
mov edx, [esp + 8] /* dst_y */
- mov ecx, [esp + 12] /* pix */
- movdqa xmm4, kARGBToYJ
- movdqa xmm5, kAddYJ64
+ mov ecx, [esp + 12] /* width */
+ movdqa xmm4, xmmword ptr kARGBToYJ
+ movdqa xmm5, xmmword ptr kAddYJ64
- align 4
convertloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + 32]
- movdqa xmm3, [eax + 48]
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ movdqu xmm2, [eax + 32]
+ movdqu xmm3, [eax + 48]
pmaddubsw xmm0, xmm4
pmaddubsw xmm1, xmm4
pmaddubsw xmm2, xmm4
@@ -845,27 +1191,31 @@ void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
psrlw xmm0, 7
psrlw xmm2, 7
packuswb xmm0, xmm2
- sub ecx, 16
- movdqa [edx], xmm0
+ movdqu [edx], xmm0
lea edx, [edx + 16]
+ sub ecx, 16
jg convertloop
ret
}
}
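+
+// For reference only: scalar sketches of the two luma paths above, assuming
+// this file's kARGBToY (13, 65, 33 for B, G, R) and kARGBToYJ (15, 75, 38)
+// coefficient tables. Helper names are illustrative.
+static __inline uint8 ARGBToYPixel_C(const uint8* argb) {
+ return (uint8)(((13 * argb[0] + 65 * argb[1] + 33 * argb[2]) >> 7) + 16);
+}
+static __inline uint8 ARGBToYJPixel_C(const uint8* argb) {
+ // J variant: +64 rounds (half of 1 << 7); full range, so no +16 offset.
+ return (uint8)((15 * argb[0] + 75 * argb[1] + 38 * argb[2] + 64) >> 7);
+}
+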
#ifdef HAS_ARGBTOYROW_AVX2
+// vpermd to undo the lane mutation of vphaddw + vpackuswb.
+static const lvec32 kPermdARGBToY_AVX = {
+ 0, 4, 1, 5, 2, 6, 3, 7
+};
+
// Convert 32 ARGB pixels (128 bytes) to 32 Y values.
-__declspec(naked) __declspec(align(32))
-void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
+__declspec(naked)
+void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int width) {
__asm {
mov eax, [esp + 4] /* src_argb */
mov edx, [esp + 8] /* dst_y */
- mov ecx, [esp + 12] /* pix */
- vbroadcastf128 ymm4, kARGBToY
- vbroadcastf128 ymm5, kAddY16
- vmovdqa ymm6, kPermdARGBToY_AVX
+ mov ecx, [esp + 12] /* width */
+ vbroadcastf128 ymm4, xmmword ptr kARGBToY
+ vbroadcastf128 ymm5, xmmword ptr kAddY16
+ vmovdqu ymm6, ymmword ptr kPermdARGBToY_AVX
- align 4
convertloop:
vmovdqu ymm0, [eax]
vmovdqu ymm1, [eax + 32]
@@ -882,10 +1232,10 @@ void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
vpsrlw ymm2, ymm2, 7
vpackuswb ymm0, ymm0, ymm2 // mutates.
vpermd ymm0, ymm6, ymm0 // For vphaddw + vpackuswb mutation.
- vpaddb ymm0, ymm0, ymm5
- sub ecx, 32
+ vpaddb ymm0, ymm0, ymm5 // add 16 for Y
vmovdqu [edx], ymm0
lea edx, [edx + 32]
+ sub ecx, 32
jg convertloop
vzeroupper
ret
@@ -893,19 +1243,18 @@ void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
}
#endif // HAS_ARGBTOYROW_AVX2
-#ifdef HAS_ARGBTOYROW_AVX2
+#ifdef HAS_ARGBTOYJROW_AVX2
// Convert 32 ARGB pixels (128 bytes) to 32 YJ values.
-__declspec(naked) __declspec(align(32))
-void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
+__declspec(naked)
+void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int width) {
__asm {
mov eax, [esp + 4] /* src_argb */
mov edx, [esp + 8] /* dst_y */
- mov ecx, [esp + 12] /* pix */
- vbroadcastf128 ymm4, kARGBToYJ
- vbroadcastf128 ymm5, kAddYJ64
- vmovdqa ymm6, kPermdARGBToY_AVX
+ mov ecx, [esp + 12] /* width */
+ vbroadcastf128 ymm4, xmmword ptr kARGBToYJ
+ vbroadcastf128 ymm5, xmmword ptr kAddYJ64
+ vmovdqu ymm6, ymmword ptr kPermdARGBToY_AVX
- align 4
convertloop:
vmovdqu ymm0, [eax]
vmovdqu ymm1, [eax + 32]
@@ -924,9 +1273,9 @@ void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
vpsrlw ymm2, ymm2, 7
vpackuswb ymm0, ymm0, ymm2 // mutates.
vpermd ymm0, ymm6, ymm0 // For vphaddw + vpackuswb mutation.
- sub ecx, 32
vmovdqu [edx], ymm0
lea edx, [edx + 32]
+ sub ecx, 32
jg convertloop
vzeroupper
@@ -935,16 +1284,15 @@ void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
}
#endif // HAS_ARGBTOYJROW_AVX2
-__declspec(naked) __declspec(align(16))
-void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
+__declspec(naked)
+void BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) {
__asm {
mov eax, [esp + 4] /* src_argb */
mov edx, [esp + 8] /* dst_y */
- mov ecx, [esp + 12] /* pix */
- movdqa xmm5, kAddY16
- movdqa xmm4, kARGBToY
+ mov ecx, [esp + 12] /* width */
+ movdqa xmm4, xmmword ptr kBGRAToY
+ movdqa xmm5, xmmword ptr kAddY16
- align 4
convertloop:
movdqu xmm0, [eax]
movdqu xmm1, [eax + 16]
@@ -961,93 +1309,23 @@ void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
psrlw xmm2, 7
packuswb xmm0, xmm2
paddb xmm0, xmm5
- sub ecx, 16
movdqu [edx], xmm0
lea edx, [edx + 16]
- jg convertloop
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
- __asm {
- mov eax, [esp + 4] /* src_argb */
- mov edx, [esp + 8] /* dst_y */
- mov ecx, [esp + 12] /* pix */
- movdqa xmm4, kARGBToYJ
- movdqa xmm5, kAddYJ64
-
- align 4
- convertloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
- pmaddubsw xmm0, xmm4
- pmaddubsw xmm1, xmm4
- pmaddubsw xmm2, xmm4
- pmaddubsw xmm3, xmm4
- lea eax, [eax + 64]
- phaddw xmm0, xmm1
- phaddw xmm2, xmm3
- paddw xmm0, xmm5
- paddw xmm2, xmm5
- psrlw xmm0, 7
- psrlw xmm2, 7
- packuswb xmm0, xmm2
sub ecx, 16
- movdqu [edx], xmm0
- lea edx, [edx + 16]
jg convertloop
ret
}
}
-__declspec(naked) __declspec(align(16))
-void BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
+__declspec(naked)
+void ABGRToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) {
__asm {
mov eax, [esp + 4] /* src_argb */
mov edx, [esp + 8] /* dst_y */
- mov ecx, [esp + 12] /* pix */
- movdqa xmm5, kAddY16
- movdqa xmm4, kBGRAToY
+ mov ecx, [esp + 12] /* width */
+ movdqa xmm4, xmmword ptr kABGRToY
+ movdqa xmm5, xmmword ptr kAddY16
- align 4
- convertloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + 32]
- movdqa xmm3, [eax + 48]
- pmaddubsw xmm0, xmm4
- pmaddubsw xmm1, xmm4
- pmaddubsw xmm2, xmm4
- pmaddubsw xmm3, xmm4
- lea eax, [eax + 64]
- phaddw xmm0, xmm1
- phaddw xmm2, xmm3
- psrlw xmm0, 7
- psrlw xmm2, 7
- packuswb xmm0, xmm2
- paddb xmm0, xmm5
- sub ecx, 16
- movdqa [edx], xmm0
- lea edx, [edx + 16]
- jg convertloop
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void BGRAToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
- __asm {
- mov eax, [esp + 4] /* src_argb */
- mov edx, [esp + 8] /* dst_y */
- mov ecx, [esp + 12] /* pix */
- movdqa xmm5, kAddY16
- movdqa xmm4, kBGRAToY
-
- align 4
convertloop:
movdqu xmm0, [eax]
movdqu xmm1, [eax + 16]
@@ -1064,58 +1342,23 @@ void BGRAToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
psrlw xmm2, 7
packuswb xmm0, xmm2
paddb xmm0, xmm5
- sub ecx, 16
movdqu [edx], xmm0
lea edx, [edx + 16]
- jg convertloop
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void ABGRToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
- __asm {
- mov eax, [esp + 4] /* src_argb */
- mov edx, [esp + 8] /* dst_y */
- mov ecx, [esp + 12] /* pix */
- movdqa xmm5, kAddY16
- movdqa xmm4, kABGRToY
-
- align 4
- convertloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + 32]
- movdqa xmm3, [eax + 48]
- pmaddubsw xmm0, xmm4
- pmaddubsw xmm1, xmm4
- pmaddubsw xmm2, xmm4
- pmaddubsw xmm3, xmm4
- lea eax, [eax + 64]
- phaddw xmm0, xmm1
- phaddw xmm2, xmm3
- psrlw xmm0, 7
- psrlw xmm2, 7
- packuswb xmm0, xmm2
- paddb xmm0, xmm5
sub ecx, 16
- movdqa [edx], xmm0
- lea edx, [edx + 16]
jg convertloop
ret
}
}
-__declspec(naked) __declspec(align(16))
-void ABGRToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
+__declspec(naked)
+void RGBAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) {
__asm {
mov eax, [esp + 4] /* src_argb */
mov edx, [esp + 8] /* dst_y */
- mov ecx, [esp + 12] /* pix */
- movdqa xmm5, kAddY16
- movdqa xmm4, kABGRToY
+ mov ecx, [esp + 12] /* width */
+ movdqa xmm4, xmmword ptr kRGBAToY
+ movdqa xmm5, xmmword ptr kAddY16
- align 4
convertloop:
movdqu xmm0, [eax]
movdqu xmm1, [eax + 16]
@@ -1132,83 +1375,15 @@ void ABGRToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
psrlw xmm2, 7
packuswb xmm0, xmm2
paddb xmm0, xmm5
- sub ecx, 16
movdqu [edx], xmm0
lea edx, [edx + 16]
- jg convertloop
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void RGBAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
- __asm {
- mov eax, [esp + 4] /* src_argb */
- mov edx, [esp + 8] /* dst_y */
- mov ecx, [esp + 12] /* pix */
- movdqa xmm5, kAddY16
- movdqa xmm4, kRGBAToY
-
- align 4
- convertloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + 32]
- movdqa xmm3, [eax + 48]
- pmaddubsw xmm0, xmm4
- pmaddubsw xmm1, xmm4
- pmaddubsw xmm2, xmm4
- pmaddubsw xmm3, xmm4
- lea eax, [eax + 64]
- phaddw xmm0, xmm1
- phaddw xmm2, xmm3
- psrlw xmm0, 7
- psrlw xmm2, 7
- packuswb xmm0, xmm2
- paddb xmm0, xmm5
sub ecx, 16
- movdqa [edx], xmm0
- lea edx, [edx + 16]
jg convertloop
ret
}
}
-__declspec(naked) __declspec(align(16))
-void RGBAToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
- __asm {
- mov eax, [esp + 4] /* src_argb */
- mov edx, [esp + 8] /* dst_y */
- mov ecx, [esp + 12] /* pix */
- movdqa xmm5, kAddY16
- movdqa xmm4, kRGBAToY
-
- align 4
- convertloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
- pmaddubsw xmm0, xmm4
- pmaddubsw xmm1, xmm4
- pmaddubsw xmm2, xmm4
- pmaddubsw xmm3, xmm4
- lea eax, [eax + 64]
- phaddw xmm0, xmm1
- phaddw xmm2, xmm3
- psrlw xmm0, 7
- psrlw xmm2, 7
- packuswb xmm0, xmm2
- paddb xmm0, xmm5
- sub ecx, 16
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- jg convertloop
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) {
__asm {
@@ -1218,23 +1393,27 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
mov esi, [esp + 8 + 8] // src_stride_argb
mov edx, [esp + 8 + 12] // dst_u
mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // pix
- movdqa xmm7, kARGBToU
- movdqa xmm6, kARGBToV
- movdqa xmm5, kAddUV128
+ mov ecx, [esp + 8 + 20] // width
+ movdqa xmm5, xmmword ptr kAddUV128
+ movdqa xmm6, xmmword ptr kARGBToV
+ movdqa xmm7, xmmword ptr kARGBToU
sub edi, edx // stride from u to v
- align 4
convertloop:
/* step 1 - subsample 16x2 argb pixels to 8x1 */
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + 32]
- movdqa xmm3, [eax + 48]
- pavgb xmm0, [eax + esi]
- pavgb xmm1, [eax + esi + 16]
- pavgb xmm2, [eax + esi + 32]
- pavgb xmm3, [eax + esi + 48]
+ movdqu xmm0, [eax]
+ movdqu xmm4, [eax + esi]
+ pavgb xmm0, xmm4
+ movdqu xmm1, [eax + 16]
+ movdqu xmm4, [eax + esi + 16]
+ pavgb xmm1, xmm4
+ movdqu xmm2, [eax + 32]
+ movdqu xmm4, [eax + esi + 32]
+ pavgb xmm2, xmm4
+ movdqu xmm3, [eax + 48]
+ movdqu xmm4, [eax + esi + 48]
+ pavgb xmm3, xmm4
+
lea eax, [eax + 64]
movdqa xmm4, xmm0
shufps xmm0, xmm1, 0x88
@@ -1262,10 +1441,10 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
paddb xmm0, xmm5 // -> unsigned
// step 3 - store 8 U and 8 V values
- sub ecx, 16
movlps qword ptr [edx], xmm0 // U
movhps qword ptr [edx + edi], xmm0 // V
lea edx, [edx + 8]
+ sub ecx, 16
jg convertloop
pop edi
@@ -1274,7 +1453,7 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
}
}
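+
+// For reference only: a scalar sketch of one 2x2 UV sample from the loop
+// above. pavgb rounds up, so the box filter is two levels of
+// (a + b + 1) >> 1: rows first, then adjacent columns. Coefficients assume
+// this file's kARGBToU (112, -74, -38) and kARGBToV (-18, -94, 112) tables;
+// helper names are illustrative.
+static __inline uint8 AVGB_C(uint8 a, uint8 b) {
+ return (uint8)((a + b + 1) >> 1);
+}
+static __inline void ARGBToUV2x2_C(const uint8* row0, const uint8* row1,
+ uint8* dst_u, uint8* dst_v) {
+ uint8 b = AVGB_C(AVGB_C(row0[0], row1[0]), AVGB_C(row0[4], row1[4]));
+ uint8 g = AVGB_C(AVGB_C(row0[1], row1[1]), AVGB_C(row0[5], row1[5]));
+ uint8 r = AVGB_C(AVGB_C(row0[2], row1[2]), AVGB_C(row0[6], row1[6]));
+ *dst_u = (uint8)(((112 * b - 74 * g - 38 * r) >> 8) + 128);
+ *dst_v = (uint8)(((-18 * b - 94 * g + 112 * r) >> 8) + 128);
+}
+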
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) {
__asm {
@@ -1284,23 +1463,27 @@ void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
mov esi, [esp + 8 + 8] // src_stride_argb
mov edx, [esp + 8 + 12] // dst_u
mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // pix
- movdqa xmm7, kARGBToUJ
- movdqa xmm6, kARGBToVJ
- movdqa xmm5, kAddUVJ128
+ mov ecx, [esp + 8 + 20] // width
+ movdqa xmm5, xmmword ptr kAddUVJ128
+ movdqa xmm6, xmmword ptr kARGBToVJ
+ movdqa xmm7, xmmword ptr kARGBToUJ
sub edi, edx // stride from u to v
- align 4
convertloop:
/* step 1 - subsample 16x2 argb pixels to 8x1 */
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + 32]
- movdqa xmm3, [eax + 48]
- pavgb xmm0, [eax + esi]
- pavgb xmm1, [eax + esi + 16]
- pavgb xmm2, [eax + esi + 32]
- pavgb xmm3, [eax + esi + 48]
+ movdqu xmm0, [eax]
+ movdqu xmm4, [eax + esi]
+ pavgb xmm0, xmm4
+ movdqu xmm1, [eax + 16]
+ movdqu xmm4, [eax + esi + 16]
+ pavgb xmm1, xmm4
+ movdqu xmm2, [eax + 32]
+ movdqu xmm4, [eax + esi + 32]
+ pavgb xmm2, xmm4
+ movdqu xmm3, [eax + 48]
+ movdqu xmm4, [eax + esi + 48]
+ pavgb xmm3, xmm4
+
lea eax, [eax + 64]
movdqa xmm4, xmm0
shufps xmm0, xmm1, 0x88
@@ -1329,10 +1512,10 @@ void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
packsswb xmm0, xmm1
// step 3 - store 8 U and 8 V values
- sub ecx, 16
movlps qword ptr [edx], xmm0 // U
movhps qword ptr [edx + edi], xmm0 // V
lea edx, [edx + 8]
+ sub ecx, 16
jg convertloop
pop edi
@@ -1342,7 +1525,7 @@ void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
}
#ifdef HAS_ARGBTOUVROW_AVX2
-__declspec(naked) __declspec(align(32))
+__declspec(naked)
void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) {
__asm {
@@ -1352,13 +1535,12 @@ void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb,
mov esi, [esp + 8 + 8] // src_stride_argb
mov edx, [esp + 8 + 12] // dst_u
mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // pix
- vbroadcastf128 ymm5, kAddUV128
- vbroadcastf128 ymm6, kARGBToV
- vbroadcastf128 ymm7, kARGBToU
+ mov ecx, [esp + 8 + 20] // width
+ vbroadcastf128 ymm5, xmmword ptr kAddUV128
+ vbroadcastf128 ymm6, xmmword ptr kARGBToV
+ vbroadcastf128 ymm7, xmmword ptr kARGBToU
sub edi, edx // stride from u to v
- align 4
convertloop:
/* step 1 - subsample 32x2 argb pixels to 16x1 */
vmovdqu ymm0, [eax]
@@ -1390,14 +1572,14 @@ void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb,
vpsraw ymm0, ymm0, 8
vpacksswb ymm0, ymm1, ymm0 // mutates
vpermq ymm0, ymm0, 0xd8 // For vpacksswb
- vpshufb ymm0, ymm0, kShufARGBToUV_AVX // For vshufps + vphaddw
+ vpshufb ymm0, ymm0, ymmword ptr kShufARGBToUV_AVX // for vshufps/vphaddw
vpaddb ymm0, ymm0, ymm5 // -> unsigned
// step 3 - store 16 U and 16 V values
- sub ecx, 32
vextractf128 [edx], ymm0, 0 // U
vextractf128 [edx + edi], ymm0, 1 // V
lea edx, [edx + 16]
+ sub ecx, 32
jg convertloop
pop edi
@@ -1408,148 +1590,7 @@ void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb,
}
#endif // HAS_ARGBTOUVROW_AVX2
-__declspec(naked) __declspec(align(16))
-void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // src_argb
- mov esi, [esp + 8 + 8] // src_stride_argb
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // pix
- movdqa xmm7, kARGBToU
- movdqa xmm6, kARGBToV
- movdqa xmm5, kAddUV128
- sub edi, edx // stride from u to v
-
- align 4
- convertloop:
- /* step 1 - subsample 16x2 argb pixels to 8x1 */
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
- movdqu xmm4, [eax + esi]
- pavgb xmm0, xmm4
- movdqu xmm4, [eax + esi + 16]
- pavgb xmm1, xmm4
- movdqu xmm4, [eax + esi + 32]
- pavgb xmm2, xmm4
- movdqu xmm4, [eax + esi + 48]
- pavgb xmm3, xmm4
- lea eax, [eax + 64]
- movdqa xmm4, xmm0
- shufps xmm0, xmm1, 0x88
- shufps xmm4, xmm1, 0xdd
- pavgb xmm0, xmm4
- movdqa xmm4, xmm2
- shufps xmm2, xmm3, 0x88
- shufps xmm4, xmm3, 0xdd
- pavgb xmm2, xmm4
-
- // step 2 - convert to U and V
- // from here down is very similar to Y code except
- // instead of 16 different pixels, its 8 pixels of U and 8 of V
- movdqa xmm1, xmm0
- movdqa xmm3, xmm2
- pmaddubsw xmm0, xmm7 // U
- pmaddubsw xmm2, xmm7
- pmaddubsw xmm1, xmm6 // V
- pmaddubsw xmm3, xmm6
- phaddw xmm0, xmm2
- phaddw xmm1, xmm3
- psraw xmm0, 8
- psraw xmm1, 8
- packsswb xmm0, xmm1
- paddb xmm0, xmm5 // -> unsigned
-
- // step 3 - store 8 U and 8 V values
- sub ecx, 16
- movlps qword ptr [edx], xmm0 // U
- movhps qword ptr [edx + edi], xmm0 // V
- lea edx, [edx + 8]
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void ARGBToUVJRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // src_argb
- mov esi, [esp + 8 + 8] // src_stride_argb
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // pix
- movdqa xmm7, kARGBToUJ
- movdqa xmm6, kARGBToVJ
- movdqa xmm5, kAddUVJ128
- sub edi, edx // stride from u to v
-
- align 4
- convertloop:
- /* step 1 - subsample 16x2 argb pixels to 8x1 */
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
- movdqu xmm4, [eax + esi]
- pavgb xmm0, xmm4
- movdqu xmm4, [eax + esi + 16]
- pavgb xmm1, xmm4
- movdqu xmm4, [eax + esi + 32]
- pavgb xmm2, xmm4
- movdqu xmm4, [eax + esi + 48]
- pavgb xmm3, xmm4
- lea eax, [eax + 64]
- movdqa xmm4, xmm0
- shufps xmm0, xmm1, 0x88
- shufps xmm4, xmm1, 0xdd
- pavgb xmm0, xmm4
- movdqa xmm4, xmm2
- shufps xmm2, xmm3, 0x88
- shufps xmm4, xmm3, 0xdd
- pavgb xmm2, xmm4
-
- // step 2 - convert to U and V
- // from here down is very similar to Y code except
- // instead of 16 different pixels, its 8 pixels of U and 8 of V
- movdqa xmm1, xmm0
- movdqa xmm3, xmm2
- pmaddubsw xmm0, xmm7 // U
- pmaddubsw xmm2, xmm7
- pmaddubsw xmm1, xmm6 // V
- pmaddubsw xmm3, xmm6
- phaddw xmm0, xmm2
- phaddw xmm1, xmm3
- paddw xmm0, xmm5 // +.5 rounding -> unsigned
- paddw xmm1, xmm5
- psraw xmm0, 8
- psraw xmm1, 8
- packsswb xmm0, xmm1
-
- // step 3 - store 8 U and 8 V values
- sub ecx, 16
- movlps qword ptr [edx], xmm0 // U
- movhps qword ptr [edx + edi], xmm0 // V
- lea edx, [edx + 8]
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGBToUV444Row_SSSE3(const uint8* src_argb0,
uint8* dst_u, uint8* dst_v, int width) {
__asm {
@@ -1557,71 +1598,12 @@ void ARGBToUV444Row_SSSE3(const uint8* src_argb0,
mov eax, [esp + 4 + 4] // src_argb
mov edx, [esp + 4 + 8] // dst_u
mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // pix
- movdqa xmm7, kARGBToU
- movdqa xmm6, kARGBToV
- movdqa xmm5, kAddUV128
+ mov ecx, [esp + 4 + 16] // width
+ movdqa xmm5, xmmword ptr kAddUV128
+ movdqa xmm6, xmmword ptr kARGBToV
+ movdqa xmm7, xmmword ptr kARGBToU
sub edi, edx // stride from u to v
- align 4
- convertloop:
- /* convert to U and V */
- movdqa xmm0, [eax] // U
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + 32]
- movdqa xmm3, [eax + 48]
- pmaddubsw xmm0, xmm7
- pmaddubsw xmm1, xmm7
- pmaddubsw xmm2, xmm7
- pmaddubsw xmm3, xmm7
- phaddw xmm0, xmm1
- phaddw xmm2, xmm3
- psraw xmm0, 8
- psraw xmm2, 8
- packsswb xmm0, xmm2
- paddb xmm0, xmm5
- sub ecx, 16
- movdqa [edx], xmm0
-
- movdqa xmm0, [eax] // V
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + 32]
- movdqa xmm3, [eax + 48]
- pmaddubsw xmm0, xmm6
- pmaddubsw xmm1, xmm6
- pmaddubsw xmm2, xmm6
- pmaddubsw xmm3, xmm6
- phaddw xmm0, xmm1
- phaddw xmm2, xmm3
- psraw xmm0, 8
- psraw xmm2, 8
- packsswb xmm0, xmm2
- paddb xmm0, xmm5
- lea eax, [eax + 64]
- movdqa [edx + edi], xmm0
- lea edx, [edx + 16]
- jg convertloop
-
- pop edi
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb0,
- uint8* dst_u, uint8* dst_v, int width) {
- __asm {
- push edi
- mov eax, [esp + 4 + 4] // src_argb
- mov edx, [esp + 4 + 8] // dst_u
- mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // pix
- movdqa xmm7, kARGBToU
- movdqa xmm6, kARGBToV
- movdqa xmm5, kAddUV128
- sub edi, edx // stride from u to v
-
- align 4
convertloop:
/* convert to U and V */
movdqu xmm0, [eax] // U
@@ -1638,7 +1620,6 @@ void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb0,
psraw xmm2, 8
packsswb xmm0, xmm2
paddb xmm0, xmm5
- sub ecx, 16
movdqu [edx], xmm0
movdqu xmm0, [eax] // V
@@ -1658,6 +1639,7 @@ void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb0,
lea eax, [eax + 64]
movdqu [edx + edi], xmm0
lea edx, [edx + 16]
+ sub ecx, 16
jg convertloop
pop edi
@@ -1665,7 +1647,7 @@ void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb0,
}
}
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGBToUV422Row_SSSE3(const uint8* src_argb0,
uint8* dst_u, uint8* dst_v, int width) {
__asm {
@@ -1673,72 +1655,12 @@ void ARGBToUV422Row_SSSE3(const uint8* src_argb0,
mov eax, [esp + 4 + 4] // src_argb
mov edx, [esp + 4 + 8] // dst_u
mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // pix
- movdqa xmm7, kARGBToU
- movdqa xmm6, kARGBToV
- movdqa xmm5, kAddUV128
+ mov ecx, [esp + 4 + 16] // width
+ movdqa xmm5, xmmword ptr kAddUV128
+ movdqa xmm6, xmmword ptr kARGBToV
+ movdqa xmm7, xmmword ptr kARGBToU
sub edi, edx // stride from u to v
- align 4
- convertloop:
- /* step 1 - subsample 16x2 argb pixels to 8x1 */
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + 32]
- movdqa xmm3, [eax + 48]
- lea eax, [eax + 64]
- movdqa xmm4, xmm0
- shufps xmm0, xmm1, 0x88
- shufps xmm4, xmm1, 0xdd
- pavgb xmm0, xmm4
- movdqa xmm4, xmm2
- shufps xmm2, xmm3, 0x88
- shufps xmm4, xmm3, 0xdd
- pavgb xmm2, xmm4
-
- // step 2 - convert to U and V
- // from here down is very similar to Y code except
- // instead of 16 different pixels, its 8 pixels of U and 8 of V
- movdqa xmm1, xmm0
- movdqa xmm3, xmm2
- pmaddubsw xmm0, xmm7 // U
- pmaddubsw xmm2, xmm7
- pmaddubsw xmm1, xmm6 // V
- pmaddubsw xmm3, xmm6
- phaddw xmm0, xmm2
- phaddw xmm1, xmm3
- psraw xmm0, 8
- psraw xmm1, 8
- packsswb xmm0, xmm1
- paddb xmm0, xmm5 // -> unsigned
-
- // step 3 - store 8 U and 8 V values
- sub ecx, 16
- movlps qword ptr [edx], xmm0 // U
- movhps qword ptr [edx + edi], xmm0 // V
- lea edx, [edx + 8]
- jg convertloop
-
- pop edi
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb0,
- uint8* dst_u, uint8* dst_v, int width) {
- __asm {
- push edi
- mov eax, [esp + 4 + 4] // src_argb
- mov edx, [esp + 4 + 8] // dst_u
- mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // pix
- movdqa xmm7, kARGBToU
- movdqa xmm6, kARGBToV
- movdqa xmm5, kAddUV128
- sub edi, edx // stride from u to v
-
- align 4
convertloop:
/* step 1 - subsample 16x2 argb pixels to 8x1 */
movdqu xmm0, [eax]
@@ -1772,10 +1694,10 @@ void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb0,
paddb xmm0, xmm5 // -> unsigned
// step 3 - store 8 U and 8 V values
- sub ecx, 16
movlps qword ptr [edx], xmm0 // U
movhps qword ptr [edx + edi], xmm0 // V
lea edx, [edx + 8]
+ sub ecx, 16
jg convertloop
pop edi
@@ -1783,7 +1705,7 @@ void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb0,
}
}
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) {
__asm {
@@ -1793,93 +1715,27 @@ void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
mov esi, [esp + 8 + 8] // src_stride_argb
mov edx, [esp + 8 + 12] // dst_u
mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // pix
- movdqa xmm7, kBGRAToU
- movdqa xmm6, kBGRAToV
- movdqa xmm5, kAddUV128
+ mov ecx, [esp + 8 + 20] // width
+ movdqa xmm5, xmmword ptr kAddUV128
+ movdqa xmm6, xmmword ptr kBGRAToV
+ movdqa xmm7, xmmword ptr kBGRAToU
sub edi, edx // stride from u to v
- align 4
- convertloop:
- /* step 1 - subsample 16x2 argb pixels to 8x1 */
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + 32]
- movdqa xmm3, [eax + 48]
- pavgb xmm0, [eax + esi]
- pavgb xmm1, [eax + esi + 16]
- pavgb xmm2, [eax + esi + 32]
- pavgb xmm3, [eax + esi + 48]
- lea eax, [eax + 64]
- movdqa xmm4, xmm0
- shufps xmm0, xmm1, 0x88
- shufps xmm4, xmm1, 0xdd
- pavgb xmm0, xmm4
- movdqa xmm4, xmm2
- shufps xmm2, xmm3, 0x88
- shufps xmm4, xmm3, 0xdd
- pavgb xmm2, xmm4
-
- // step 2 - convert to U and V
- // from here down is very similar to Y code except
- // instead of 16 different pixels, its 8 pixels of U and 8 of V
- movdqa xmm1, xmm0
- movdqa xmm3, xmm2
- pmaddubsw xmm0, xmm7 // U
- pmaddubsw xmm2, xmm7
- pmaddubsw xmm1, xmm6 // V
- pmaddubsw xmm3, xmm6
- phaddw xmm0, xmm2
- phaddw xmm1, xmm3
- psraw xmm0, 8
- psraw xmm1, 8
- packsswb xmm0, xmm1
- paddb xmm0, xmm5 // -> unsigned
-
- // step 3 - store 8 U and 8 V values
- sub ecx, 16
- movlps qword ptr [edx], xmm0 // U
- movhps qword ptr [edx + edi], xmm0 // V
- lea edx, [edx + 8]
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // src_argb
- mov esi, [esp + 8 + 8] // src_stride_argb
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // pix
- movdqa xmm7, kBGRAToU
- movdqa xmm6, kBGRAToV
- movdqa xmm5, kAddUV128
- sub edi, edx // stride from u to v
-
- align 4
convertloop:
/* step 1 - subsample 16x2 argb pixels to 8x1 */
movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
movdqu xmm4, [eax + esi]
pavgb xmm0, xmm4
+ movdqu xmm1, [eax + 16]
movdqu xmm4, [eax + esi + 16]
pavgb xmm1, xmm4
+ movdqu xmm2, [eax + 32]
movdqu xmm4, [eax + esi + 32]
pavgb xmm2, xmm4
+ movdqu xmm3, [eax + 48]
movdqu xmm4, [eax + esi + 48]
pavgb xmm3, xmm4
+
lea eax, [eax + 64]
movdqa xmm4, xmm0
shufps xmm0, xmm1, 0x88
@@ -1907,10 +1763,10 @@ void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
paddb xmm0, xmm5 // -> unsigned
// step 3 - store 8 U and 8 V values
- sub ecx, 16
movlps qword ptr [edx], xmm0 // U
movhps qword ptr [edx + edi], xmm0 // V
lea edx, [edx + 8]
+ sub ecx, 16
jg convertloop
pop edi
@@ -1919,7 +1775,7 @@ void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
}
}
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) {
__asm {
@@ -1929,93 +1785,27 @@ void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
mov esi, [esp + 8 + 8] // src_stride_argb
mov edx, [esp + 8 + 12] // dst_u
mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // pix
- movdqa xmm7, kABGRToU
- movdqa xmm6, kABGRToV
- movdqa xmm5, kAddUV128
+ mov ecx, [esp + 8 + 20] // width
+ movdqa xmm5, xmmword ptr kAddUV128
+ movdqa xmm6, xmmword ptr kABGRToV
+ movdqa xmm7, xmmword ptr kABGRToU
sub edi, edx // stride from u to v
- align 4
- convertloop:
- /* step 1 - subsample 16x2 argb pixels to 8x1 */
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + 32]
- movdqa xmm3, [eax + 48]
- pavgb xmm0, [eax + esi]
- pavgb xmm1, [eax + esi + 16]
- pavgb xmm2, [eax + esi + 32]
- pavgb xmm3, [eax + esi + 48]
- lea eax, [eax + 64]
- movdqa xmm4, xmm0
- shufps xmm0, xmm1, 0x88
- shufps xmm4, xmm1, 0xdd
- pavgb xmm0, xmm4
- movdqa xmm4, xmm2
- shufps xmm2, xmm3, 0x88
- shufps xmm4, xmm3, 0xdd
- pavgb xmm2, xmm4
-
- // step 2 - convert to U and V
- // from here down is very similar to Y code except
- // instead of 16 different pixels, its 8 pixels of U and 8 of V
- movdqa xmm1, xmm0
- movdqa xmm3, xmm2
- pmaddubsw xmm0, xmm7 // U
- pmaddubsw xmm2, xmm7
- pmaddubsw xmm1, xmm6 // V
- pmaddubsw xmm3, xmm6
- phaddw xmm0, xmm2
- phaddw xmm1, xmm3
- psraw xmm0, 8
- psraw xmm1, 8
- packsswb xmm0, xmm1
- paddb xmm0, xmm5 // -> unsigned
-
- // step 3 - store 8 U and 8 V values
- sub ecx, 16
- movlps qword ptr [edx], xmm0 // U
- movhps qword ptr [edx + edi], xmm0 // V
- lea edx, [edx + 8]
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // src_argb
- mov esi, [esp + 8 + 8] // src_stride_argb
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // pix
- movdqa xmm7, kABGRToU
- movdqa xmm6, kABGRToV
- movdqa xmm5, kAddUV128
- sub edi, edx // stride from u to v
-
- align 4
convertloop:
/* step 1 - subsample 16x2 argb pixels to 8x1 */
movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
movdqu xmm4, [eax + esi]
pavgb xmm0, xmm4
+ movdqu xmm1, [eax + 16]
movdqu xmm4, [eax + esi + 16]
pavgb xmm1, xmm4
+ movdqu xmm2, [eax + 32]
movdqu xmm4, [eax + esi + 32]
pavgb xmm2, xmm4
+ movdqu xmm3, [eax + 48]
movdqu xmm4, [eax + esi + 48]
pavgb xmm3, xmm4
+
lea eax, [eax + 64]
movdqa xmm4, xmm0
shufps xmm0, xmm1, 0x88
@@ -2043,10 +1833,10 @@ void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
paddb xmm0, xmm5 // -> unsigned
// step 3 - store 8 U and 8 V values
- sub ecx, 16
movlps qword ptr [edx], xmm0 // U
movhps qword ptr [edx + edi], xmm0 // V
lea edx, [edx + 8]
+ sub ecx, 16
jg convertloop
pop edi
@@ -2055,7 +1845,7 @@ void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
}
}
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) {
__asm {
@@ -2065,93 +1855,27 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
mov esi, [esp + 8 + 8] // src_stride_argb
mov edx, [esp + 8 + 12] // dst_u
mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // pix
- movdqa xmm7, kRGBAToU
- movdqa xmm6, kRGBAToV
- movdqa xmm5, kAddUV128
+ mov ecx, [esp + 8 + 20] // width
+ movdqa xmm5, xmmword ptr kAddUV128
+ movdqa xmm6, xmmword ptr kRGBAToV
+ movdqa xmm7, xmmword ptr kRGBAToU
sub edi, edx // stride from u to v
- align 4
- convertloop:
- /* step 1 - subsample 16x2 argb pixels to 8x1 */
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + 32]
- movdqa xmm3, [eax + 48]
- pavgb xmm0, [eax + esi]
- pavgb xmm1, [eax + esi + 16]
- pavgb xmm2, [eax + esi + 32]
- pavgb xmm3, [eax + esi + 48]
- lea eax, [eax + 64]
- movdqa xmm4, xmm0
- shufps xmm0, xmm1, 0x88
- shufps xmm4, xmm1, 0xdd
- pavgb xmm0, xmm4
- movdqa xmm4, xmm2
- shufps xmm2, xmm3, 0x88
- shufps xmm4, xmm3, 0xdd
- pavgb xmm2, xmm4
-
- // step 2 - convert to U and V
- // from here down is very similar to Y code except
- // instead of 16 different pixels, its 8 pixels of U and 8 of V
- movdqa xmm1, xmm0
- movdqa xmm3, xmm2
- pmaddubsw xmm0, xmm7 // U
- pmaddubsw xmm2, xmm7
- pmaddubsw xmm1, xmm6 // V
- pmaddubsw xmm3, xmm6
- phaddw xmm0, xmm2
- phaddw xmm1, xmm3
- psraw xmm0, 8
- psraw xmm1, 8
- packsswb xmm0, xmm1
- paddb xmm0, xmm5 // -> unsigned
-
- // step 3 - store 8 U and 8 V values
- sub ecx, 16
- movlps qword ptr [edx], xmm0 // U
- movhps qword ptr [edx + edi], xmm0 // V
- lea edx, [edx + 8]
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // src_argb
- mov esi, [esp + 8 + 8] // src_stride_argb
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // pix
- movdqa xmm7, kRGBAToU
- movdqa xmm6, kRGBAToV
- movdqa xmm5, kAddUV128
- sub edi, edx // stride from u to v
-
- align 4
convertloop:
/* step 1 - subsample 16x2 argb pixels to 8x1 */
movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
movdqu xmm4, [eax + esi]
pavgb xmm0, xmm4
+ movdqu xmm1, [eax + 16]
movdqu xmm4, [eax + esi + 16]
pavgb xmm1, xmm4
+ movdqu xmm2, [eax + 32]
movdqu xmm4, [eax + esi + 32]
pavgb xmm2, xmm4
+ movdqu xmm3, [eax + 48]
movdqu xmm4, [eax + esi + 48]
pavgb xmm3, xmm4
+
lea eax, [eax + 64]
movdqa xmm4, xmm0
shufps xmm0, xmm1, 0x88
@@ -2179,10 +1903,10 @@ void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
paddb xmm0, xmm5 // -> unsigned
// step 3 - store 8 U and 8 V values
- sub ecx, 16
movlps qword ptr [edx], xmm0 // U
movhps qword ptr [edx + edi], xmm0 // V
lea edx, [edx + 8]
+ sub ecx, 16
jg convertloop
pop edi
@@ -2192,119 +1916,505 @@ void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
}
#endif // HAS_ARGBTOYROW_SSSE3
+// Read 16 UV from 444
+#define READYUV444_AVX2 __asm { \
+ __asm vmovdqu xmm0, [esi] /* U */ \
+ __asm vmovdqu xmm1, [esi + edi] /* V */ \
+ __asm lea esi, [esi + 16] \
+ __asm vpermq ymm0, ymm0, 0xd8 \
+ __asm vpermq ymm1, ymm1, 0xd8 \
+ __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \
+ __asm vmovdqu xmm4, [eax] /* Y */ \
+ __asm vpermq ymm4, ymm4, 0xd8 \
+ __asm vpunpcklbw ymm4, ymm4, ymm4 \
+ __asm lea eax, [eax + 16] \
+ }
+
+// Read 8 UV from 422, upsample to 16 UV.
+#define READYUV422_AVX2 __asm { \
+ __asm vmovq xmm0, qword ptr [esi] /* U */ \
+ __asm vmovq xmm1, qword ptr [esi + edi] /* V */ \
+ __asm lea esi, [esi + 8] \
+ __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \
+ __asm vpermq ymm0, ymm0, 0xd8 \
+ __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \
+ __asm vmovdqu xmm4, [eax] /* Y */ \
+ __asm vpermq ymm4, ymm4, 0xd8 \
+ __asm vpunpcklbw ymm4, ymm4, ymm4 \
+ __asm lea eax, [eax + 16] \
+ }
+
+// Read 8 UV from 422, upsample to 16 UV. With 16 Alpha.
+#define READYUVA422_AVX2 __asm { \
+ __asm vmovq xmm0, qword ptr [esi] /* U */ \
+ __asm vmovq xmm1, qword ptr [esi + edi] /* V */ \
+ __asm lea esi, [esi + 8] \
+ __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \
+ __asm vpermq ymm0, ymm0, 0xd8 \
+ __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \
+ __asm vmovdqu xmm4, [eax] /* Y */ \
+ __asm vpermq ymm4, ymm4, 0xd8 \
+ __asm vpunpcklbw ymm4, ymm4, ymm4 \
+ __asm lea eax, [eax + 16] \
+ __asm vmovdqu xmm5, [ebp] /* A */ \
+ __asm vpermq ymm5, ymm5, 0xd8 \
+ __asm lea ebp, [ebp + 16] \
+ }
+
+// Read 4 UV from 411, upsample to 16 UV.
+#define READYUV411_AVX2 __asm { \
+ __asm vmovd xmm0, dword ptr [esi] /* U */ \
+ __asm vmovd xmm1, dword ptr [esi + edi] /* V */ \
+ __asm lea esi, [esi + 4] \
+ __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \
+ __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \
+ __asm vpermq ymm0, ymm0, 0xd8 \
+ __asm vpunpckldq ymm0, ymm0, ymm0 /* UVUVUVUV (upsample) */ \
+ __asm vmovdqu xmm4, [eax] /* Y */ \
+ __asm vpermq ymm4, ymm4, 0xd8 \
+ __asm vpunpcklbw ymm4, ymm4, ymm4 \
+ __asm lea eax, [eax + 16] \
+ }
+
+// Read 8 UV from NV12, upsample to 16 UV.
+#define READNV12_AVX2 __asm { \
+ __asm vmovdqu xmm0, [esi] /* UV */ \
+ __asm lea esi, [esi + 16] \
+ __asm vpermq ymm0, ymm0, 0xd8 \
+ __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \
+ __asm vmovdqu xmm4, [eax] /* Y */ \
+ __asm vpermq ymm4, ymm4, 0xd8 \
+ __asm vpunpcklbw ymm4, ymm4, ymm4 \
+ __asm lea eax, [eax + 16] \
+ }
+
+// Read 8 UV from NV21, upsample to 16 UV.
+#define READNV21_AVX2 __asm { \
+ __asm vmovdqu xmm0, [esi] /* UV */ \
+ __asm lea esi, [esi + 16] \
+ __asm vpermq ymm0, ymm0, 0xd8 \
+ __asm vpshufb ymm0, ymm0, ymmword ptr kShuffleNV21 \
+ __asm vmovdqu xmm4, [eax] /* Y */ \
+ __asm vpermq ymm4, ymm4, 0xd8 \
+ __asm vpunpcklbw ymm4, ymm4, ymm4 \
+ __asm lea eax, [eax + 16] \
+ }
+
+// Read 16 pixels of YUY2: extract 16 Y and upsample 8 UV to 16 UV.
+#define READYUY2_AVX2 __asm { \
+ __asm vmovdqu ymm4, [eax] /* YUY2 */ \
+ __asm vpshufb ymm4, ymm4, ymmword ptr kShuffleYUY2Y \
+ __asm vmovdqu ymm0, [eax] /* UV */ \
+ __asm vpshufb ymm0, ymm0, ymmword ptr kShuffleYUY2UV \
+ __asm lea eax, [eax + 32] \
+ }
+
+// Read 16 pixels of UYVY: extract 16 Y and upsample 8 UV to 16 UV.
+#define READUYVY_AVX2 __asm { \
+ __asm vmovdqu ymm4, [eax] /* UYVY */ \
+ __asm vpshufb ymm4, ymm4, ymmword ptr kShuffleUYVYY \
+ __asm vmovdqu ymm0, [eax] /* UV */ \
+ __asm vpshufb ymm0, ymm0, ymmword ptr kShuffleUYVYUV \
+ __asm lea eax, [eax + 32] \
+ }
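+
+// For reference only: the byte order the shuffles above pick apart. YUY2
+// stores [Y0 U Y1 V] per pixel pair and UYVY stores [U Y0 V Y1]; both Y
+// values of a pair share one U and one V, which is the "upsample".
+// The helper name is illustrative.
+static __inline void YUY2ToPixelPair_C(const uint8* src_yuy2,
+ uint8* y0, uint8* y1, uint8* u, uint8* v) {
+ *y0 = src_yuy2[0];
+ *u = src_yuy2[1];
+ *y1 = src_yuy2[2];
+ *v = src_yuy2[3];
+}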
+
+// Convert 16 pixels: 16 UV and 16 Y.
+#define YUVTORGB_AVX2(YuvConstants) __asm { \
+ __asm vpmaddubsw ymm2, ymm0, ymmword ptr [YuvConstants + KUVTOR] /* R UV */\
+ __asm vpmaddubsw ymm1, ymm0, ymmword ptr [YuvConstants + KUVTOG] /* G UV */\
+ __asm vpmaddubsw ymm0, ymm0, ymmword ptr [YuvConstants + KUVTOB] /* B UV */\
+ __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASR] \
+ __asm vpsubw ymm2, ymm3, ymm2 \
+ __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASG] \
+ __asm vpsubw ymm1, ymm3, ymm1 \
+ __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASB] \
+ __asm vpsubw ymm0, ymm3, ymm0 \
+ /* Step 2: Find Y contribution to 16 R,G,B values */ \
+ __asm vpmulhuw ymm4, ymm4, ymmword ptr [YuvConstants + KYTORGB] \
+ __asm vpaddsw ymm0, ymm0, ymm4 /* B += Y */ \
+ __asm vpaddsw ymm1, ymm1, ymm4 /* G += Y */ \
+ __asm vpaddsw ymm2, ymm2, ymm4 /* R += Y */ \
+ __asm vpsraw ymm0, ymm0, 6 \
+ __asm vpsraw ymm1, ymm1, 6 \
+ __asm vpsraw ymm2, ymm2, 6 \
+ __asm vpackuswb ymm0, ymm0, ymm0 /* B */ \
+ __asm vpackuswb ymm1, ymm1, ymm1 /* G */ \
+ __asm vpackuswb ymm2, ymm2, ymm2 /* R */ \
+ }
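+
+// For reference only: a scalar sketch of YUVTORGB_AVX2. The constants below
+// are illustrative BT.601 values; the real numbers come from the
+// YuvConstants struct passed in ebx. Enum and helper names are stand-ins.
+enum {
+ kUB = -128, kUG = 25, kUR = 0, // U contributions to B, G, R
+ kVB = 0, kVG = 52, kVR = -102, // V contributions to B, G, R
+ kYG = 18997, kYGB = -1160, // Y scale and Y bias
+ kBB = kUB * 128 + kVB * 128 + kYGB, // precomputed channel biases
+ kBG = kUG * 128 + kVG * 128 + kYGB,
+ kBR = kUR * 128 + kVR * 128 + kYGB
+};
+static __inline uint8 Clamp6_C(int v) {
+ v = v >> 6;
+ return (uint8)(v < 0 ? 0 : (v > 255 ? 255 : v));
+}
+static __inline void YuvToRgbPixel_C(uint8 y, uint8 u, uint8 v,
+ uint8* b, uint8* g, uint8* r) {
+ int y1 = (int)(((uint32)(y * 0x0101) * kYG) >> 16); // vpmulhuw of doubled Y
+ *b = Clamp6_C(kBB - (kUB * u + kVB * v) + y1);
+ *g = Clamp6_C(kBG - (kUG * u + kVG * v) + y1);
+ *r = Clamp6_C(kBR - (kUR * u + kVR * v) + y1);
+}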
+
+// Store 16 ARGB values.
+#define STOREARGB_AVX2 __asm { \
+ __asm vpunpcklbw ymm0, ymm0, ymm1 /* BG */ \
+ __asm vpermq ymm0, ymm0, 0xd8 \
+ __asm vpunpcklbw ymm2, ymm2, ymm5 /* RA */ \
+ __asm vpermq ymm2, ymm2, 0xd8 \
+ __asm vpunpcklwd ymm1, ymm0, ymm2 /* BGRA first 8 pixels */ \
+ __asm vpunpckhwd ymm0, ymm0, ymm2 /* BGRA next 8 pixels */ \
+ __asm vmovdqu 0[edx], ymm1 \
+ __asm vmovdqu 32[edx], ymm0 \
+ __asm lea edx, [edx + 64] \
+ }
+
+// Store 16 RGBA values.
+#define STORERGBA_AVX2 __asm { \
+ __asm vpunpcklbw ymm1, ymm1, ymm2 /* GR */ \
+ __asm vpermq ymm1, ymm1, 0xd8 \
+ __asm vpunpcklbw ymm2, ymm5, ymm0 /* AB */ \
+ __asm vpermq ymm2, ymm2, 0xd8 \
+ __asm vpunpcklwd ymm0, ymm2, ymm1 /* ABGR first 8 pixels */ \
+ __asm vpunpckhwd ymm1, ymm2, ymm1 /* ABGR next 8 pixels */ \
+ __asm vmovdqu [edx], ymm0 \
+ __asm vmovdqu [edx + 32], ymm1 \
+ __asm lea edx, [edx + 64] \
+ }
+
#ifdef HAS_I422TOARGBROW_AVX2
-
-static const lvec8 kUVToB_AVX = {
- UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB,
- UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB
-};
-static const lvec8 kUVToR_AVX = {
- UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR,
- UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR
-};
-static const lvec8 kUVToG_AVX = {
- UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
- UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG
-};
-static const lvec16 kYToRgb_AVX = {
- YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG
-};
-static const lvec16 kYSub16_AVX = {
- 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
-};
-static const lvec16 kUVBiasB_AVX = {
- BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB
-};
-static const lvec16 kUVBiasG_AVX = {
- BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG
-};
-static const lvec16 kUVBiasR_AVX = {
- BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR
-};
-
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void I422ToARGBRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // argb
- mov ecx, [esp + 8 + 20] // width
+ push ebx
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // argb
+ mov ebx, [esp + 12 + 20] // yuvconstants
+ mov ecx, [esp + 12 + 24] // width
sub edi, esi
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
- vpxor ymm4, ymm4, ymm4
- align 4
convertloop:
- vmovq xmm0, qword ptr [esi] // U
- vmovq xmm1, qword ptr [esi + edi] // V
- lea esi, [esi + 8]
- vpunpcklbw ymm0, ymm0, ymm1 // UV
- vpermq ymm0, ymm0, 0xd8
- vpunpcklwd ymm0, ymm0, ymm0 // UVUV
- vpmaddubsw ymm2, ymm0, kUVToB_AVX // scale B UV
- vpmaddubsw ymm1, ymm0, kUVToG_AVX // scale G UV
- vpmaddubsw ymm0, ymm0, kUVToR_AVX // scale R UV
- vpsubw ymm2, ymm2, kUVBiasB_AVX // unbias back to signed
- vpsubw ymm1, ymm1, kUVBiasG_AVX
- vpsubw ymm0, ymm0, kUVBiasR_AVX
+ READYUV422_AVX2
+ YUVTORGB_AVX2(ebx)
+ STOREARGB_AVX2
- // Step 2: Find Y contribution to 16 R,G,B values
- vmovdqu xmm3, [eax] // NOLINT
- lea eax, [eax + 16]
- vpermq ymm3, ymm3, 0xd8
- vpunpcklbw ymm3, ymm3, ymm4
- vpsubsw ymm3, ymm3, kYSub16_AVX
- vpmullw ymm3, ymm3, kYToRgb_AVX
- vpaddsw ymm2, ymm2, ymm3 // B += Y
- vpaddsw ymm1, ymm1, ymm3 // G += Y
- vpaddsw ymm0, ymm0, ymm3 // R += Y
- vpsraw ymm2, ymm2, 6
- vpsraw ymm1, ymm1, 6
- vpsraw ymm0, ymm0, 6
- vpackuswb ymm2, ymm2, ymm2 // B
- vpackuswb ymm1, ymm1, ymm1 // G
- vpackuswb ymm0, ymm0, ymm0 // R
-
- // Step 3: Weave into ARGB
- vpunpcklbw ymm2, ymm2, ymm1 // BG
- vpermq ymm2, ymm2, 0xd8
- vpunpcklbw ymm0, ymm0, ymm5 // RA
- vpermq ymm0, ymm0, 0xd8
- vpunpcklwd ymm1, ymm2, ymm0 // BGRA first 8 pixels
- vpunpckhwd ymm2, ymm2, ymm0 // BGRA next 8 pixels
- vmovdqu [edx], ymm1
- vmovdqu [edx + 32], ymm2
- lea edx, [edx + 64]
sub ecx, 16
jg convertloop
- vzeroupper
+ pop ebx
pop edi
pop esi
+ vzeroupper
ret
}
}
#endif // HAS_I422TOARGBROW_AVX2
-#ifdef HAS_I422TOARGBROW_SSSE3
+#ifdef HAS_I422ALPHATOARGBROW_AVX2
+// 16 pixels
+// 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ARGB.
+__declspec(naked)
+void I422AlphaToARGBRow_AVX2(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ const uint8* a_buf,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ push ebx
+ push ebp
+ mov eax, [esp + 16 + 4] // Y
+ mov esi, [esp + 16 + 8] // U
+ mov edi, [esp + 16 + 12] // V
+ mov ebp, [esp + 16 + 16] // A
+ mov edx, [esp + 16 + 20] // argb
+ mov ebx, [esp + 16 + 24] // yuvconstants
+ mov ecx, [esp + 16 + 28] // width
+ sub edi, esi
+ convertloop:
+ READYUVA422_AVX2
+ YUVTORGB_AVX2(ebx)
+ STOREARGB_AVX2
+
+ sub ecx, 16
+ jg convertloop
+
+ pop ebp
+ pop ebx
+ pop edi
+ pop esi
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_I422ALPHATOARGBROW_AVX2
+
+#ifdef HAS_I444TOARGBROW_AVX2
+// 16 pixels
+// 16 UV values with 16 Y producing 16 ARGB (64 bytes).
+__declspec(naked)
+void I444ToARGBRow_AVX2(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ push ebx
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // argb
+ mov ebx, [esp + 12 + 20] // yuvconstants
+ mov ecx, [esp + 12 + 24] // width
+ sub edi, esi
+ vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
+ convertloop:
+ READYUV444_AVX2
+ YUVTORGB_AVX2(ebx)
+ STOREARGB_AVX2
+
+ sub ecx, 16
+ jg convertloop
+
+ pop ebx
+ pop edi
+ pop esi
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_I444TOARGBROW_AVX2
+
+#ifdef HAS_I411TOARGBROW_AVX2
+// 16 pixels
+// 4 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
+__declspec(naked)
+void I411ToARGBRow_AVX2(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ push ebx
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // argb
+ mov ebx, [esp + 12 + 20] // yuvconstants
+ mov ecx, [esp + 12 + 24] // width
+ sub edi, esi
+ vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
+
+ convertloop:
+ READYUV411_AVX2
+ YUVTORGB_AVX2(ebx)
+ STOREARGB_AVX2
+
+ sub ecx, 16
+ jg convertloop
+
+ pop ebx
+ pop edi
+ pop esi
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_I411TOARGBROW_AVX2
+
+#ifdef HAS_NV12TOARGBROW_AVX2
+// 16 pixels.
+// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
+__declspec(naked)
+void NV12ToARGBRow_AVX2(const uint8* y_buf,
+ const uint8* uv_buf,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ __asm {
+ push esi
+ push ebx
+ mov eax, [esp + 8 + 4] // Y
+ mov esi, [esp + 8 + 8] // UV
+ mov edx, [esp + 8 + 12] // argb
+ mov ebx, [esp + 8 + 16] // yuvconstants
+ mov ecx, [esp + 8 + 20] // width
+ vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
+
+ convertloop:
+ READNV12_AVX2
+ YUVTORGB_AVX2(ebx)
+ STOREARGB_AVX2
+
+ sub ecx, 16
+ jg convertloop
+
+ pop ebx
+ pop esi
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_NV12TOARGBROW_AVX2
+
+#ifdef HAS_NV21TOARGBROW_AVX2
+// 16 pixels.
+// 8 VU values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
+__declspec(naked)
+void NV21ToARGBRow_AVX2(const uint8* y_buf,
+ const uint8* vu_buf,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ __asm {
+ push esi
+ push ebx
+ mov eax, [esp + 8 + 4] // Y
+ mov esi, [esp + 8 + 8] // VU
+ mov edx, [esp + 8 + 12] // argb
+ mov ebx, [esp + 8 + 16] // yuvconstants
+ mov ecx, [esp + 8 + 20] // width
+ vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
+
+ convertloop:
+ READNV21_AVX2
+ YUVTORGB_AVX2(ebx)
+ STOREARGB_AVX2
+
+ sub ecx, 16
+ jg convertloop
+
+ pop ebx
+ pop esi
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_NV21TOARGBROW_AVX2
+
+#ifdef HAS_YUY2TOARGBROW_AVX2
+// 16 pixels.
+// 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes).
+__declspec(naked)
+void YUY2ToARGBRow_AVX2(const uint8* src_yuy2,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ __asm {
+ push ebx
+ mov eax, [esp + 4 + 4] // yuy2
+ mov edx, [esp + 4 + 8] // argb
+ mov ebx, [esp + 4 + 12] // yuvconstants
+ mov ecx, [esp + 4 + 16] // width
+ vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
+
+ convertloop:
+ READYUY2_AVX2
+ YUVTORGB_AVX2(ebx)
+ STOREARGB_AVX2
+
+ sub ecx, 16
+ jg convertloop
+
+ pop ebx
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_YUY2TOARGBROW_AVX2
+
+#ifdef HAS_UYVYTOARGBROW_AVX2
+// 16 pixels.
+// 8 UYVY values with 16 Y and 8 UV producing 16 ARGB (64 bytes).
+__declspec(naked)
+void UYVYToARGBRow_AVX2(const uint8* src_uyvy,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ __asm {
+ push ebx
+ mov eax, [esp + 4 + 4] // uyvy
+ mov edx, [esp + 4 + 8] // argb
+ mov ebx, [esp + 4 + 12] // yuvconstants
+ mov ecx, [esp + 4 + 16] // width
+ vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
+
+ convertloop:
+ READUYVY_AVX2
+ YUVTORGB_AVX2(ebx)
+ STOREARGB_AVX2
+
+ sub ecx, 16
+ jg convertloop
+
+ pop ebx
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_UYVYTOARGBROW_AVX2
+
+#ifdef HAS_I422TORGBAROW_AVX2
+// 16 pixels
+// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes).
+__declspec(naked)
+void I422ToRGBARow_AVX2(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ push ebx
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // rgba
+ mov ebx, [esp + 12 + 20] // yuvconstants
+ mov ecx, [esp + 12 + 24] // width
+ sub edi, esi
+ vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
+
+ convertloop:
+ READYUV422_AVX2
+ YUVTORGB_AVX2(ebx)
+ STORERGBA_AVX2
+
+ sub ecx, 16
+ jg convertloop
+
+ pop ebx
+ pop edi
+ pop esi
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_I422TORGBAROW_AVX2
+
+#if defined(HAS_I422TOARGBROW_SSSE3)
// TODO(fbarchard): Read that does half size on Y and treats 420 as 444.
+// That would allow a conversion with half-size scaling.
// Read 8 UV from 444.
#define READYUV444 __asm { \
- __asm movq xmm0, qword ptr [esi] /* U */ /* NOLINT */ \
- __asm movq xmm1, qword ptr [esi + edi] /* V */ /* NOLINT */ \
+ __asm movq xmm0, qword ptr [esi] /* U */ \
+ __asm movq xmm1, qword ptr [esi + edi] /* V */ \
__asm lea esi, [esi + 8] \
__asm punpcklbw xmm0, xmm1 /* UV */ \
+ __asm movq xmm4, qword ptr [eax] \
+ __asm punpcklbw xmm4, xmm4 \
+ __asm lea eax, [eax + 8] \
}
// Read 4 UV from 422, upsample to 8 UV.
@@ -2314,47 +2424,99 @@ void I422ToARGBRow_AVX2(const uint8* y_buf,
__asm lea esi, [esi + 4] \
__asm punpcklbw xmm0, xmm1 /* UV */ \
__asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
+ __asm movq xmm4, qword ptr [eax] \
+ __asm punpcklbw xmm4, xmm4 \
+ __asm lea eax, [eax + 8] \
+ }
+
+// Read 4 UV from 422, upsample to 8 UV. With 8 Alpha.
+#define READYUVA422 __asm { \
+ __asm movd xmm0, [esi] /* U */ \
+ __asm movd xmm1, [esi + edi] /* V */ \
+ __asm lea esi, [esi + 4] \
+ __asm punpcklbw xmm0, xmm1 /* UV */ \
+ __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
+ __asm movq xmm4, qword ptr [eax] /* Y */ \
+ __asm punpcklbw xmm4, xmm4 \
+ __asm lea eax, [eax + 8] \
+ __asm movq xmm5, qword ptr [ebp] /* A */ \
+ __asm lea ebp, [ebp + 8] \
}
// Read 2 UV from 411, upsample to 8 UV.
-#define READYUV411 __asm { \
- __asm movzx ebx, word ptr [esi] /* U */ /* NOLINT */ \
+// DrMemory fails with a memory fault if pinsrw is used; see libyuv bug 525.
+// __asm pinsrw xmm0, [esi], 0 /* U */
+// __asm pinsrw xmm1, [esi + edi], 0 /* V */
+#define READYUV411_EBX __asm { \
+ __asm movzx ebx, word ptr [esi] /* U */ \
__asm movd xmm0, ebx \
- __asm movzx ebx, word ptr [esi + edi] /* V */ /* NOLINT */ \
+ __asm movzx ebx, word ptr [esi + edi] /* V */ \
__asm movd xmm1, ebx \
__asm lea esi, [esi + 2] \
- __asm punpcklbw xmm0, xmm1 /* UV */ \
- __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
- __asm punpckldq xmm0, xmm0 /* UVUV (upsample) */ \
+ __asm punpcklbw xmm0, xmm1 /* UV */ \
+ __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
+ __asm punpckldq xmm0, xmm0 /* UVUVUVUV (upsample) */ \
+ __asm movq xmm4, qword ptr [eax] \
+ __asm punpcklbw xmm4, xmm4 \
+ __asm lea eax, [eax + 8] \
}
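+
+// Sampling model for the three unpack steps above (illustrative reference,
+// not used by the SIMD path, assuming the libyuv uint8 typedef): each UV
+// pair is replicated four times, so output pixel i reads u_buf[i / 4] and
+// v_buf[i / 4] for 4:1:1 horizontal subsampling.
+static __inline void Upsample411Model(const uint8* u_buf, const uint8* v_buf,
+                                      uint8* u8, uint8* v8) {
+  int i;
+  for (i = 0; i < 8; ++i) {
+    u8[i] = u_buf[i / 4];  // punpcklbw + punpcklwd + punpckldq
+    v8[i] = v_buf[i / 4];
+  }
+}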
// Read 4 UV from NV12, upsample to 8 UV.
#define READNV12 __asm { \
- __asm movq xmm0, qword ptr [esi] /* UV */ /* NOLINT */ \
+ __asm movq xmm0, qword ptr [esi] /* UV */ \
__asm lea esi, [esi + 8] \
__asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
+ __asm movq xmm4, qword ptr [eax] \
+ __asm punpcklbw xmm4, xmm4 \
+ __asm lea eax, [eax + 8] \
+ }
+
+// Read 4 VU from NV21, upsample to 8 UV.
+#define READNV21 __asm { \
+ __asm movq xmm0, qword ptr [esi] /* UV */ \
+ __asm lea esi, [esi + 8] \
+ __asm pshufb xmm0, xmmword ptr kShuffleNV21 \
+ __asm movq xmm4, qword ptr [eax] \
+ __asm punpcklbw xmm4, xmm4 \
+ __asm lea eax, [eax + 8] \
+ }
+
+// Read 4 YUY2 with 8 Y and upsample 4 UV to 8 UV.
+#define READYUY2 __asm { \
+ __asm movdqu xmm4, [eax] /* YUY2 */ \
+ __asm pshufb xmm4, xmmword ptr kShuffleYUY2Y \
+ __asm movdqu xmm0, [eax] /* UV */ \
+ __asm pshufb xmm0, xmmword ptr kShuffleYUY2UV \
+ __asm lea eax, [eax + 16] \
+ }
+
+// Read 4 UYVY with 8 Y and upsample 4 UV to 8 UV.
+#define READUYVY __asm { \
+ __asm movdqu xmm4, [eax] /* UYVY */ \
+ __asm pshufb xmm4, xmmword ptr kShuffleUYVYY \
+ __asm movdqu xmm0, [eax] /* UV */ \
+ __asm pshufb xmm0, xmmword ptr kShuffleUYVYUV \
+ __asm lea eax, [eax + 16] \
}
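+
+// Byte-order model for READYUY2/READUYVY above (illustrative reference,
+// assuming the libyuv uint8 typedef): YUY2 stores Y0 U Y1 V per dword and
+// UYVY stores U Y0 V Y1, so the kShuffle* tables pull out 8 Y bytes plus
+// 4 UV pairs (each shared by two pixels) from 16 input bytes.
+static __inline void ReadYUY2Model(const uint8* src_yuy2,
+                                   uint8* y8, uint8* uv8) {
+  int i;
+  for (i = 0; i < 4; ++i) {                // 4 YUY2 dwords = 8 pixels
+    y8[i * 2 + 0] = src_yuy2[i * 4 + 0];   // Y0
+    y8[i * 2 + 1] = src_yuy2[i * 4 + 2];   // Y1
+    uv8[i * 2 + 0] = src_yuy2[i * 4 + 1];  // U, shared by both pixels
+    uv8[i * 2 + 1] = src_yuy2[i * 4 + 3];  // V
+  }
+}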
// Convert 8 pixels: 8 UV and 8 Y.
-#define YUVTORGB __asm { \
- /* Step 1: Find 4 UV contributions to 8 R,G,B values */ \
+#define YUVTORGB(YuvConstants) __asm { \
__asm movdqa xmm1, xmm0 \
__asm movdqa xmm2, xmm0 \
- __asm pmaddubsw xmm0, kUVToB /* scale B UV */ \
- __asm pmaddubsw xmm1, kUVToG /* scale G UV */ \
- __asm pmaddubsw xmm2, kUVToR /* scale R UV */ \
- __asm psubw xmm0, kUVBiasB /* unbias back to signed */ \
- __asm psubw xmm1, kUVBiasG \
- __asm psubw xmm2, kUVBiasR \
- /* Step 2: Find Y contribution to 8 R,G,B values */ \
- __asm movq xmm3, qword ptr [eax] /* NOLINT */ \
- __asm lea eax, [eax + 8] \
- __asm punpcklbw xmm3, xmm4 \
- __asm psubsw xmm3, kYSub16 \
- __asm pmullw xmm3, kYToRgb \
- __asm paddsw xmm0, xmm3 /* B += Y */ \
- __asm paddsw xmm1, xmm3 /* G += Y */ \
- __asm paddsw xmm2, xmm3 /* R += Y */ \
+ __asm movdqa xmm3, xmm0 \
+ __asm movdqa xmm0, xmmword ptr [YuvConstants + KUVBIASB] \
+ __asm pmaddubsw xmm1, xmmword ptr [YuvConstants + KUVTOB] \
+ __asm psubw xmm0, xmm1 \
+ __asm movdqa xmm1, xmmword ptr [YuvConstants + KUVBIASG] \
+ __asm pmaddubsw xmm2, xmmword ptr [YuvConstants + KUVTOG] \
+ __asm psubw xmm1, xmm2 \
+ __asm movdqa xmm2, xmmword ptr [YuvConstants + KUVBIASR] \
+ __asm pmaddubsw xmm3, xmmword ptr [YuvConstants + KUVTOR] \
+ __asm psubw xmm2, xmm3 \
+ __asm pmulhuw xmm4, xmmword ptr [YuvConstants + KYTORGB] \
+ __asm paddsw xmm0, xmm4 /* B += Y */ \
+ __asm paddsw xmm1, xmm4 /* G += Y */ \
+ __asm paddsw xmm2, xmm4 /* R += Y */ \
__asm psraw xmm0, 6 \
__asm psraw xmm1, 6 \
__asm psraw xmm2, 6 \
@@ -2363,189 +2525,192 @@ void I422ToARGBRow_AVX2(const uint8* y_buf,
__asm packuswb xmm2, xmm2 /* R */ \
}
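+
+// Scalar model of the fixed-point math in YUVTORGB above (illustrative
+// reference, not used by the SIMD path). The coefficient and bias names
+// follow the UB/VB, UG/VG, UR/VR and BB/BG/BR constants now held in
+// YuvConstants; uint8/int32/uint32 are the libyuv typedefs.
+static __inline uint8 ClampU8Model(int32 v) {
+  return (uint8)(v < 0 ? 0 : (v > 255 ? 255 : v));  // packuswb saturation
+}
+static __inline void YuvPixelModel(uint8 y, uint8 u, uint8 v,
+                                   uint8* b, uint8* g, uint8* r) {
+  // punpcklbw xmm4, xmm4 widens Y to y * 0x0101; pmulhuw keeps the high
+  // 16 bits of the product, i.e. a 16.16 fixed-point scale by YG.
+  int32 y1 = (int32)(((uint32)y * 0x0101 * YG) >> 16);
+  // Each channel is bias - pmaddubsw(UV, coefficients) + Y, then >> 6.
+  *b = ClampU8Model((BB - (u * UB + v * VB) + y1) >> 6);
+  *g = ClampU8Model((BG - (u * UG + v * VG) + y1) >> 6);
+  *r = ClampU8Model((BR - (u * UR + v * VR) + y1) >> 6);
+}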
-// Convert 8 pixels: 8 VU and 8 Y.
-#define YVUTORGB __asm { \
- /* Step 1: Find 4 UV contributions to 8 R,G,B values */ \
+// Store 8 ARGB values.
+#define STOREARGB __asm { \
+ __asm punpcklbw xmm0, xmm1 /* BG */ \
+ __asm punpcklbw xmm2, xmm5 /* RA */ \
__asm movdqa xmm1, xmm0 \
- __asm movdqa xmm2, xmm0 \
- __asm pmaddubsw xmm0, kVUToB /* scale B UV */ \
- __asm pmaddubsw xmm1, kVUToG /* scale G UV */ \
- __asm pmaddubsw xmm2, kVUToR /* scale R UV */ \
- __asm psubw xmm0, kUVBiasB /* unbias back to signed */ \
- __asm psubw xmm1, kUVBiasG \
- __asm psubw xmm2, kUVBiasR \
- /* Step 2: Find Y contribution to 8 R,G,B values */ \
- __asm movq xmm3, qword ptr [eax] /* NOLINT */ \
- __asm lea eax, [eax + 8] \
- __asm punpcklbw xmm3, xmm4 \
- __asm psubsw xmm3, kYSub16 \
- __asm pmullw xmm3, kYToRgb \
- __asm paddsw xmm0, xmm3 /* B += Y */ \
- __asm paddsw xmm1, xmm3 /* G += Y */ \
- __asm paddsw xmm2, xmm3 /* R += Y */ \
- __asm psraw xmm0, 6 \
- __asm psraw xmm1, 6 \
- __asm psraw xmm2, 6 \
- __asm packuswb xmm0, xmm0 /* B */ \
- __asm packuswb xmm1, xmm1 /* G */ \
- __asm packuswb xmm2, xmm2 /* R */ \
+ __asm punpcklwd xmm0, xmm2 /* BGRA first 4 pixels */ \
+ __asm punpckhwd xmm1, xmm2 /* BGRA next 4 pixels */ \
+ __asm movdqu 0[edx], xmm0 \
+ __asm movdqu 16[edx], xmm1 \
+ __asm lea edx, [edx + 32] \
}
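+
+// Byte-order model of the ARGB weave above (illustrative reference,
+// assuming the libyuv uint8 typedef): the two punpck stages interleave the
+// planar B, G, R bytes and the 0xff alpha from xmm5 into packed
+// little-endian ARGB, lowest byte first.
+static __inline void WeaveARGBModel(const uint8* b, const uint8* g,
+                                    const uint8* r, uint8* dst_argb) {
+  int i;
+  for (i = 0; i < 8; ++i) {
+    dst_argb[i * 4 + 0] = b[i];   // B
+    dst_argb[i * 4 + 1] = g[i];   // G
+    dst_argb[i * 4 + 2] = r[i];   // R
+    dst_argb[i * 4 + 3] = 0xff;   // A
+  }
+}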
-// 8 pixels, dest aligned 16.
+// Store 8 BGRA values.
+#define STOREBGRA __asm { \
+ __asm pcmpeqb xmm5, xmm5 /* generate 0xffffffff for alpha */ \
+ __asm punpcklbw xmm1, xmm0 /* GB */ \
+ __asm punpcklbw xmm5, xmm2 /* AR */ \
+ __asm movdqa xmm0, xmm5 \
+ __asm punpcklwd xmm5, xmm1 /* BGRA first 4 pixels */ \
+ __asm punpckhwd xmm0, xmm1 /* BGRA next 4 pixels */ \
+ __asm movdqu 0[edx], xmm5 \
+ __asm movdqu 16[edx], xmm0 \
+ __asm lea edx, [edx + 32] \
+ }
+
+// Store 8 RGBA values.
+#define STORERGBA __asm { \
+ __asm pcmpeqb xmm5, xmm5 /* generate 0xffffffff for alpha */ \
+ __asm punpcklbw xmm1, xmm2 /* GR */ \
+ __asm punpcklbw xmm5, xmm0 /* AB */ \
+ __asm movdqa xmm0, xmm5 \
+ __asm punpcklwd xmm5, xmm1 /* RGBA first 4 pixels */ \
+ __asm punpckhwd xmm0, xmm1 /* RGBA next 4 pixels */ \
+ __asm movdqu 0[edx], xmm5 \
+ __asm movdqu 16[edx], xmm0 \
+ __asm lea edx, [edx + 32] \
+ }
+
+// Store 8 RGB24 values.
+#define STORERGB24 __asm { \
+ /* Weave into RRGB */ \
+ __asm punpcklbw xmm0, xmm1 /* BG */ \
+ __asm punpcklbw xmm2, xmm2 /* RR */ \
+ __asm movdqa xmm1, xmm0 \
+ __asm punpcklwd xmm0, xmm2 /* BGRR first 4 pixels */ \
+ __asm punpckhwd xmm1, xmm2 /* BGRR next 4 pixels */ \
+ /* RRGB -> RGB24 */ \
+ __asm pshufb xmm0, xmm5 /* Pack first 8 and last 4 bytes. */ \
+ __asm pshufb xmm1, xmm6 /* Pack first 12 bytes. */ \
+ __asm palignr xmm1, xmm0, 12 /* last 4 bytes of xmm0 + 12 from xmm1 */ \
+ __asm movq qword ptr 0[edx], xmm0 /* First 8 bytes */ \
+ __asm movdqu 8[edx], xmm1 /* Last 16 bytes */ \
+ __asm lea edx, [edx + 24] \
+ }
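+
+// Byte-level model of the RGB24 pack above (illustrative reference,
+// assuming the libyuv uint8 typedef): the two pshufb masks plus palignr
+// squeeze 8 BGRR dwords (32 bytes) into 8 BGR triples (24 bytes) by
+// dropping the duplicated 4th byte of every dword.
+static __inline void PackRGB24Model(const uint8* bgrr, uint8* dst_rgb24) {
+  int i;
+  for (i = 0; i < 8; ++i) {
+    dst_rgb24[i * 3 + 0] = bgrr[i * 4 + 0];  // B
+    dst_rgb24[i * 3 + 1] = bgrr[i * 4 + 1];  // G
+    dst_rgb24[i * 3 + 2] = bgrr[i * 4 + 2];  // R
+  }
+}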
+
+// Store 8 RGB565 values.
+#define STORERGB565 __asm { \
+ /* Weave into RRGB */ \
+ __asm punpcklbw xmm0, xmm1 /* BG */ \
+ __asm punpcklbw xmm2, xmm2 /* RR */ \
+ __asm movdqa xmm1, xmm0 \
+ __asm punpcklwd xmm0, xmm2 /* BGRR first 4 pixels */ \
+ __asm punpckhwd xmm1, xmm2 /* BGRR next 4 pixels */ \
+ /* RRGB -> RGB565 */ \
+ __asm movdqa xmm3, xmm0 /* B first 4 pixels of argb */ \
+ __asm movdqa xmm2, xmm0 /* G */ \
+ __asm pslld xmm0, 8 /* R */ \
+ __asm psrld xmm3, 3 /* B */ \
+ __asm psrld xmm2, 5 /* G */ \
+ __asm psrad xmm0, 16 /* R */ \
+ __asm pand xmm3, xmm5 /* B */ \
+ __asm pand xmm2, xmm6 /* G */ \
+ __asm pand xmm0, xmm7 /* R */ \
+ __asm por xmm3, xmm2 /* BG */ \
+ __asm por xmm0, xmm3 /* BGR */ \
+ __asm movdqa xmm3, xmm1 /* B next 4 pixels of argb */ \
+ __asm movdqa xmm2, xmm1 /* G */ \
+ __asm pslld xmm1, 8 /* R */ \
+ __asm psrld xmm3, 3 /* B */ \
+ __asm psrld xmm2, 5 /* G */ \
+ __asm psrad xmm1, 16 /* R */ \
+ __asm pand xmm3, xmm5 /* B */ \
+ __asm pand xmm2, xmm6 /* G */ \
+ __asm pand xmm1, xmm7 /* R */ \
+ __asm por xmm3, xmm2 /* BG */ \
+ __asm por xmm1, xmm3 /* BGR */ \
+ __asm packssdw xmm0, xmm1 \
+ __asm movdqu 0[edx], xmm0 /* store 8 pixels of RGB565 */ \
+ __asm lea edx, [edx + 16] \
+ }
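+
+// Per-pixel model of the mask-and-shift sequence above (illustrative
+// reference, assuming the libyuv uint16/uint32 typedefs). xmm5, xmm6 and
+// xmm7 hold the 0x0000001f, 0x000007e0 and 0xfffff800 masks that the
+// callers generate before the loop.
+static __inline uint16 ARGBToRGB565Model(uint32 argb) {
+  uint32 b = (argb >> 3) & 0x0000001f;  // 5 bits of B at bits 0..4
+  uint32 g = (argb >> 5) & 0x000007e0;  // 6 bits of G at bits 5..10
+  uint32 r = (argb >> 8) & 0x0000f800;  // 5 bits of R at bits 11..15
+  return (uint16)(b | g | r);
+}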
+
+// 8 pixels.
// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void I444ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // argb
- mov ecx, [esp + 8 + 20] // width
+ push ebx
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // argb
+ mov ebx, [esp + 12 + 20] // yuvconstants
+ mov ecx, [esp + 12 + 24] // width
sub edi, esi
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- pxor xmm4, xmm4
+ pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- align 4
convertloop:
READYUV444
- YUVTORGB
+ YUVTORGB(ebx)
+ STOREARGB
- // Step 3: Weave into ARGB
- punpcklbw xmm0, xmm1 // BG
- punpcklbw xmm2, xmm5 // RA
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm2 // BGRA first 4 pixels
- punpckhwd xmm1, xmm2 // BGRA next 4 pixels
- movdqa [edx], xmm0
- movdqa [edx + 16], xmm1
- lea edx, [edx + 32]
sub ecx, 8
jg convertloop
+ pop ebx
pop edi
pop esi
ret
}
}
-// 8 pixels, dest aligned 16.
-// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
+// 8 pixels.
+// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB24 (24 bytes).
+__declspec(naked)
void I422ToRGB24Row_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // rgb24
- mov ecx, [esp + 8 + 20] // width
+ push ebx
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // rgb24
+ mov ebx, [esp + 12 + 20] // yuvconstants
+ mov ecx, [esp + 12 + 24] // width
sub edi, esi
- pxor xmm4, xmm4
- movdqa xmm5, kShuffleMaskARGBToRGB24_0
- movdqa xmm6, kShuffleMaskARGBToRGB24
+ movdqa xmm5, xmmword ptr kShuffleMaskARGBToRGB24_0
+ movdqa xmm6, xmmword ptr kShuffleMaskARGBToRGB24
- align 4
convertloop:
READYUV422
- YUVTORGB
+ YUVTORGB(ebx)
+ STORERGB24
- // Step 3: Weave into RRGB
- punpcklbw xmm0, xmm1 // BG
- punpcklbw xmm2, xmm2 // RR
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm2 // BGRR first 4 pixels
- punpckhwd xmm1, xmm2 // BGRR next 4 pixels
- pshufb xmm0, xmm5 // Pack into first 8 and last 4 bytes.
- pshufb xmm1, xmm6 // Pack into first 12 bytes.
- palignr xmm1, xmm0, 12 // last 4 bytes of xmm0 + 12 from xmm1
- movq qword ptr [edx], xmm0 // First 8 bytes
- movdqu [edx + 8], xmm1 // Last 16 bytes. = 24 bytes, 8 RGB pixels.
- lea edx, [edx + 24]
sub ecx, 8
jg convertloop
+ pop ebx
pop edi
pop esi
ret
}
}
-// 8 pixels, dest aligned 16.
-// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
-void I422ToRAWRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_raw,
- int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // raw
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi
- pxor xmm4, xmm4
- movdqa xmm5, kShuffleMaskARGBToRAW_0
- movdqa xmm6, kShuffleMaskARGBToRAW
-
- align 4
- convertloop:
- READYUV422
- YUVTORGB
-
- // Step 3: Weave into RRGB
- punpcklbw xmm0, xmm1 // BG
- punpcklbw xmm2, xmm2 // RR
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm2 // BGRR first 4 pixels
- punpckhwd xmm1, xmm2 // BGRR next 4 pixels
- pshufb xmm0, xmm5 // Pack into first 8 and last 4 bytes.
- pshufb xmm1, xmm6 // Pack into first 12 bytes.
- palignr xmm1, xmm0, 12 // last 4 bytes of xmm0 + 12 from xmm1
- movq qword ptr [edx], xmm0 // First 8 bytes
- movdqu [edx + 8], xmm1 // Last 16 bytes. = 24 bytes, 8 RGB pixels.
- lea edx, [edx + 24]
- sub ecx, 8
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-// 8 pixels, dest unaligned.
-// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
+// 8 pixels
+// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB565 (16 bytes).
+__declspec(naked)
void I422ToRGB565Row_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb565_buf,
+ const struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // rgb565
- mov ecx, [esp + 8 + 20] // width
+ push ebx
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // rgb565
+ mov ebx, [esp + 12 + 20] // yuvconstants
+ mov ecx, [esp + 12 + 24] // width
sub edi, esi
- pxor xmm4, xmm4
pcmpeqb xmm5, xmm5 // generate mask 0x0000001f
psrld xmm5, 27
pcmpeqb xmm6, xmm6 // generate mask 0x000007e0
@@ -2554,701 +2719,320 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf,
pcmpeqb xmm7, xmm7 // generate mask 0xfffff800
pslld xmm7, 11
- align 4
convertloop:
READYUV422
- YUVTORGB
+ YUVTORGB(ebx)
+ STORERGB565
- // Step 3: Weave into RRGB
- punpcklbw xmm0, xmm1 // BG
- punpcklbw xmm2, xmm2 // RR
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm2 // BGRR first 4 pixels
- punpckhwd xmm1, xmm2 // BGRR next 4 pixels
-
- // Step 3b: RRGB -> RGB565
- movdqa xmm3, xmm0 // B first 4 pixels of argb
- movdqa xmm2, xmm0 // G
- pslld xmm0, 8 // R
- psrld xmm3, 3 // B
- psrld xmm2, 5 // G
- psrad xmm0, 16 // R
- pand xmm3, xmm5 // B
- pand xmm2, xmm6 // G
- pand xmm0, xmm7 // R
- por xmm3, xmm2 // BG
- por xmm0, xmm3 // BGR
- movdqa xmm3, xmm1 // B next 4 pixels of argb
- movdqa xmm2, xmm1 // G
- pslld xmm1, 8 // R
- psrld xmm3, 3 // B
- psrld xmm2, 5 // G
- psrad xmm1, 16 // R
- pand xmm3, xmm5 // B
- pand xmm2, xmm6 // G
- pand xmm1, xmm7 // R
- por xmm3, xmm2 // BG
- por xmm1, xmm3 // BGR
- packssdw xmm0, xmm1
sub ecx, 8
- movdqu [edx], xmm0 // store 8 pixels of RGB565
- lea edx, [edx + 16]
jg convertloop
+ pop ebx
pop edi
pop esi
ret
}
}
-// 8 pixels, dest aligned 16.
+// 8 pixels.
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void I422ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // argb
- mov ecx, [esp + 8 + 20] // width
+ push ebx
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // argb
+ mov ebx, [esp + 12 + 20] // yuvconstants
+ mov ecx, [esp + 12 + 24] // width
sub edi, esi
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- pxor xmm4, xmm4
- align 4
convertloop:
READYUV422
- YUVTORGB
+ YUVTORGB(ebx)
+ STOREARGB
- // Step 3: Weave into ARGB
- punpcklbw xmm0, xmm1 // BG
- punpcklbw xmm2, xmm5 // RA
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm2 // BGRA first 4 pixels
- punpckhwd xmm1, xmm2 // BGRA next 4 pixels
- movdqa [edx], xmm0
- movdqa [edx + 16], xmm1
- lea edx, [edx + 32]
sub ecx, 8
jg convertloop
+ pop ebx
pop edi
pop esi
ret
}
}
-// 8 pixels, dest aligned 16.
+// 8 pixels.
+// 4 UV values upsampled to 8 UV, mixed with 8 Y and 8 A producing 8 ARGB.
+__declspec(naked)
+void I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ const uint8* a_buf,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ push ebx
+ push ebp
+ mov eax, [esp + 16 + 4] // Y
+ mov esi, [esp + 16 + 8] // U
+ mov edi, [esp + 16 + 12] // V
+ mov ebp, [esp + 16 + 16] // A
+ mov edx, [esp + 16 + 20] // argb
+ mov ebx, [esp + 16 + 24] // yuvconstants
+ mov ecx, [esp + 16 + 28] // width
+ sub edi, esi
+
+ convertloop:
+ READYUVA422
+ YUVTORGB(ebx)
+ STOREARGB
+
+ sub ecx, 8
+ jg convertloop
+
+ pop ebp
+ pop ebx
+ pop edi
+ pop esi
+ ret
+ }
+}
+
+// 8 pixels.
// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
// Similar to I420 but duplicate UV once more.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void I411ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width) {
__asm {
- push ebx
push esi
push edi
- mov eax, [esp + 12 + 4] // Y
- mov esi, [esp + 12 + 8] // U
- mov edi, [esp + 12 + 12] // V
- mov edx, [esp + 12 + 16] // argb
- mov ecx, [esp + 12 + 20] // width
+ push ebx
+ push ebp
+ mov eax, [esp + 16 + 4] // Y
+ mov esi, [esp + 16 + 8] // U
+ mov edi, [esp + 16 + 12] // V
+ mov edx, [esp + 16 + 16] // argb
+ mov ebp, [esp + 16 + 20] // yuvconstants
+ mov ecx, [esp + 16 + 24] // width
sub edi, esi
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- pxor xmm4, xmm4
+ pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- align 4
convertloop:
- READYUV411 // modifies EBX
- YUVTORGB
+ READYUV411_EBX
+ YUVTORGB(ebp)
+ STOREARGB
- // Step 3: Weave into ARGB
- punpcklbw xmm0, xmm1 // BG
- punpcklbw xmm2, xmm5 // RA
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm2 // BGRA first 4 pixels
- punpckhwd xmm1, xmm2 // BGRA next 4 pixels
- movdqa [edx], xmm0
- movdqa [edx + 16], xmm1
- lea edx, [edx + 32]
sub ecx, 8
jg convertloop
+ pop ebp
+ pop ebx
pop edi
pop esi
- pop ebx
ret
}
}
-// 8 pixels, dest aligned 16.
+// 8 pixels.
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void NV12ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* uv_buf,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
- mov eax, [esp + 4 + 4] // Y
- mov esi, [esp + 4 + 8] // UV
- mov edx, [esp + 4 + 12] // argb
- mov ecx, [esp + 4 + 16] // width
+ push ebx
+ mov eax, [esp + 8 + 4] // Y
+ mov esi, [esp + 8 + 8] // UV
+ mov edx, [esp + 8 + 12] // argb
+ mov ebx, [esp + 8 + 16] // yuvconstants
+ mov ecx, [esp + 8 + 20] // width
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- pxor xmm4, xmm4
- align 4
convertloop:
READNV12
- YUVTORGB
+ YUVTORGB(ebx)
+ STOREARGB
- // Step 3: Weave into ARGB
- punpcklbw xmm0, xmm1 // BG
- punpcklbw xmm2, xmm5 // RA
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm2 // BGRA first 4 pixels
- punpckhwd xmm1, xmm2 // BGRA next 4 pixels
- movdqa [edx], xmm0
- movdqa [edx + 16], xmm1
- lea edx, [edx + 32]
sub ecx, 8
jg convertloop
+ pop ebx
pop esi
ret
}
}
-// 8 pixels, dest aligned 16.
+// 8 pixels.
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void NV21ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* uv_buf,
+ const uint8* vu_buf,
uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
- mov eax, [esp + 4 + 4] // Y
- mov esi, [esp + 4 + 8] // VU
- mov edx, [esp + 4 + 12] // argb
- mov ecx, [esp + 4 + 16] // width
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- pxor xmm4, xmm4
-
- align 4
- convertloop:
- READNV12
- YVUTORGB
-
- // Step 3: Weave into ARGB
- punpcklbw xmm0, xmm1 // BG
- punpcklbw xmm2, xmm5 // RA
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm2 // BGRA first 4 pixels
- punpckhwd xmm1, xmm2 // BGRA next 4 pixels
- movdqa [edx], xmm0
- movdqa [edx + 16], xmm1
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop
-
- pop esi
- ret
- }
-}
-
-// 8 pixels, unaligned.
-// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
-void I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push esi
- push edi
+ push ebx
mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // argb
+ mov esi, [esp + 8 + 8] // VU
+ mov edx, [esp + 8 + 12] // argb
+ mov ebx, [esp + 8 + 16] // yuvconstants
mov ecx, [esp + 8 + 20] // width
- sub edi, esi
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- pxor xmm4, xmm4
- align 4
convertloop:
- READYUV444
- YUVTORGB
+ READNV21
+ YUVTORGB(ebx)
+ STOREARGB
- // Step 3: Weave into ARGB
- punpcklbw xmm0, xmm1 // BG
- punpcklbw xmm2, xmm5 // RA
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm2 // BGRA first 4 pixels
- punpckhwd xmm1, xmm2 // BGRA next 4 pixels
- movdqu [edx], xmm0
- movdqu [edx + 16], xmm1
- lea edx, [edx + 32]
sub ecx, 8
jg convertloop
- pop edi
+ pop ebx
pop esi
ret
}
}
-// 8 pixels, unaligned.
-// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
-void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // argb
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- pxor xmm4, xmm4
-
- align 4
- convertloop:
- READYUV422
- YUVTORGB
-
- // Step 3: Weave into ARGB
- punpcklbw xmm0, xmm1 // BG
- punpcklbw xmm2, xmm5 // RA
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm2 // BGRA first 4 pixels
- punpckhwd xmm1, xmm2 // BGRA next 4 pixels
- movdqu [edx], xmm0
- movdqu [edx + 16], xmm1
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-// 8 pixels, unaligned.
-// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-// Similar to I420 but duplicate UV once more.
-__declspec(naked) __declspec(align(16))
-void I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
+// 8 pixels.
+// 4 YUY2 values with 8 Y and 4 UV producing 8 ARGB (32 bytes).
+__declspec(naked)
+void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
__asm {
push ebx
- push esi
- push edi
- mov eax, [esp + 12 + 4] // Y
- mov esi, [esp + 12 + 8] // U
- mov edi, [esp + 12 + 12] // V
- mov edx, [esp + 12 + 16] // argb
- mov ecx, [esp + 12 + 20] // width
- sub edi, esi
+ mov eax, [esp + 4 + 4] // yuy2
+ mov edx, [esp + 4 + 8] // argb
+ mov ebx, [esp + 4 + 12] // yuvconstants
+ mov ecx, [esp + 4 + 16] // width
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- pxor xmm4, xmm4
- align 4
convertloop:
- READYUV411 // modifies EBX
- YUVTORGB
+ READYUY2
+ YUVTORGB(ebx)
+ STOREARGB
- // Step 3: Weave into ARGB
- punpcklbw xmm0, xmm1 // BG
- punpcklbw xmm2, xmm5 // RA
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm2 // BGRA first 4 pixels
- punpckhwd xmm1, xmm2 // BGRA next 4 pixels
- movdqu [edx], xmm0
- movdqu [edx + 16], xmm1
- lea edx, [edx + 32]
sub ecx, 8
jg convertloop
- pop edi
- pop esi
pop ebx
ret
}
}
-// 8 pixels, dest aligned 16.
-// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
-void NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // Y
- mov esi, [esp + 4 + 8] // UV
- mov edx, [esp + 4 + 12] // argb
- mov ecx, [esp + 4 + 16] // width
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- pxor xmm4, xmm4
-
- align 4
- convertloop:
- READNV12
- YUVTORGB
-
- // Step 3: Weave into ARGB
- punpcklbw xmm0, xmm1 // BG
- punpcklbw xmm2, xmm5 // RA
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm2 // BGRA first 4 pixels
- punpckhwd xmm1, xmm2 // BGRA next 4 pixels
- movdqu [edx], xmm0
- movdqu [edx + 16], xmm1
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop
-
- pop esi
- ret
- }
-}
-
-// 8 pixels, dest aligned 16.
-// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
-void NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // Y
- mov esi, [esp + 4 + 8] // VU
- mov edx, [esp + 4 + 12] // argb
- mov ecx, [esp + 4 + 16] // width
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- pxor xmm4, xmm4
-
- align 4
- convertloop:
- READNV12
- YVUTORGB
-
- // Step 3: Weave into ARGB
- punpcklbw xmm0, xmm1 // BG
- punpcklbw xmm2, xmm5 // RA
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm2 // BGRA first 4 pixels
- punpckhwd xmm1, xmm2 // BGRA next 4 pixels
- movdqu [edx], xmm0
- movdqu [edx + 16], xmm1
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop
-
- pop esi
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void I422ToBGRARow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_bgra,
+// 8 pixels.
+// 4 UYVY values with 8 Y and 4 UV producing 8 ARGB (32 bytes).
+__declspec(naked)
+void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
+ uint8* dst_argb,
+ const struct YuvConstants* yuvconstants,
int width) {
__asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // bgra
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi
- pxor xmm4, xmm4
-
- align 4
- convertloop:
- READYUV422
- YUVTORGB
-
- // Step 3: Weave into BGRA
+ push ebx
+ mov eax, [esp + 4 + 4] // uyvy
+ mov edx, [esp + 4 + 8] // argb
+ mov ebx, [esp + 4 + 12] // yuvconstants
+ mov ecx, [esp + 4 + 16] // width
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- punpcklbw xmm1, xmm0 // GB
- punpcklbw xmm5, xmm2 // AR
- movdqa xmm0, xmm5
- punpcklwd xmm5, xmm1 // BGRA first 4 pixels
- punpckhwd xmm0, xmm1 // BGRA next 4 pixels
- movdqa [edx], xmm5
- movdqa [edx + 16], xmm0
- lea edx, [edx + 32]
+
+ convertloop:
+ READUYVY
+ YUVTORGB(ebx)
+ STOREARGB
+
sub ecx, 8
jg convertloop
- pop edi
- pop esi
+ pop ebx
ret
}
}
-__declspec(naked) __declspec(align(16))
-void I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_bgra,
- int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // bgra
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi
- pxor xmm4, xmm4
-
- align 4
- convertloop:
- READYUV422
- YUVTORGB
-
- // Step 3: Weave into BGRA
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- punpcklbw xmm1, xmm0 // GB
- punpcklbw xmm5, xmm2 // AR
- movdqa xmm0, xmm5
- punpcklwd xmm5, xmm1 // BGRA first 4 pixels
- punpckhwd xmm0, xmm1 // BGRA next 4 pixels
- movdqu [edx], xmm5
- movdqu [edx + 16], xmm0
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void I422ToABGRRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_abgr,
- int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // abgr
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- pxor xmm4, xmm4
-
- align 4
- convertloop:
- READYUV422
- YUVTORGB
-
- // Step 3: Weave into ARGB
- punpcklbw xmm2, xmm1 // RG
- punpcklbw xmm0, xmm5 // BA
- movdqa xmm1, xmm2
- punpcklwd xmm2, xmm0 // RGBA first 4 pixels
- punpckhwd xmm1, xmm0 // RGBA next 4 pixels
- movdqa [edx], xmm2
- movdqa [edx + 16], xmm1
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_abgr,
- int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // abgr
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- pxor xmm4, xmm4
-
- align 4
- convertloop:
- READYUV422
- YUVTORGB
-
- // Step 3: Weave into ARGB
- punpcklbw xmm2, xmm1 // RG
- punpcklbw xmm0, xmm5 // BA
- movdqa xmm1, xmm2
- punpcklwd xmm2, xmm0 // RGBA first 4 pixels
- punpckhwd xmm1, xmm0 // RGBA next 4 pixels
- movdqu [edx], xmm2
- movdqu [edx + 16], xmm1
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void I422ToRGBARow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_rgba,
+ const struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // rgba
- mov ecx, [esp + 8 + 20] // width
+ push ebx
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // rgba
+ mov ebx, [esp + 12 + 20] // yuvconstants
+ mov ecx, [esp + 12 + 24] // width
sub edi, esi
- pxor xmm4, xmm4
- align 4
convertloop:
READYUV422
- YUVTORGB
+ YUVTORGB(ebx)
+ STORERGBA
- // Step 3: Weave into RGBA
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- punpcklbw xmm1, xmm2 // GR
- punpcklbw xmm5, xmm0 // AB
- movdqa xmm0, xmm5
- punpcklwd xmm5, xmm1 // RGBA first 4 pixels
- punpckhwd xmm0, xmm1 // RGBA next 4 pixels
- movdqa [edx], xmm5
- movdqa [edx + 16], xmm0
- lea edx, [edx + 32]
sub ecx, 8
jg convertloop
+ pop ebx
pop edi
pop esi
ret
}
}
-
-__declspec(naked) __declspec(align(16))
-void I422ToRGBARow_Unaligned_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_rgba,
- int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // rgba
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi
- pxor xmm4, xmm4
-
- align 4
- convertloop:
- READYUV422
- YUVTORGB
-
- // Step 3: Weave into RGBA
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- punpcklbw xmm1, xmm2 // GR
- punpcklbw xmm5, xmm0 // AB
- movdqa xmm0, xmm5
- punpcklwd xmm5, xmm1 // RGBA first 4 pixels
- punpckhwd xmm0, xmm1 // RGBA next 4 pixels
- movdqu [edx], xmm5
- movdqu [edx + 16], xmm0
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
#endif // HAS_I422TOARGBROW_SSSE3
-#ifdef HAS_YTOARGBROW_SSE2
-__declspec(naked) __declspec(align(16))
-void YToARGBRow_SSE2(const uint8* y_buf,
- uint8* rgb_buf,
- int width) {
+#ifdef HAS_I400TOARGBROW_SSE2
+// 8 pixels of Y converted to 8 pixels of ARGB (32 bytes).
+__declspec(naked)
+void I400ToARGBRow_SSE2(const uint8* y_buf,
+ uint8* rgb_buf,
+ int width) {
__asm {
- pxor xmm5, xmm5
- pcmpeqb xmm4, xmm4 // generate mask 0xff000000
- pslld xmm4, 24
- mov eax, 0x00100010
- movd xmm3, eax
- pshufd xmm3, xmm3, 0
- mov eax, 0x004a004a // 74
+ mov eax, 0x4a354a35 // 4a35 = 18997 = round(1.164 * 64 * 256 * 256 / 257)
movd xmm2, eax
pshufd xmm2, xmm2,0
+ mov eax, 0x04880488 // 0488 = 1160 = round(1.164 * 64 * 16) - 32 (>> 6 rounding)
+ movd xmm3, eax
+ pshufd xmm3, xmm3, 0
+ pcmpeqb xmm4, xmm4 // generate mask 0xff000000
+ pslld xmm4, 24
+
mov eax, [esp + 4] // Y
mov edx, [esp + 8] // rgb
mov ecx, [esp + 12] // width
- align 4
convertloop:
// Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164
movq xmm0, qword ptr [eax]
lea eax, [eax + 8]
- punpcklbw xmm0, xmm5 // 0.Y
+ punpcklbw xmm0, xmm0 // Y.Y
+ pmulhuw xmm0, xmm2
psubusw xmm0, xmm3
- pmullw xmm0, xmm2
psrlw xmm0, 6
packuswb xmm0, xmm0 // G
@@ -3259,16 +3043,66 @@ void YToARGBRow_SSE2(const uint8* y_buf,
punpckhwd xmm1, xmm1 // BGRA next 4 pixels
por xmm0, xmm4
por xmm1, xmm4
- movdqa [edx], xmm0
- movdqa [edx + 16], xmm1
+ movdqu [edx], xmm0
+ movdqu [edx + 16], xmm1
lea edx, [edx + 32]
sub ecx, 8
jg convertloop
-
ret
}
}
-#endif // HAS_YTOARGBROW_SSE2
+#endif // HAS_I400TOARGBROW_SSE2
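+
+// Fixed-point model of the Y expansion above (illustrative reference,
+// assuming the libyuv uint8/uint32 typedefs): Y is widened by duplication
+// (y * 0x0101), scaled keeping the high word (pmulhuw), biased with
+// unsigned saturation, then >> 6. The bias already folds in the +32
+// rounding term for the shift. Maps y = 16 to 0 and y = 235 to 255.
+static __inline uint8 I400ToGrayModel(uint8 y) {
+  uint32 g = ((uint32)y * 0x0101 * 18997) >> 16;  // pmulhuw with 0x4a35
+  g = g < 1160 ? 0 : g - 1160;                    // psubusw with 0x0488
+  g >>= 6;                                        // psrlw 6
+  return (uint8)(g > 255 ? 255 : g);              // packuswb
+}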
+
+#ifdef HAS_I400TOARGBROW_AVX2
+// 16 pixels of Y converted to 16 pixels of ARGB (64 bytes).
+// note: vpunpcklbw mutates and vpackuswb unmutates.
+__declspec(naked)
+void I400ToARGBRow_AVX2(const uint8* y_buf,
+ uint8* rgb_buf,
+ int width) {
+ __asm {
+ mov eax, 0x4a354a35 // 4a35 = 18997 = round(1.164 * 64 * 256 * 256 / 257)
+ vmovd xmm2, eax
+ vbroadcastss ymm2, xmm2
+ mov eax, 0x04880488 // 0488 = 1160 = round(1.164 * 64 * 16) - 32 (>> 6 rounding)
+ vmovd xmm3, eax
+ vbroadcastss ymm3, xmm3
+ vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0xff000000
+ vpslld ymm4, ymm4, 24
+
+ mov eax, [esp + 4] // Y
+ mov edx, [esp + 8] // rgb
+ mov ecx, [esp + 12] // width
+
+ convertloop:
+ // Step 1: Scale Y contribution to 16 G values. G = (y - 16) * 1.164
+ vmovdqu xmm0, [eax]
+ lea eax, [eax + 16]
+ vpermq ymm0, ymm0, 0xd8 // vpunpcklbw mutates
+ vpunpcklbw ymm0, ymm0, ymm0 // Y.Y
+ vpmulhuw ymm0, ymm0, ymm2
+ vpsubusw ymm0, ymm0, ymm3
+ vpsrlw ymm0, ymm0, 6
+ vpackuswb ymm0, ymm0, ymm0 // G. still mutated: 3120
+
+ // TODO(fbarchard): Weave alpha with unpack.
+ // Step 2: Weave into ARGB
+ vpunpcklbw ymm1, ymm0, ymm0 // GG - mutates
+ vpermq ymm1, ymm1, 0xd8
+ vpunpcklwd ymm0, ymm1, ymm1 // GGGG first 8 pixels
+ vpunpckhwd ymm1, ymm1, ymm1 // GGGG next 8 pixels
+ vpor ymm0, ymm0, ymm4
+ vpor ymm1, ymm1, ymm4
+ vmovdqu [edx], ymm0
+ vmovdqu [edx + 32], ymm1
+ lea edx, [edx + 64]
+ sub ecx, 16
+ jg convertloop
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_I400TOARGBROW_AVX2
#ifdef HAS_MIRRORROW_SSSE3
// Shuffle table for reversing the bytes.
@@ -3276,22 +3110,21 @@ static const uvec8 kShuffleMirror = {
15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
};
-__declspec(naked) __declspec(align(16))
+// TODO(fbarchard): Replace lea with -16 offset.
+__declspec(naked)
void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
__asm {
mov eax, [esp + 4] // src
mov edx, [esp + 8] // dst
mov ecx, [esp + 12] // width
- movdqa xmm5, kShuffleMirror
- lea eax, [eax - 16]
+ movdqa xmm5, xmmword ptr kShuffleMirror
- align 4
convertloop:
- movdqa xmm0, [eax + ecx]
+ movdqu xmm0, [eax - 16 + ecx]
pshufb xmm0, xmm5
- sub ecx, 16
- movdqa [edx], xmm0
+ movdqu [edx], xmm0
lea edx, [edx + 16]
+ sub ecx, 16
jg convertloop
ret
}
@@ -3299,29 +3132,21 @@ void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
#endif // HAS_MIRRORROW_SSSE3
#ifdef HAS_MIRRORROW_AVX2
-// Shuffle table for reversing the bytes.
-static const ulvec8 kShuffleMirror_AVX2 = {
- 15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u,
- 15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
-};
-
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
__asm {
mov eax, [esp + 4] // src
mov edx, [esp + 8] // dst
mov ecx, [esp + 12] // width
- vmovdqa ymm5, kShuffleMirror_AVX2
- lea eax, [eax - 32]
+ vbroadcastf128 ymm5, xmmword ptr kShuffleMirror
- align 4
convertloop:
- vmovdqu ymm0, [eax + ecx]
+ vmovdqu ymm0, [eax - 32 + ecx]
vpshufb ymm0, ymm0, ymm5
vpermq ymm0, ymm0, 0x4e // swap high and low halves
- sub ecx, 32
vmovdqu [edx], ymm0
lea edx, [edx + 32]
+ sub ecx, 32
jg convertloop
vzeroupper
ret
@@ -3329,43 +3154,13 @@ void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
}
#endif // HAS_MIRRORROW_AVX2
-#ifdef HAS_MIRRORROW_SSE2
-// SSE2 version has movdqu so it can be used on unaligned buffers when SSSE3
-// version can not.
-__declspec(naked) __declspec(align(16))
-void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
- __asm {
- mov eax, [esp + 4] // src
- mov edx, [esp + 8] // dst
- mov ecx, [esp + 12] // width
- lea eax, [eax - 16]
-
- align 4
- convertloop:
- movdqu xmm0, [eax + ecx]
- movdqa xmm1, xmm0 // swap bytes
- psllw xmm0, 8
- psrlw xmm1, 8
- por xmm0, xmm1
- pshuflw xmm0, xmm0, 0x1b // swap words
- pshufhw xmm0, xmm0, 0x1b
- pshufd xmm0, xmm0, 0x4e // swap qwords
- sub ecx, 16
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- jg convertloop
- ret
- }
-}
-#endif // HAS_MIRRORROW_SSE2
-
#ifdef HAS_MIRRORROW_UV_SSSE3
// Shuffle table for reversing the bytes of UV channels.
static const uvec8 kShuffleMirrorUV = {
14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u
};
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v,
int width) {
__asm {
@@ -3374,19 +3169,18 @@ void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v,
mov edx, [esp + 4 + 8] // dst_u
mov edi, [esp + 4 + 12] // dst_v
mov ecx, [esp + 4 + 16] // width
- movdqa xmm1, kShuffleMirrorUV
+ movdqa xmm1, xmmword ptr kShuffleMirrorUV
lea eax, [eax + ecx * 2 - 16]
sub edi, edx
- align 4
convertloop:
- movdqa xmm0, [eax]
+ movdqu xmm0, [eax]
lea eax, [eax - 16]
pshufb xmm0, xmm1
- sub ecx, 8
movlpd qword ptr [edx], xmm0
movhpd qword ptr [edx + edi], xmm0
lea edx, [edx + 8]
+ sub ecx, 8
jg convertloop
pop edi
@@ -3395,34 +3189,27 @@ void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v,
}
#endif // HAS_MIRRORROW_UV_SSSE3
-#ifdef HAS_ARGBMIRRORROW_SSSE3
-// Shuffle table for reversing the bytes.
-static const uvec8 kARGBShuffleMirror = {
- 12u, 13u, 14u, 15u, 8u, 9u, 10u, 11u, 4u, 5u, 6u, 7u, 0u, 1u, 2u, 3u
-};
-
-__declspec(naked) __declspec(align(16))
-void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
+#ifdef HAS_ARGBMIRRORROW_SSE2
+__declspec(naked)
+void ARGBMirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
__asm {
mov eax, [esp + 4] // src
mov edx, [esp + 8] // dst
mov ecx, [esp + 12] // width
lea eax, [eax - 16 + ecx * 4] // last 4 pixels.
- movdqa xmm5, kARGBShuffleMirror
- align 4
convertloop:
- movdqa xmm0, [eax]
+ movdqu xmm0, [eax]
lea eax, [eax - 16]
- pshufb xmm0, xmm5
- sub ecx, 4
- movdqa [edx], xmm0
+ pshufd xmm0, xmm0, 0x1b
+ movdqu [edx], xmm0
lea edx, [edx + 16]
+ sub ecx, 4
jg convertloop
ret
}
}
-#endif // HAS_ARGBMIRRORROW_SSSE3
+#endif // HAS_ARGBMIRRORROW_SSE2
#ifdef HAS_ARGBMIRRORROW_AVX2
// Shuffle table for reversing the dwords (ARGB pixels).
@@ -3430,21 +3217,19 @@ static const ulvec32 kARGBShuffleMirror_AVX2 = {
7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
};
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
__asm {
mov eax, [esp + 4] // src
mov edx, [esp + 8] // dst
mov ecx, [esp + 12] // width
- lea eax, [eax - 32]
- vmovdqa ymm5, kARGBShuffleMirror_AVX2
+ vmovdqu ymm5, ymmword ptr kARGBShuffleMirror_AVX2
- align 4
convertloop:
- vpermd ymm0, ymm5, [eax + ecx * 4] // permute dword order
- sub ecx, 8
+ vpermd ymm0, ymm5, [eax - 32 + ecx * 4] // permute dword order
vmovdqu [edx], ymm0
lea edx, [edx + 32]
+ sub ecx, 8
jg convertloop
vzeroupper
ret
@@ -3453,56 +3238,19 @@ void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
#endif // HAS_ARGBMIRRORROW_AVX2
#ifdef HAS_SPLITUVROW_SSE2
-__declspec(naked) __declspec(align(16))
-void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
+__declspec(naked)
+void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+ int width) {
__asm {
push edi
mov eax, [esp + 4 + 4] // src_uv
mov edx, [esp + 4 + 8] // dst_u
mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // pix
+ mov ecx, [esp + 4 + 16] // width
pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
psrlw xmm5, 8
sub edi, edx
- align 4
- convertloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- lea eax, [eax + 32]
- movdqa xmm2, xmm0
- movdqa xmm3, xmm1
- pand xmm0, xmm5 // even bytes
- pand xmm1, xmm5
- packuswb xmm0, xmm1
- psrlw xmm2, 8 // odd bytes
- psrlw xmm3, 8
- packuswb xmm2, xmm3
- movdqa [edx], xmm0
- movdqa [edx + edi], xmm2
- lea edx, [edx + 16]
- sub ecx, 16
- jg convertloop
-
- pop edi
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int pix) {
- __asm {
- push edi
- mov eax, [esp + 4 + 4] // src_uv
- mov edx, [esp + 4 + 8] // dst_u
- mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
- sub edi, edx
-
- align 4
convertloop:
movdqu xmm0, [eax]
movdqu xmm1, [eax + 16]
@@ -3525,22 +3273,23 @@ void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
ret
}
}
+
#endif // HAS_SPLITUVROW_SSE2
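+
+// Split model (illustrative reference, assuming the libyuv uint8 typedef):
+// the 0x00ff00ff mask keeps the even bytes (U) and psrlw 8 exposes the odd
+// bytes (V) before both are repacked with packuswb.
+static __inline void SplitUVModel(const uint8* src_uv, uint8* dst_u,
+                                  uint8* dst_v, int width) {
+  int i;
+  for (i = 0; i < width; ++i) {
+    dst_u[i] = src_uv[i * 2 + 0];  // pand 0x00ff00ff
+    dst_v[i] = src_uv[i * 2 + 1];  // psrlw 8
+  }
+}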
#ifdef HAS_SPLITUVROW_AVX2
-__declspec(naked) __declspec(align(16))
-void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
+__declspec(naked)
+void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+ int width) {
__asm {
push edi
mov eax, [esp + 4 + 4] // src_uv
mov edx, [esp + 4 + 8] // dst_u
mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // pix
+ mov ecx, [esp + 4 + 16] // width
vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff
vpsrlw ymm5, ymm5, 8
sub edi, edx
- align 4
convertloop:
vmovdqu ymm0, [eax]
vmovdqu ymm1, [eax + 32]
@@ -3567,7 +3316,7 @@ void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
#endif // HAS_SPLITUVROW_AVX2
#ifdef HAS_MERGEUVROW_SSE2
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) {
__asm {
@@ -3578,37 +3327,6 @@ void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
mov ecx, [esp + 4 + 16] // width
sub edx, eax
- align 4
- convertloop:
- movdqa xmm0, [eax] // read 16 U's
- movdqa xmm1, [eax + edx] // and 16 V's
- lea eax, [eax + 16]
- movdqa xmm2, xmm0
- punpcklbw xmm0, xmm1 // first 8 UV pairs
- punpckhbw xmm2, xmm1 // next 8 UV pairs
- movdqa [edi], xmm0
- movdqa [edi + 16], xmm2
- lea edi, [edi + 32]
- sub ecx, 16
- jg convertloop
-
- pop edi
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
- uint8* dst_uv, int width) {
- __asm {
- push edi
- mov eax, [esp + 4 + 4] // src_u
- mov edx, [esp + 4 + 8] // src_v
- mov edi, [esp + 4 + 12] // dst_uv
- mov ecx, [esp + 4 + 16] // width
- sub edx, eax
-
- align 4
convertloop:
movdqu xmm0, [eax] // read 16 U's
movdqu xmm1, [eax + edx] // and 16 V's
@@ -3629,7 +3347,7 @@ void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
#endif // HAS_MERGEUVROW_SSE2
#ifdef HAS_MERGEUVROW_AVX2
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) {
__asm {
@@ -3640,17 +3358,16 @@ void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
mov ecx, [esp + 4 + 16] // width
sub edx, eax
- align 4
convertloop:
vmovdqu ymm0, [eax] // read 32 U's
vmovdqu ymm1, [eax + edx] // and 32 V's
lea eax, [eax + 32]
vpunpcklbw ymm2, ymm0, ymm1 // low 16 UV pairs. mutated qqword 0,2
vpunpckhbw ymm0, ymm0, ymm1 // high 16 UV pairs. mutated qqword 1,3
- vperm2i128 ymm1, ymm2, ymm0, 0x20 // low 128 of ymm2 and low 128 of ymm0
- vperm2i128 ymm2, ymm2, ymm0, 0x31 // high 128 of ymm2 and high 128 of ymm0
- vmovdqu [edi], ymm1
- vmovdqu [edi + 32], ymm2
+ vextractf128 [edi], ymm2, 0 // bytes 0..15
+ vextractf128 [edi + 16], ymm0, 0 // bytes 16..31
+ vextractf128 [edi + 32], ymm2, 1 // bytes 32..47
+ vextractf128 [edi + 48], ymm0, 1 // bytes 48..63
lea edi, [edi + 64]
sub ecx, 32
jg convertloop
@@ -3664,15 +3381,18 @@ void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
#ifdef HAS_COPYROW_SSE2
// CopyRow copies 'count' bytes using a 16 byte load/store, 32 bytes at a time.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
__asm {
mov eax, [esp + 4] // src
mov edx, [esp + 8] // dst
mov ecx, [esp + 12] // count
+ test eax, 15
+ jne convertloopu
+ test edx, 15
+ jne convertloopu
- align 4
- convertloop:
+ convertloopa:
movdqa xmm0, [eax]
movdqa xmm1, [eax + 16]
lea eax, [eax + 32]
@@ -3680,14 +3400,50 @@ void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
movdqa [edx + 16], xmm1
lea edx, [edx + 32]
sub ecx, 32
- jg convertloop
+ jg convertloopa
+ ret
+
+ convertloopu:
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ lea eax, [eax + 32]
+ movdqu [edx], xmm0
+ movdqu [edx + 16], xmm1
+ lea edx, [edx + 32]
+ sub ecx, 32
+ jg convertloopu
ret
}
}
#endif // HAS_COPYROW_SSE2
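+
+// Dispatch model for the test/jne pairs above (illustrative reference,
+// assuming uintptr_t from <stdint.h>): the movdqa loop is only taken when
+// both pointers have the low 4 address bits clear.
+static __inline int CopyRowIsAligned16(const uint8* src, const uint8* dst) {
+  return ((((uintptr_t)src) | ((uintptr_t)dst)) & 15) == 0;  // test ..., 15
+}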
-// Unaligned Multiple of 1.
-__declspec(naked) __declspec(align(16))
+#ifdef HAS_COPYROW_AVX
+// CopyRow copies 'count' bytes using a 32 byte load/store, 64 bytes at a time.
+__declspec(naked)
+void CopyRow_AVX(const uint8* src, uint8* dst, int count) {
+ __asm {
+ mov eax, [esp + 4] // src
+ mov edx, [esp + 8] // dst
+ mov ecx, [esp + 12] // count
+
+ convertloop:
+ vmovdqu ymm0, [eax]
+ vmovdqu ymm1, [eax + 32]
+ lea eax, [eax + 64]
+ vmovdqu [edx], ymm0
+ vmovdqu [edx + 32], ymm1
+ lea edx, [edx + 64]
+ sub ecx, 64
+ jg convertloop
+
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_COPYROW_AVX
+
+// Copies any byte count (multiple of 1) using rep movsb (ERMS).
+__declspec(naked)
void CopyRow_ERMS(const uint8* src, uint8* dst, int count) {
__asm {
mov eax, esi
@@ -3702,27 +3458,9 @@ void CopyRow_ERMS(const uint8* src, uint8* dst, int count) {
}
}
-#ifdef HAS_COPYROW_X86
-__declspec(naked) __declspec(align(16))
-void CopyRow_X86(const uint8* src, uint8* dst, int count) {
- __asm {
- mov eax, esi
- mov edx, edi
- mov esi, [esp + 4] // src
- mov edi, [esp + 8] // dst
- mov ecx, [esp + 12] // count
- shr ecx, 2
- rep movsd
- mov edi, edx
- mov esi, eax
- ret
- }
-}
-#endif // HAS_COPYROW_X86
-
#ifdef HAS_ARGBCOPYALPHAROW_SSE2
// width in pixels
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
__asm {
mov eax, [esp + 4] // src
@@ -3733,21 +3471,20 @@ void ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
pcmpeqb xmm1, xmm1 // generate mask 0x00ffffff
psrld xmm1, 8
- align 4
convertloop:
- movdqa xmm2, [eax]
- movdqa xmm3, [eax + 16]
+ movdqu xmm2, [eax]
+ movdqu xmm3, [eax + 16]
lea eax, [eax + 32]
- movdqa xmm4, [edx]
- movdqa xmm5, [edx + 16]
+ movdqu xmm4, [edx]
+ movdqu xmm5, [edx + 16]
pand xmm2, xmm0
pand xmm3, xmm0
pand xmm4, xmm1
pand xmm5, xmm1
por xmm2, xmm4
por xmm3, xmm5
- movdqa [edx], xmm2
- movdqa [edx + 16], xmm3
+ movdqu [edx], xmm2
+ movdqu [edx + 16], xmm3
lea edx, [edx + 32]
sub ecx, 8
jg convertloop
@@ -3759,7 +3496,7 @@ void ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
#ifdef HAS_ARGBCOPYALPHAROW_AVX2
// width in pixels
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
__asm {
mov eax, [esp + 4] // src
@@ -3768,7 +3505,6 @@ void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
vpcmpeqb ymm0, ymm0, ymm0
vpsrld ymm0, ymm0, 8 // generate mask 0x00ffffff
- align 4
convertloop:
vmovdqu ymm1, [eax]
vmovdqu ymm2, [eax + 32]
@@ -3789,7 +3525,7 @@ void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
// width in pixels
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
__asm {
mov eax, [esp + 4] // src
@@ -3800,23 +3536,22 @@ void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
pcmpeqb xmm1, xmm1 // generate mask 0x00ffffff
psrld xmm1, 8
- align 4
convertloop:
movq xmm2, qword ptr [eax] // 8 Y's
lea eax, [eax + 8]
punpcklbw xmm2, xmm2
punpckhwd xmm3, xmm2
punpcklwd xmm2, xmm2
- movdqa xmm4, [edx]
- movdqa xmm5, [edx + 16]
+ movdqu xmm4, [edx]
+ movdqu xmm5, [edx + 16]
pand xmm2, xmm0
pand xmm3, xmm0
pand xmm4, xmm1
pand xmm5, xmm1
por xmm2, xmm4
por xmm3, xmm5
- movdqa [edx], xmm2
- movdqa [edx + 16], xmm3
+ movdqu [edx], xmm2
+ movdqu [edx + 16], xmm3
lea edx, [edx + 32]
sub ecx, 8
jg convertloop
@@ -3828,7 +3563,7 @@ void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
#ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2
// width in pixels
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGBCopyYToAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
__asm {
mov eax, [esp + 4] // src
@@ -3837,7 +3572,6 @@ void ARGBCopyYToAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
vpcmpeqb ymm0, ymm0, ymm0
vpsrld ymm0, ymm0, 8 // generate mask 0x00ffffff
- align 4
convertloop:
vpmovzxbd ymm1, qword ptr [eax]
vpmovzxbd ymm2, qword ptr [eax + 8]
@@ -3859,13 +3593,16 @@ void ARGBCopyYToAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
#endif // HAS_ARGBCOPYYTOALPHAROW_AVX2
#ifdef HAS_SETROW_X86
-// SetRow8 writes 'count' bytes using a 32 bit value repeated.
-__declspec(naked) __declspec(align(16))
-void SetRow_X86(uint8* dst, uint32 v32, int count) {
+// Write 'count' bytes using an 8 bit value repeated.
+// Count should be multiple of 4.
+__declspec(naked)
+void SetRow_X86(uint8* dst, uint8 v8, int count) {
__asm {
+ movzx eax, byte ptr [esp + 8] // v8
+ mov edx, 0x01010101 // Duplicate the byte to all 4 byte lanes.
+ mul edx // overwrites edx with upper part of result.
mov edx, edi
mov edi, [esp + 4] // dst
- mov eax, [esp + 8] // v32
mov ecx, [esp + 12] // count
shr ecx, 2
rep stosd
@@ -3874,50 +3611,45 @@ void SetRow_X86(uint8* dst, uint32 v32, int count) {
}
}
-// SetRow32 writes 'count' words using a 32 bit value repeated.
-__declspec(naked) __declspec(align(16))
-void ARGBSetRows_X86(uint8* dst, uint32 v32, int width,
- int dst_stride, int height) {
+// Write 'count' bytes using an 8 bit value repeated.
+__declspec(naked)
+void SetRow_ERMS(uint8* dst, uint8 v8, int count) {
__asm {
- push esi
- push edi
- push ebp
- mov edi, [esp + 12 + 4] // dst
- mov eax, [esp + 12 + 8] // v32
- mov ebp, [esp + 12 + 12] // width
- mov edx, [esp + 12 + 16] // dst_stride
- mov esi, [esp + 12 + 20] // height
- lea ecx, [ebp * 4]
- sub edx, ecx // stride - width * 4
+ mov edx, edi
+ mov edi, [esp + 4] // dst
+ mov eax, [esp + 8] // v8
+ mov ecx, [esp + 12] // count
+ rep stosb
+ mov edi, edx
+ ret
+ }
+}
- align 4
- convertloop:
- mov ecx, ebp
+// Write 'count' 32 bit values.
+__declspec(naked)
+void ARGBSetRow_X86(uint8* dst_argb, uint32 v32, int count) {
+ __asm {
+ mov edx, edi
+ mov edi, [esp + 4] // dst
+ mov eax, [esp + 8] // v32
+ mov ecx, [esp + 12] // count
rep stosd
- add edi, edx
- sub esi, 1
- jg convertloop
-
- pop ebp
- pop edi
- pop esi
+ mov edi, edx
ret
}
}
#endif // HAS_SETROW_X86
#ifdef HAS_YUY2TOYROW_AVX2
-__declspec(naked) __declspec(align(16))
-void YUY2ToYRow_AVX2(const uint8* src_yuy2,
- uint8* dst_y, int pix) {
+__declspec(naked)
+void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int width) {
__asm {
mov eax, [esp + 4] // src_yuy2
mov edx, [esp + 8] // dst_y
- mov ecx, [esp + 12] // pix
+ mov ecx, [esp + 12] // width
vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff
vpsrlw ymm5, ymm5, 8
- align 4
convertloop:
vmovdqu ymm0, [eax]
vmovdqu ymm1, [eax + 32]
@@ -3926,18 +3658,18 @@ void YUY2ToYRow_AVX2(const uint8* src_yuy2,
vpand ymm1, ymm1, ymm5
vpackuswb ymm0, ymm0, ymm1 // mutates.
vpermq ymm0, ymm0, 0xd8
- sub ecx, 32
vmovdqu [edx], ymm0
lea edx, [edx + 32]
+ sub ecx, 32
jg convertloop
vzeroupper
ret
}
}
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) {
+ uint8* dst_u, uint8* dst_v, int width) {
__asm {
push esi
push edi
@@ -3945,12 +3677,11 @@ void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2,
mov esi, [esp + 8 + 8] // stride_yuy2
mov edx, [esp + 8 + 12] // dst_u
mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // pix
+ mov ecx, [esp + 8 + 20] // width
vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff
vpsrlw ymm5, ymm5, 8
sub edi, edx
- align 4
convertloop:
vmovdqu ymm0, [eax]
vmovdqu ymm1, [eax + 32]
@@ -3980,20 +3711,19 @@ void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2,
}
}
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void YUY2ToUV422Row_AVX2(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) {
+ uint8* dst_u, uint8* dst_v, int width) {
__asm {
push edi
mov eax, [esp + 4 + 4] // src_yuy2
mov edx, [esp + 4 + 8] // dst_u
mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // pix
+ mov ecx, [esp + 4 + 16] // width
vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff
vpsrlw ymm5, ymm5, 8
sub edi, edx
- align 4
convertloop:
vmovdqu ymm0, [eax]
vmovdqu ymm1, [eax + 32]
@@ -4020,15 +3750,14 @@ void YUY2ToUV422Row_AVX2(const uint8* src_yuy2,
}
}
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void UYVYToYRow_AVX2(const uint8* src_uyvy,
- uint8* dst_y, int pix) {
+ uint8* dst_y, int width) {
__asm {
mov eax, [esp + 4] // src_uyvy
mov edx, [esp + 8] // dst_y
- mov ecx, [esp + 12] // pix
+ mov ecx, [esp + 12] // width
- align 4
convertloop:
vmovdqu ymm0, [eax]
vmovdqu ymm1, [eax + 32]
@@ -4037,18 +3766,18 @@ void UYVYToYRow_AVX2(const uint8* src_uyvy,
vpsrlw ymm1, ymm1, 8
vpackuswb ymm0, ymm0, ymm1 // mutates.
vpermq ymm0, ymm0, 0xd8
- sub ecx, 32
vmovdqu [edx], ymm0
lea edx, [edx + 32]
+ sub ecx, 32
jg convertloop
- ret
vzeroupper
+ ret
}
}
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) {
+ uint8* dst_u, uint8* dst_v, int width) {
__asm {
push esi
push edi
@@ -4056,12 +3785,11 @@ void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
mov esi, [esp + 8 + 8] // stride_uyvy
mov edx, [esp + 8 + 12] // dst_u
mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // pix
+ mov ecx, [esp + 8 + 20] // width
vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff
vpsrlw ymm5, ymm5, 8
sub edi, edx
- align 4
convertloop:
vmovdqu ymm0, [eax]
vmovdqu ymm1, [eax + 32]
@@ -4091,20 +3819,19 @@ void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
}
}
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) {
+ uint8* dst_u, uint8* dst_v, int width) {
__asm {
push edi
mov eax, [esp + 4 + 4] // src_uyvy
mov edx, [esp + 4 + 8] // dst_u
mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // pix
+ mov ecx, [esp + 4 + 16] // width
vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff
vpsrlw ymm5, ymm5, 8
sub edi, edx
- align 4
convertloop:
vmovdqu ymm0, [eax]
vmovdqu ymm1, [eax + 32]
@@ -4133,35 +3860,34 @@ void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
#endif // HAS_YUY2TOYROW_AVX2
#ifdef HAS_YUY2TOYROW_SSE2
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void YUY2ToYRow_SSE2(const uint8* src_yuy2,
- uint8* dst_y, int pix) {
+ uint8* dst_y, int width) {
__asm {
mov eax, [esp + 4] // src_yuy2
mov edx, [esp + 8] // dst_y
- mov ecx, [esp + 12] // pix
+ mov ecx, [esp + 12] // width
pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
psrlw xmm5, 8
- align 4
convertloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
lea eax, [eax + 32]
pand xmm0, xmm5 // even bytes are Y
pand xmm1, xmm5
packuswb xmm0, xmm1
- sub ecx, 16
- movdqa [edx], xmm0
+ movdqu [edx], xmm0
lea edx, [edx + 16]
+ sub ecx, 16
jg convertloop
ret
}
}
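+// A scalar sketch of the extraction above: YUY2 packs Y0 U0 Y1 V0, so the
+// even bytes are luma (UYVY, handled later, keeps luma in the odd bytes):
+//   for (int i = 0; i < width; ++i) {
+//     dst_y[i] = src_yuy2[i * 2];
+//   }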
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) {
+ uint8* dst_u, uint8* dst_v, int width) {
__asm {
push esi
push edi
@@ -4169,17 +3895,16 @@ void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
mov esi, [esp + 8 + 8] // stride_yuy2
mov edx, [esp + 8 + 12] // dst_u
mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // pix
+ mov ecx, [esp + 8 + 20] // width
pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
psrlw xmm5, 8
sub edi, edx
- align 4
convertloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + esi]
- movdqa xmm3, [eax + esi + 16]
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ movdqu xmm2, [eax + esi]
+ movdqu xmm3, [eax + esi + 16]
lea eax, [eax + 32]
pavgb xmm0, xmm2
pavgb xmm1, xmm3
@@ -4203,127 +3928,19 @@ void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
}
}
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) {
+ uint8* dst_u, uint8* dst_v, int width) {
__asm {
push edi
mov eax, [esp + 4 + 4] // src_yuy2
mov edx, [esp + 4 + 8] // dst_u
mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // pix
+ mov ecx, [esp + 4 + 16] // width
pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
psrlw xmm5, 8
sub edi, edx
- align 4
- convertloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- lea eax, [eax + 32]
- psrlw xmm0, 8 // YUYV -> UVUV
- psrlw xmm1, 8
- packuswb xmm0, xmm1
- movdqa xmm1, xmm0
- pand xmm0, xmm5 // U
- packuswb xmm0, xmm0
- psrlw xmm1, 8 // V
- packuswb xmm1, xmm1
- movq qword ptr [edx], xmm0
- movq qword ptr [edx + edi], xmm1
- lea edx, [edx + 8]
- sub ecx, 16
- jg convertloop
-
- pop edi
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2,
- uint8* dst_y, int pix) {
- __asm {
- mov eax, [esp + 4] // src_yuy2
- mov edx, [esp + 8] // dst_y
- mov ecx, [esp + 12] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
-
- align 4
- convertloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- lea eax, [eax + 32]
- pand xmm0, xmm5 // even bytes are Y
- pand xmm1, xmm5
- packuswb xmm0, xmm1
- sub ecx, 16
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- jg convertloop
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // src_yuy2
- mov esi, [esp + 8 + 8] // stride_yuy2
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
- sub edi, edx
-
- align 4
- convertloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + esi]
- movdqu xmm3, [eax + esi + 16]
- lea eax, [eax + 32]
- pavgb xmm0, xmm2
- pavgb xmm1, xmm3
- psrlw xmm0, 8 // YUYV -> UVUV
- psrlw xmm1, 8
- packuswb xmm0, xmm1
- movdqa xmm1, xmm0
- pand xmm0, xmm5 // U
- packuswb xmm0, xmm0
- psrlw xmm1, 8 // V
- packuswb xmm1, xmm1
- movq qword ptr [edx], xmm0
- movq qword ptr [edx + edi], xmm1
- lea edx, [edx + 8]
- sub ecx, 16
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void YUY2ToUV422Row_Unaligned_SSE2(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) {
- __asm {
- push edi
- mov eax, [esp + 4 + 4] // src_yuy2
- mov edx, [esp + 4 + 8] // dst_u
- mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
- sub edi, edx
-
- align 4
convertloop:
movdqu xmm0, [eax]
movdqu xmm1, [eax + 16]
@@ -4347,120 +3964,14 @@ void YUY2ToUV422Row_Unaligned_SSE2(const uint8* src_yuy2,
}
}
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void UYVYToYRow_SSE2(const uint8* src_uyvy,
- uint8* dst_y, int pix) {
+ uint8* dst_y, int width) {
__asm {
mov eax, [esp + 4] // src_uyvy
mov edx, [esp + 8] // dst_y
- mov ecx, [esp + 12] // pix
+ mov ecx, [esp + 12] // width
- align 4
- convertloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- lea eax, [eax + 32]
- psrlw xmm0, 8 // odd bytes are Y
- psrlw xmm1, 8
- packuswb xmm0, xmm1
- sub ecx, 16
- movdqa [edx], xmm0
- lea edx, [edx + 16]
- jg convertloop
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // src_yuy2
- mov esi, [esp + 8 + 8] // stride_yuy2
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
- sub edi, edx
-
- align 4
- convertloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + esi]
- movdqa xmm3, [eax + esi + 16]
- lea eax, [eax + 32]
- pavgb xmm0, xmm2
- pavgb xmm1, xmm3
- pand xmm0, xmm5 // UYVY -> UVUV
- pand xmm1, xmm5
- packuswb xmm0, xmm1
- movdqa xmm1, xmm0
- pand xmm0, xmm5 // U
- packuswb xmm0, xmm0
- psrlw xmm1, 8 // V
- packuswb xmm1, xmm1
- movq qword ptr [edx], xmm0
- movq qword ptr [edx + edi], xmm1
- lea edx, [edx + 8]
- sub ecx, 16
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) {
- __asm {
- push edi
- mov eax, [esp + 4 + 4] // src_yuy2
- mov edx, [esp + 4 + 8] // dst_u
- mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
- sub edi, edx
-
- align 4
- convertloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- lea eax, [eax + 32]
- pand xmm0, xmm5 // UYVY -> UVUV
- pand xmm1, xmm5
- packuswb xmm0, xmm1
- movdqa xmm1, xmm0
- pand xmm0, xmm5 // U
- packuswb xmm0, xmm0
- psrlw xmm1, 8 // V
- packuswb xmm1, xmm1
- movq qword ptr [edx], xmm0
- movq qword ptr [edx + edi], xmm1
- lea edx, [edx + 8]
- sub ecx, 16
- jg convertloop
-
- pop edi
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy,
- uint8* dst_y, int pix) {
- __asm {
- mov eax, [esp + 4] // src_uyvy
- mov edx, [esp + 8] // dst_y
- mov ecx, [esp + 12] // pix
-
- align 4
convertloop:
movdqu xmm0, [eax]
movdqu xmm1, [eax + 16]
@@ -4468,17 +3979,17 @@ void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy,
psrlw xmm0, 8 // odd bytes are Y
psrlw xmm1, 8
packuswb xmm0, xmm1
- sub ecx, 16
movdqu [edx], xmm0
lea edx, [edx + 16]
+ sub ecx, 16
jg convertloop
ret
}
}
-__declspec(naked) __declspec(align(16))
-void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) {
+__declspec(naked)
+void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
+ uint8* dst_u, uint8* dst_v, int width) {
__asm {
push esi
push edi
@@ -4486,12 +3997,11 @@ void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
mov esi, [esp + 8 + 8] // stride_uyvy
mov edx, [esp + 8 + 12] // dst_u
mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // pix
+ mov ecx, [esp + 8 + 20] // width
pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
psrlw xmm5, 8
sub edi, edx
- align 4
convertloop:
movdqu xmm0, [eax]
movdqu xmm1, [eax + 16]
@@ -4520,20 +4030,19 @@ void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
}
}
-__declspec(naked) __declspec(align(16))
-void UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) {
+__declspec(naked)
+void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
+ uint8* dst_u, uint8* dst_v, int width) {
__asm {
push edi
mov eax, [esp + 4 + 4] // src_uyvy
mov edx, [esp + 4 + 8] // dst_u
mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // pix
+ mov ecx, [esp + 4 + 16] // width
pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
psrlw xmm5, 8
sub edi, edx
- align 4
convertloop:
movdqu xmm0, [eax]
movdqu xmm1, [eax + 16]
@@ -4558,127 +4067,122 @@ void UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy,
}
#endif // HAS_YUY2TOYROW_SSE2
-#ifdef HAS_ARGBBLENDROW_SSE2
+#ifdef HAS_BLENDPLANEROW_SSSE3
// Blend 8 pixels at a time.
-__declspec(naked) __declspec(align(16))
-void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
+// unsigned version of math
+// =((A2*C2)+(B2*(255-C2))+255)/256
+// signed version of math
+// =(((A2-128)*C2)+((B2-128)*(255-C2))+32768+127)/256
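+// A scalar sketch of the unsigned form, per pixel (illustrative only):
+//   dst[i] = (src0[i] * alpha[i] + src1[i] * (255 - alpha[i]) + 255) >> 8;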
+__declspec(naked)
+void BlendPlaneRow_SSSE3(const uint8* src0, const uint8* src1,
+ const uint8* alpha, uint8* dst, int width) {
__asm {
push esi
- mov eax, [esp + 4 + 4] // src_argb0
- mov esi, [esp + 4 + 8] // src_argb1
- mov edx, [esp + 4 + 12] // dst_argb
- mov ecx, [esp + 4 + 16] // width
- pcmpeqb xmm7, xmm7 // generate constant 1
- psrlw xmm7, 15
- pcmpeqb xmm6, xmm6 // generate mask 0x00ff00ff
- psrlw xmm6, 8
+ push edi
pcmpeqb xmm5, xmm5 // generate mask 0xff00ff00
psllw xmm5, 8
- pcmpeqb xmm4, xmm4 // generate mask 0xff000000
- pslld xmm4, 24
+ mov eax, 0x80808080 // 128 for biasing image to signed.
+ movd xmm6, eax
+ pshufd xmm6, xmm6, 0x00
- sub ecx, 1
- je convertloop1 // only 1 pixel?
- jl convertloop1b
+ mov eax, 0x807f807f // 32768 + 127 for unbias and round.
+ movd xmm7, eax
+ pshufd xmm7, xmm7, 0x00
+ mov eax, [esp + 8 + 4] // src0
+ mov edx, [esp + 8 + 8] // src1
+ mov esi, [esp + 8 + 12] // alpha
+ mov edi, [esp + 8 + 16] // dst
+ mov ecx, [esp + 8 + 20] // width
+ sub eax, esi
+ sub edx, esi
+ sub edi, esi
- // 1 pixel loop until destination pointer is aligned.
- alignloop1:
- test edx, 15 // aligned?
- je alignloop1b
- movd xmm3, [eax]
- lea eax, [eax + 4]
- movdqa xmm0, xmm3 // src argb
- pxor xmm3, xmm4 // ~alpha
- movd xmm2, [esi] // _r_b
- psrlw xmm3, 8 // alpha
- pshufhw xmm3, xmm3, 0F5h // 8 alpha words
- pshuflw xmm3, xmm3, 0F5h
- pand xmm2, xmm6 // _r_b
- paddw xmm3, xmm7 // 256 - alpha
- pmullw xmm2, xmm3 // _r_b * alpha
- movd xmm1, [esi] // _a_g
- lea esi, [esi + 4]
- psrlw xmm1, 8 // _a_g
- por xmm0, xmm4 // set alpha to 255
- pmullw xmm1, xmm3 // _a_g * alpha
- psrlw xmm2, 8 // _r_b convert to 8 bits again
- paddusb xmm0, xmm2 // + src argb
- pand xmm1, xmm5 // a_g_ convert to 8 bits again
- paddusb xmm0, xmm1 // + src argb
- sub ecx, 1
- movd [edx], xmm0
- lea edx, [edx + 4]
- jge alignloop1
+ // 8 pixel loop.
+ convertloop8:
+ movq xmm0, qword ptr [esi] // alpha
+ punpcklbw xmm0, xmm0
+ pxor xmm0, xmm5 // a, 255-a
+ movq xmm1, qword ptr [eax + esi] // src0
+ movq xmm2, qword ptr [edx + esi] // src1
+ punpcklbw xmm1, xmm2
+ psubb xmm1, xmm6 // bias src0/1 - 128
+ pmaddubsw xmm0, xmm1
+ paddw xmm0, xmm7 // unbias result - 32768 and round.
+ psrlw xmm0, 8
+ packuswb xmm0, xmm0
+ movq qword ptr [edi + esi], xmm0
+ lea esi, [esi + 8]
+ sub ecx, 8
+ jg convertloop8
- alignloop1b:
- add ecx, 1 - 4
- jl convertloop4b
-
- // 4 pixel loop.
- convertloop4:
- movdqu xmm3, [eax] // src argb
- lea eax, [eax + 16]
- movdqa xmm0, xmm3 // src argb
- pxor xmm3, xmm4 // ~alpha
- movdqu xmm2, [esi] // _r_b
- psrlw xmm3, 8 // alpha
- pshufhw xmm3, xmm3, 0F5h // 8 alpha words
- pshuflw xmm3, xmm3, 0F5h
- pand xmm2, xmm6 // _r_b
- paddw xmm3, xmm7 // 256 - alpha
- pmullw xmm2, xmm3 // _r_b * alpha
- movdqu xmm1, [esi] // _a_g
- lea esi, [esi + 16]
- psrlw xmm1, 8 // _a_g
- por xmm0, xmm4 // set alpha to 255
- pmullw xmm1, xmm3 // _a_g * alpha
- psrlw xmm2, 8 // _r_b convert to 8 bits again
- paddusb xmm0, xmm2 // + src argb
- pand xmm1, xmm5 // a_g_ convert to 8 bits again
- paddusb xmm0, xmm1 // + src argb
- sub ecx, 4
- movdqa [edx], xmm0
- lea edx, [edx + 16]
- jge convertloop4
-
- convertloop4b:
- add ecx, 4 - 1
- jl convertloop1b
-
- // 1 pixel loop.
- convertloop1:
- movd xmm3, [eax] // src argb
- lea eax, [eax + 4]
- movdqa xmm0, xmm3 // src argb
- pxor xmm3, xmm4 // ~alpha
- movd xmm2, [esi] // _r_b
- psrlw xmm3, 8 // alpha
- pshufhw xmm3, xmm3, 0F5h // 8 alpha words
- pshuflw xmm3, xmm3, 0F5h
- pand xmm2, xmm6 // _r_b
- paddw xmm3, xmm7 // 256 - alpha
- pmullw xmm2, xmm3 // _r_b * alpha
- movd xmm1, [esi] // _a_g
- lea esi, [esi + 4]
- psrlw xmm1, 8 // _a_g
- por xmm0, xmm4 // set alpha to 255
- pmullw xmm1, xmm3 // _a_g * alpha
- psrlw xmm2, 8 // _r_b convert to 8 bits again
- paddusb xmm0, xmm2 // + src argb
- pand xmm1, xmm5 // a_g_ convert to 8 bits again
- paddusb xmm0, xmm1 // + src argb
- sub ecx, 1
- movd [edx], xmm0
- lea edx, [edx + 4]
- jge convertloop1
-
- convertloop1b:
+ pop edi
pop esi
ret
}
}
-#endif // HAS_ARGBBLENDROW_SSE2
+#endif // HAS_BLENDPLANEROW_SSSE3
+
+#ifdef HAS_BLENDPLANEROW_AVX2
+// Blend 32 pixels at a time.
+// unsigned version of math
+// =((A2*C2)+(B2*(255-C2))+255)/256
+// signed version of math
+// =(((A2-128)*C2)+((B2-128)*(255-C2))+32768+127)/256
+__declspec(naked)
+void BlendPlaneRow_AVX2(const uint8* src0, const uint8* src1,
+ const uint8* alpha, uint8* dst, int width) {
+ __asm {
+ push esi
+ push edi
+ vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xff00ff00
+ vpsllw ymm5, ymm5, 8
+ mov eax, 0x80808080 // 128 for biasing image to signed.
+ vmovd xmm6, eax
+ vbroadcastss ymm6, xmm6
+ mov eax, 0x807f807f // 32768 + 127 for unbias and round.
+ vmovd xmm7, eax
+ vbroadcastss ymm7, xmm7
+ mov eax, [esp + 8 + 4] // src0
+ mov edx, [esp + 8 + 8] // src1
+ mov esi, [esp + 8 + 12] // alpha
+ mov edi, [esp + 8 + 16] // dst
+ mov ecx, [esp + 8 + 20] // width
+ sub eax, esi
+ sub edx, esi
+ sub edi, esi
+
+ // 32 pixel loop.
+ convertloop32:
+ vmovdqu ymm0, [esi] // alpha
+ vpunpckhbw ymm3, ymm0, ymm0 // 8..15, 24..31
+ vpunpcklbw ymm0, ymm0, ymm0 // 0..7, 16..23
+ vpxor ymm3, ymm3, ymm5 // a, 255-a
+ vpxor ymm0, ymm0, ymm5 // a, 255-a
+ vmovdqu ymm1, [eax + esi] // src0
+ vmovdqu ymm2, [edx + esi] // src1
+ vpunpckhbw ymm4, ymm1, ymm2
+ vpunpcklbw ymm1, ymm1, ymm2
+ vpsubb ymm4, ymm4, ymm6 // bias src0/1 - 128
+ vpsubb ymm1, ymm1, ymm6 // bias src0/1 - 128
+ vpmaddubsw ymm3, ymm3, ymm4
+ vpmaddubsw ymm0, ymm0, ymm1
+ vpaddw ymm3, ymm3, ymm7 // unbias result - 32768 and round.
+ vpaddw ymm0, ymm0, ymm7 // unbias result - 32768 and round.
+ vpsrlw ymm3, ymm3, 8
+ vpsrlw ymm0, ymm0, 8
+ vpackuswb ymm0, ymm0, ymm3
+ vmovdqu [edi + esi], ymm0
+ lea esi, [esi + 32]
+ sub ecx, 32
+ jg convertloop32
+
+ pop edi
+ pop esi
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_BLENDPLANEROW_AVX2
#ifdef HAS_ARGBBLENDROW_SSSE3
// Shuffle table for isolating alpha.
@@ -4686,15 +4190,9 @@ static const uvec8 kShuffleAlpha = {
3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80,
11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80
};
-// Same as SSE2, but replaces:
-// psrlw xmm3, 8 // alpha
-// pshufhw xmm3, xmm3, 0F5h // 8 alpha words
-// pshuflw xmm3, xmm3, 0F5h
-// with..
-// pshufb xmm3, kShuffleAlpha // alpha
-// Blend 8 pixels at a time.
-__declspec(naked) __declspec(align(16))
+// Blend 8 pixels at a time.
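+// Per color channel this computes roughly (result alpha is forced to 255):
+//   dst = min(255, src0 + ((src1 * (256 - src0_alpha)) >> 8));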
+__declspec(naked)
void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
uint8* dst_argb, int width) {
__asm {
@@ -4711,81 +4209,17 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
psllw xmm5, 8
pcmpeqb xmm4, xmm4 // generate mask 0xff000000
pslld xmm4, 24
-
- sub ecx, 1
- je convertloop1 // only 1 pixel?
- jl convertloop1b
-
- // 1 pixel loop until destination pointer is aligned.
- alignloop1:
- test edx, 15 // aligned?
- je alignloop1b
- movd xmm3, [eax]
- lea eax, [eax + 4]
- movdqa xmm0, xmm3 // src argb
- pxor xmm3, xmm4 // ~alpha
- movd xmm2, [esi] // _r_b
- pshufb xmm3, kShuffleAlpha // alpha
- pand xmm2, xmm6 // _r_b
- paddw xmm3, xmm7 // 256 - alpha
- pmullw xmm2, xmm3 // _r_b * alpha
- movd xmm1, [esi] // _a_g
- lea esi, [esi + 4]
- psrlw xmm1, 8 // _a_g
- por xmm0, xmm4 // set alpha to 255
- pmullw xmm1, xmm3 // _a_g * alpha
- psrlw xmm2, 8 // _r_b convert to 8 bits again
- paddusb xmm0, xmm2 // + src argb
- pand xmm1, xmm5 // a_g_ convert to 8 bits again
- paddusb xmm0, xmm1 // + src argb
- sub ecx, 1
- movd [edx], xmm0
- lea edx, [edx + 4]
- jge alignloop1
-
- alignloop1b:
- add ecx, 1 - 4
- jl convertloop4b
-
- test eax, 15 // unaligned?
- jne convertuloop4
- test esi, 15 // unaligned?
- jne convertuloop4
+ sub ecx, 4
+ jl convertloop4b // less than 4 pixels?
// 4 pixel loop.
convertloop4:
- movdqa xmm3, [eax] // src argb
- lea eax, [eax + 16]
- movdqa xmm0, xmm3 // src argb
- pxor xmm3, xmm4 // ~alpha
- movdqa xmm2, [esi] // _r_b
- pshufb xmm3, kShuffleAlpha // alpha
- pand xmm2, xmm6 // _r_b
- paddw xmm3, xmm7 // 256 - alpha
- pmullw xmm2, xmm3 // _r_b * alpha
- movdqa xmm1, [esi] // _a_g
- lea esi, [esi + 16]
- psrlw xmm1, 8 // _a_g
- por xmm0, xmm4 // set alpha to 255
- pmullw xmm1, xmm3 // _a_g * alpha
- psrlw xmm2, 8 // _r_b convert to 8 bits again
- paddusb xmm0, xmm2 // + src argb
- pand xmm1, xmm5 // a_g_ convert to 8 bits again
- paddusb xmm0, xmm1 // + src argb
- sub ecx, 4
- movdqa [edx], xmm0
- lea edx, [edx + 16]
- jge convertloop4
- jmp convertloop4b
-
- // 4 pixel unaligned loop.
- convertuloop4:
movdqu xmm3, [eax] // src argb
lea eax, [eax + 16]
movdqa xmm0, xmm3 // src argb
pxor xmm3, xmm4 // ~alpha
movdqu xmm2, [esi] // _r_b
- pshufb xmm3, kShuffleAlpha // alpha
+ pshufb xmm3, xmmword ptr kShuffleAlpha // alpha
pand xmm2, xmm6 // _r_b
paddw xmm3, xmm7 // 256 - alpha
pmullw xmm2, xmm3 // _r_b * alpha
@@ -4798,10 +4232,10 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
paddusb xmm0, xmm2 // + src argb
pand xmm1, xmm5 // a_g_ convert to 8 bits again
paddusb xmm0, xmm1 // + src argb
- sub ecx, 4
- movdqa [edx], xmm0
+ movdqu [edx], xmm0
lea edx, [edx + 16]
- jge convertuloop4
+ sub ecx, 4
+ jge convertloop4
convertloop4b:
add ecx, 4 - 1
@@ -4814,7 +4248,7 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
movdqa xmm0, xmm3 // src argb
pxor xmm3, xmm4 // ~alpha
movd xmm2, [esi] // _r_b
- pshufb xmm3, kShuffleAlpha // alpha
+ pshufb xmm3, xmmword ptr kShuffleAlpha // alpha
pand xmm2, xmm6 // _r_b
paddw xmm3, xmm7 // 256 - alpha
pmullw xmm2, xmm3 // _r_b * alpha
@@ -4827,9 +4261,9 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
paddusb xmm0, xmm2 // + src argb
pand xmm1, xmm5 // a_g_ convert to 8 bits again
paddusb xmm0, xmm1 // + src argb
- sub ecx, 1
movd [edx], xmm0
lea edx, [edx + 4]
+ sub ecx, 1
jge convertloop1
convertloop1b:
@@ -4839,50 +4273,6 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
}
#endif // HAS_ARGBBLENDROW_SSSE3
-#ifdef HAS_ARGBATTENUATEROW_SSE2
-// Attenuate 4 pixels at a time.
-// Aligned to 16 bytes.
-__declspec(naked) __declspec(align(16))
-void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
- __asm {
- mov eax, [esp + 4] // src_argb0
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // width
- pcmpeqb xmm4, xmm4 // generate mask 0xff000000
- pslld xmm4, 24
- pcmpeqb xmm5, xmm5 // generate mask 0x00ffffff
- psrld xmm5, 8
-
- align 4
- convertloop:
- movdqa xmm0, [eax] // read 4 pixels
- punpcklbw xmm0, xmm0 // first 2
- pshufhw xmm2, xmm0, 0FFh // 8 alpha words
- pshuflw xmm2, xmm2, 0FFh
- pmulhuw xmm0, xmm2 // rgb * a
- movdqa xmm1, [eax] // read 4 pixels
- punpckhbw xmm1, xmm1 // next 2 pixels
- pshufhw xmm2, xmm1, 0FFh // 8 alpha words
- pshuflw xmm2, xmm2, 0FFh
- pmulhuw xmm1, xmm2 // rgb * a
- movdqa xmm2, [eax] // alphas
- lea eax, [eax + 16]
- psrlw xmm0, 8
- pand xmm2, xmm4
- psrlw xmm1, 8
- packuswb xmm0, xmm1
- pand xmm0, xmm5 // keep original alphas
- por xmm0, xmm2
- sub ecx, 4
- movdqa [edx], xmm0
- lea edx, [edx + 16]
- jg convertloop
-
- ret
- }
-}
-#endif // HAS_ARGBATTENUATEROW_SSE2
-
#ifdef HAS_ARGBATTENUATEROW_SSSE3
// Shuffle table duplicating alpha.
static const uvec8 kShuffleAlpha0 = {
@@ -4892,7 +4282,7 @@ static const uvec8 kShuffleAlpha1 = {
11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u,
15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u,
};
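+// Attenuation multiplies each color channel by its own alpha; a scalar
+// sketch (the pmulhuw fixed point form below rounds slightly differently):
+//   dst_b = b * a / 255;  dst_g = g * a / 255;  dst_r = r * a / 255;  dst_a = a;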
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
__asm {
mov eax, [esp + 4] // src_argb0
@@ -4900,10 +4290,9 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
mov ecx, [esp + 12] // width
pcmpeqb xmm3, xmm3 // generate mask 0xff000000
pslld xmm3, 24
- movdqa xmm4, kShuffleAlpha0
- movdqa xmm5, kShuffleAlpha1
+ movdqa xmm4, xmmword ptr kShuffleAlpha0
+ movdqa xmm5, xmmword ptr kShuffleAlpha1
- align 4
convertloop:
movdqu xmm0, [eax] // read 4 pixels
pshufb xmm0, xmm4 // isolate first 2 alphas
@@ -4922,9 +4311,9 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
psrlw xmm1, 8
packuswb xmm0, xmm1
por xmm0, xmm2 // copy original alpha
- sub ecx, 4
movdqu [edx], xmm0
lea edx, [edx + 16]
+ sub ecx, 4
jg convertloop
ret
@@ -4934,24 +4323,20 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
#ifdef HAS_ARGBATTENUATEROW_AVX2
// Shuffle table duplicating alpha.
-static const ulvec8 kShuffleAlpha_AVX2 = {
- 6u, 7u, 6u, 7u, 6u, 7u, 128u, 128u,
- 14u, 15u, 14u, 15u, 14u, 15u, 128u, 128u,
- 6u, 7u, 6u, 7u, 6u, 7u, 128u, 128u,
- 14u, 15u, 14u, 15u, 14u, 15u, 128u, 128u,
+static const uvec8 kShuffleAlpha_AVX2 = {
+ 6u, 7u, 6u, 7u, 6u, 7u, 128u, 128u, 14u, 15u, 14u, 15u, 14u, 15u, 128u, 128u
};
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) {
__asm {
mov eax, [esp + 4] // src_argb0
mov edx, [esp + 8] // dst_argb
mov ecx, [esp + 12] // width
sub edx, eax
- vmovdqa ymm4, kShuffleAlpha_AVX2
+ vbroadcastf128 ymm4, xmmword ptr kShuffleAlpha_AVX2
vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xff000000
vpslld ymm5, ymm5, 24
- align 4
convertloop:
vmovdqu ymm6, [eax] // read 8 pixels.
vpunpcklbw ymm0, ymm6, ymm6 // low 4 pixels. mutated.
@@ -4965,9 +4350,9 @@ void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) {
vpsrlw ymm1, ymm1, 8
vpackuswb ymm0, ymm0, ymm1 // unmutated.
vpor ymm0, ymm0, ymm6 // copy original alpha
- sub ecx, 8
vmovdqu [eax + edx], ymm0
lea eax, [eax + 32]
+ sub ecx, 8
jg convertloop
vzeroupper
@@ -4978,25 +4363,25 @@ void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) {
#ifdef HAS_ARGBUNATTENUATEROW_SSE2
// Unattenuate 4 pixels at a time.
-// Aligned to 16 bytes.
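+// A scalar sketch: the inverse of attenuation, with the divide replaced by
+// the fixed_invtbl8 reciprocal table and saturation via packuswb:
+//   dst_c = min(255, c * 255 / a);  // for each of b, g, r; alpha unchanged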
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
int width) {
__asm {
+ push ebx
push esi
push edi
- mov eax, [esp + 8 + 4] // src_argb0
- mov edx, [esp + 8 + 8] // dst_argb
- mov ecx, [esp + 8 + 12] // width
+ mov eax, [esp + 12 + 4] // src_argb
+ mov edx, [esp + 12 + 8] // dst_argb
+ mov ecx, [esp + 12 + 12] // width
+ lea ebx, fixed_invtbl8
- align 4
convertloop:
movdqu xmm0, [eax] // read 4 pixels
movzx esi, byte ptr [eax + 3] // first alpha
movzx edi, byte ptr [eax + 7] // second alpha
punpcklbw xmm0, xmm0 // first 2
- movd xmm2, dword ptr fixed_invtbl8[esi * 4]
- movd xmm3, dword ptr fixed_invtbl8[edi * 4]
+ movd xmm2, dword ptr [ebx + esi * 4]
+ movd xmm3, dword ptr [ebx + edi * 4]
pshuflw xmm2, xmm2, 040h // first 4 inv_alpha words. 1, a, a, a
pshuflw xmm3, xmm3, 040h // next 4 inv_alpha words
movlhps xmm2, xmm3
@@ -5006,21 +4391,22 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
movzx esi, byte ptr [eax + 11] // third alpha
movzx edi, byte ptr [eax + 15] // fourth alpha
punpckhbw xmm1, xmm1 // next 2
- movd xmm2, dword ptr fixed_invtbl8[esi * 4]
- movd xmm3, dword ptr fixed_invtbl8[edi * 4]
+ movd xmm2, dword ptr [ebx + esi * 4]
+ movd xmm3, dword ptr [ebx + edi * 4]
pshuflw xmm2, xmm2, 040h // first 4 inv_alpha words
pshuflw xmm3, xmm3, 040h // next 4 inv_alpha words
movlhps xmm2, xmm3
pmulhuw xmm1, xmm2 // rgb * a
lea eax, [eax + 16]
-
packuswb xmm0, xmm1
- sub ecx, 4
movdqu [edx], xmm0
lea edx, [edx + 16]
+ sub ecx, 4
jg convertloop
+
pop edi
pop esi
+ pop ebx
ret
}
}
@@ -5028,14 +4414,13 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
#ifdef HAS_ARGBUNATTENUATEROW_AVX2
// Shuffle table duplicating alpha.
-static const ulvec8 kUnattenShuffleAlpha_AVX2 = {
- 0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15,
- 0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15,
+static const uvec8 kUnattenShuffleAlpha_AVX2 = {
+ 0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15u
};
// TODO(fbarchard): Enable USE_GATHER for future hardware if faster.
// USE_GATHER is not on by default, due to being a slow instruction.
#ifdef USE_GATHER
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
int width) {
__asm {
@@ -5043,9 +4428,8 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
mov edx, [esp + 8] // dst_argb
mov ecx, [esp + 12] // width
sub edx, eax
- vmovdqa ymm4, kUnattenShuffleAlpha_AVX2
+ vbroadcastf128 ymm4, xmmword ptr kUnattenShuffleAlpha_AVX2
- align 4
convertloop:
vmovdqu ymm6, [eax] // read 8 pixels.
vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xffffffff for gather.
@@ -5060,9 +4444,9 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
vpmulhuw ymm0, ymm0, ymm2 // rgb * ia
vpmulhuw ymm1, ymm1, ymm3 // rgb * ia
vpackuswb ymm0, ymm0, ymm1 // unmutated.
- sub ecx, 8
vmovdqu [eax + edx], ymm0
lea eax, [eax + 32]
+ sub ecx, 8
jg convertloop
vzeroupper
@@ -5070,42 +4454,42 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
}
}
#else // USE_GATHER
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
int width) {
__asm {
- mov eax, [esp + 4] // src_argb0
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // width
- sub edx, eax
- vmovdqa ymm5, kUnattenShuffleAlpha_AVX2
-
+ push ebx
push esi
push edi
+ mov eax, [esp + 12 + 4] // src_argb
+ mov edx, [esp + 12 + 8] // dst_argb
+ mov ecx, [esp + 12 + 12] // width
+ sub edx, eax
+ lea ebx, fixed_invtbl8
+ vbroadcastf128 ymm5, xmmword ptr kUnattenShuffleAlpha_AVX2
- align 4
convertloop:
// replace VPGATHER
movzx esi, byte ptr [eax + 3] // alpha0
movzx edi, byte ptr [eax + 7] // alpha1
- vmovd xmm0, dword ptr fixed_invtbl8[esi * 4] // [1,a0]
- vmovd xmm1, dword ptr fixed_invtbl8[edi * 4] // [1,a1]
+ vmovd xmm0, dword ptr [ebx + esi * 4] // [1,a0]
+ vmovd xmm1, dword ptr [ebx + edi * 4] // [1,a1]
movzx esi, byte ptr [eax + 11] // alpha2
movzx edi, byte ptr [eax + 15] // alpha3
vpunpckldq xmm6, xmm0, xmm1 // [1,a1,1,a0]
- vmovd xmm2, dword ptr fixed_invtbl8[esi * 4] // [1,a2]
- vmovd xmm3, dword ptr fixed_invtbl8[edi * 4] // [1,a3]
+ vmovd xmm2, dword ptr [ebx + esi * 4] // [1,a2]
+ vmovd xmm3, dword ptr [ebx + edi * 4] // [1,a3]
movzx esi, byte ptr [eax + 19] // alpha4
movzx edi, byte ptr [eax + 23] // alpha5
vpunpckldq xmm7, xmm2, xmm3 // [1,a3,1,a2]
- vmovd xmm0, dword ptr fixed_invtbl8[esi * 4] // [1,a4]
- vmovd xmm1, dword ptr fixed_invtbl8[edi * 4] // [1,a5]
+ vmovd xmm0, dword ptr [ebx + esi * 4] // [1,a4]
+ vmovd xmm1, dword ptr [ebx + edi * 4] // [1,a5]
movzx esi, byte ptr [eax + 27] // alpha6
movzx edi, byte ptr [eax + 31] // alpha7
vpunpckldq xmm0, xmm0, xmm1 // [1,a5,1,a4]
- vmovd xmm2, dword ptr fixed_invtbl8[esi * 4] // [1,a6]
- vmovd xmm3, dword ptr fixed_invtbl8[edi * 4] // [1,a7]
+ vmovd xmm2, dword ptr [ebx + esi * 4] // [1,a6]
+ vmovd xmm3, dword ptr [ebx + edi * 4] // [1,a7]
vpunpckldq xmm2, xmm2, xmm3 // [1,a7,1,a6]
vpunpcklqdq xmm3, xmm6, xmm7 // [1,a3,1,a2,1,a1,1,a0]
vpunpcklqdq xmm0, xmm0, xmm2 // [1,a7,1,a6,1,a5,1,a4]
@@ -5122,13 +4506,14 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
vpmulhuw ymm0, ymm0, ymm2 // rgb * ia
vpmulhuw ymm1, ymm1, ymm3 // rgb * ia
vpackuswb ymm0, ymm0, ymm1 // unmutated.
- sub ecx, 8
vmovdqu [eax + edx], ymm0
lea eax, [eax + 32]
+ sub ecx, 8
jg convertloop
pop edi
pop esi
+ pop ebx
vzeroupper
ret
}
@@ -5138,27 +4523,26 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
#ifdef HAS_ARGBGRAYROW_SSSE3
// Convert 8 ARGB pixels (32 bytes) to 8 Gray ARGB pixels.
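+// A scalar sketch of the luma formed by kARGBToYJ / kAddYJ64 below, using
+// full-range BT.601 style weights that sum to 128, with rounding:
+//   uint8 y = (38 * r + 75 * g + 15 * b + 64) >> 7;  // output is (a, y, y, y)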
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
__asm {
mov eax, [esp + 4] /* src_argb */
mov edx, [esp + 8] /* dst_argb */
mov ecx, [esp + 12] /* width */
- movdqa xmm4, kARGBToYJ
- movdqa xmm5, kAddYJ64
+ movdqa xmm4, xmmword ptr kARGBToYJ
+ movdqa xmm5, xmmword ptr kAddYJ64
- align 4
convertloop:
- movdqa xmm0, [eax] // G
- movdqa xmm1, [eax + 16]
+ movdqu xmm0, [eax] // G
+ movdqu xmm1, [eax + 16]
pmaddubsw xmm0, xmm4
pmaddubsw xmm1, xmm4
phaddw xmm0, xmm1
paddw xmm0, xmm5 // Add .5 for rounding.
psrlw xmm0, 7
packuswb xmm0, xmm0 // 8 G bytes
- movdqa xmm2, [eax] // A
- movdqa xmm3, [eax + 16]
+ movdqu xmm2, [eax] // A
+ movdqu xmm3, [eax + 16]
lea eax, [eax + 32]
psrld xmm2, 24
psrld xmm3, 24
@@ -5170,10 +4554,10 @@ void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
movdqa xmm1, xmm0
punpcklwd xmm0, xmm3 // GGGA first 4
punpckhwd xmm1, xmm3 // GGGA next 4
- sub ecx, 8
- movdqa [edx], xmm0
- movdqa [edx + 16], xmm1
+ movdqu [edx], xmm0
+ movdqu [edx + 16], xmm1
lea edx, [edx + 32]
+ sub ecx, 8
jg convertloop
ret
}
@@ -5198,41 +4582,40 @@ static const vec8 kARGBToSepiaR = {
};
// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
__asm {
mov eax, [esp + 4] /* dst_argb */
mov ecx, [esp + 8] /* width */
- movdqa xmm2, kARGBToSepiaB
- movdqa xmm3, kARGBToSepiaG
- movdqa xmm4, kARGBToSepiaR
+ movdqa xmm2, xmmword ptr kARGBToSepiaB
+ movdqa xmm3, xmmword ptr kARGBToSepiaG
+ movdqa xmm4, xmmword ptr kARGBToSepiaR
- align 4
convertloop:
- movdqa xmm0, [eax] // B
- movdqa xmm6, [eax + 16]
+ movdqu xmm0, [eax] // B
+ movdqu xmm6, [eax + 16]
pmaddubsw xmm0, xmm2
pmaddubsw xmm6, xmm2
phaddw xmm0, xmm6
psrlw xmm0, 7
packuswb xmm0, xmm0 // 8 B values
- movdqa xmm5, [eax] // G
- movdqa xmm1, [eax + 16]
+ movdqu xmm5, [eax] // G
+ movdqu xmm1, [eax + 16]
pmaddubsw xmm5, xmm3
pmaddubsw xmm1, xmm3
phaddw xmm5, xmm1
psrlw xmm5, 7
packuswb xmm5, xmm5 // 8 G values
punpcklbw xmm0, xmm5 // 8 BG values
- movdqa xmm5, [eax] // R
- movdqa xmm1, [eax + 16]
+ movdqu xmm5, [eax] // R
+ movdqu xmm1, [eax + 16]
pmaddubsw xmm5, xmm4
pmaddubsw xmm1, xmm4
phaddw xmm5, xmm1
psrlw xmm5, 7
packuswb xmm5, xmm5 // 8 R values
- movdqa xmm6, [eax] // A
- movdqa xmm1, [eax + 16]
+ movdqu xmm6, [eax] // A
+ movdqu xmm1, [eax + 16]
psrld xmm6, 24
psrld xmm1, 24
packuswb xmm6, xmm1
@@ -5241,10 +4624,10 @@ void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
movdqa xmm1, xmm0 // Weave BG, RA together
punpcklwd xmm0, xmm5 // BGRA first 4
punpckhwd xmm1, xmm5 // BGRA next 4
- sub ecx, 8
- movdqa [eax], xmm0
- movdqa [eax + 16], xmm1
+ movdqu [eax], xmm0
+ movdqu [eax + 16], xmm1
lea eax, [eax + 32]
+ sub ecx, 8
jg convertloop
ret
}
@@ -5256,7 +4639,7 @@ void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
// Same as Sepia except matrix is provided.
// TODO(fbarchard): packuswbs only use half of the reg. To make RGBA, combine R
// and B into a high and low, then G/A, unpackl/hbw and then unpckl/hwd.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
const int8* matrix_argb, int width) {
__asm {
@@ -5270,14 +4653,13 @@ void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
pshufd xmm5, xmm5, 0xff
mov ecx, [esp + 16] /* width */
- align 4
convertloop:
- movdqa xmm0, [eax] // B
- movdqa xmm7, [eax + 16]
+ movdqu xmm0, [eax] // B
+ movdqu xmm7, [eax + 16]
pmaddubsw xmm0, xmm2
pmaddubsw xmm7, xmm2
- movdqa xmm6, [eax] // G
- movdqa xmm1, [eax + 16]
+ movdqu xmm6, [eax] // G
+ movdqu xmm1, [eax + 16]
pmaddubsw xmm6, xmm3
pmaddubsw xmm1, xmm3
phaddsw xmm0, xmm7 // B
@@ -5287,13 +4669,13 @@ void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
packuswb xmm0, xmm0 // 8 B values
packuswb xmm6, xmm6 // 8 G values
punpcklbw xmm0, xmm6 // 8 BG values
- movdqa xmm1, [eax] // R
- movdqa xmm7, [eax + 16]
+ movdqu xmm1, [eax] // R
+ movdqu xmm7, [eax + 16]
pmaddubsw xmm1, xmm4
pmaddubsw xmm7, xmm4
phaddsw xmm1, xmm7 // R
- movdqa xmm6, [eax] // A
- movdqa xmm7, [eax + 16]
+ movdqu xmm6, [eax] // A
+ movdqu xmm7, [eax + 16]
pmaddubsw xmm6, xmm5
pmaddubsw xmm7, xmm5
phaddsw xmm6, xmm7 // A
@@ -5305,11 +4687,11 @@ void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
movdqa xmm6, xmm0 // Weave BG, RA together
punpcklwd xmm0, xmm1 // BGRA first 4
punpckhwd xmm6, xmm1 // BGRA next 4
- sub ecx, 8
- movdqa [edx], xmm0
- movdqa [edx + 16], xmm6
+ movdqu [edx], xmm0
+ movdqu [edx + 16], xmm6
lea eax, [eax + 32]
lea edx, [edx + 32]
+ sub ecx, 8
jg convertloop
ret
}
@@ -5318,8 +4700,7 @@ void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
#ifdef HAS_ARGBQUANTIZEROW_SSE2
// Quantize 4 ARGB pixels (16 bytes).
-// Aligned to 16 bytes.
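+// Per color channel (alpha preserved), the quantize step is roughly:
+//   dst = (c * scale >> 16) * interval_size + interval_offset;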
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
int interval_offset, int width) {
__asm {
@@ -5338,25 +4719,24 @@ void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
pcmpeqb xmm6, xmm6 // generate mask 0xff000000
pslld xmm6, 24
- align 4
convertloop:
- movdqa xmm0, [eax] // read 4 pixels
+ movdqu xmm0, [eax] // read 4 pixels
punpcklbw xmm0, xmm5 // first 2 pixels
pmulhuw xmm0, xmm2 // pixel * scale >> 16
- movdqa xmm1, [eax] // read 4 pixels
+ movdqu xmm1, [eax] // read 4 pixels
punpckhbw xmm1, xmm5 // next 2 pixels
pmulhuw xmm1, xmm2
pmullw xmm0, xmm3 // * interval_size
- movdqa xmm7, [eax] // read 4 pixels
+ movdqu xmm7, [eax] // read 4 pixels
pmullw xmm1, xmm3
pand xmm7, xmm6 // mask alpha
paddw xmm0, xmm4 // + interval_size / 2
paddw xmm1, xmm4
packuswb xmm0, xmm1
por xmm0, xmm7
- sub ecx, 4
- movdqa [eax], xmm0
+ movdqu [eax], xmm0
lea eax, [eax + 16]
+ sub ecx, 4
jg convertloop
ret
}
@@ -5365,8 +4745,7 @@ void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
#ifdef HAS_ARGBSHADEROW_SSE2
// Shade 4 pixels at a time by specified value.
-// Aligned to 16 bytes.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
uint32 value) {
__asm {
@@ -5377,9 +4756,8 @@ void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
punpcklbw xmm2, xmm2
punpcklqdq xmm2, xmm2
- align 4
convertloop:
- movdqa xmm0, [eax] // read 4 pixels
+ movdqu xmm0, [eax] // read 4 pixels
lea eax, [eax + 16]
movdqa xmm1, xmm0
punpcklbw xmm0, xmm0 // first 2
@@ -5389,9 +4767,9 @@ void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
psrlw xmm0, 8
psrlw xmm1, 8
packuswb xmm0, xmm1
- sub ecx, 4
- movdqa [edx], xmm0
+ movdqu [edx], xmm0
lea edx, [edx + 16]
+ sub ecx, 4
jg convertloop
ret
@@ -5401,7 +4779,7 @@ void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
#ifdef HAS_ARGBMULTIPLYROW_SSE2
// Multiply 2 rows of ARGB pixels together, 4 pixels at a time.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGBMultiplyRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
uint8* dst_argb, int width) {
__asm {
@@ -5412,7 +4790,6 @@ void ARGBMultiplyRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
mov ecx, [esp + 4 + 16] // width
pxor xmm5, xmm5 // constant 0
- align 4
convertloop:
movdqu xmm0, [eax] // read 4 pixels from src_argb0
movdqu xmm2, [esi] // read 4 pixels from src_argb1
@@ -5427,9 +4804,9 @@ void ARGBMultiplyRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
lea eax, [eax + 16]
lea esi, [esi + 16]
packuswb xmm0, xmm1
- sub ecx, 4
movdqu [edx], xmm0
lea edx, [edx + 16]
+ sub ecx, 4
jg convertloop
pop esi
@@ -5441,7 +4818,7 @@ void ARGBMultiplyRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
#ifdef HAS_ARGBADDROW_SSE2
// Add 2 rows of ARGB pixels together, 4 pixels at a time.
// TODO(fbarchard): Port this to posix, neon and other math functions.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGBAddRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
uint8* dst_argb, int width) {
__asm {
@@ -5454,16 +4831,15 @@ void ARGBAddRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
sub ecx, 4
jl convertloop49
- align 4
convertloop4:
movdqu xmm0, [eax] // read 4 pixels from src_argb0
lea eax, [eax + 16]
movdqu xmm1, [esi] // read 4 pixels from src_argb1
lea esi, [esi + 16]
paddusb xmm0, xmm1 // src_argb0 + src_argb1
- sub ecx, 4
movdqu [edx], xmm0
lea edx, [edx + 16]
+ sub ecx, 4
jge convertloop4
convertloop49:
@@ -5476,9 +4852,9 @@ void ARGBAddRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
movd xmm1, [esi] // read 1 pixels from src_argb1
lea esi, [esi + 4]
paddusb xmm0, xmm1 // src_argb0 + src_argb1
- sub ecx, 1
movd [edx], xmm0
lea edx, [edx + 4]
+ sub ecx, 1
jge convertloop1
convertloop19:
@@ -5490,7 +4866,7 @@ void ARGBAddRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
#ifdef HAS_ARGBSUBTRACTROW_SSE2
// Subtract 2 rows of ARGB pixels, 4 pixels at a time.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGBSubtractRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
uint8* dst_argb, int width) {
__asm {
@@ -5500,16 +4876,15 @@ void ARGBSubtractRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
mov edx, [esp + 4 + 12] // dst_argb
mov ecx, [esp + 4 + 16] // width
- align 4
convertloop:
movdqu xmm0, [eax] // read 4 pixels from src_argb0
lea eax, [eax + 16]
movdqu xmm1, [esi] // read 4 pixels from src_argb1
lea esi, [esi + 16]
psubusb xmm0, xmm1 // src_argb0 - src_argb1
- sub ecx, 4
movdqu [edx], xmm0
lea edx, [edx + 16]
+ sub ecx, 4
jg convertloop
pop esi
@@ -5520,7 +4895,7 @@ void ARGBSubtractRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
#ifdef HAS_ARGBMULTIPLYROW_AVX2
// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGBMultiplyRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
uint8* dst_argb, int width) {
__asm {
@@ -5531,7 +4906,6 @@ void ARGBMultiplyRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
mov ecx, [esp + 4 + 16] // width
vpxor ymm5, ymm5, ymm5 // constant 0
- align 4
convertloop:
vmovdqu ymm1, [eax] // read 8 pixels from src_argb0
lea eax, [eax + 32]
@@ -5558,7 +4932,7 @@ void ARGBMultiplyRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
#ifdef HAS_ARGBADDROW_AVX2
// Add 2 rows of ARGB pixels together, 8 pixels at a time.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGBAddRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
uint8* dst_argb, int width) {
__asm {
@@ -5568,7 +4942,6 @@ void ARGBAddRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
mov edx, [esp + 4 + 12] // dst_argb
mov ecx, [esp + 4 + 16] // width
- align 4
convertloop:
vmovdqu ymm0, [eax] // read 8 pixels from src_argb0
lea eax, [eax + 32]
@@ -5588,7 +4961,7 @@ void ARGBAddRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
#ifdef HAS_ARGBSUBTRACTROW_AVX2
// Subtract 2 rows of ARGB pixels, 8 pixels at a time.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGBSubtractRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
uint8* dst_argb, int width) {
__asm {
@@ -5598,7 +4971,6 @@ void ARGBSubtractRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
mov edx, [esp + 4 + 12] // dst_argb
mov ecx, [esp + 4 + 16] // width
- align 4
convertloop:
vmovdqu ymm0, [eax] // read 8 pixels from src_argb0
lea eax, [eax + 32]
@@ -5621,7 +4993,7 @@ void ARGBSubtractRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
// -1 0 1
// -2 0 2
// -1 0 1
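+// i.e. per output pixel, with rows y0 (above), y1, y2 (below), roughly:
+//   int a = src_y0[i] - src_y0[i + 2];
+//   int b = src_y1[i] - src_y1[i + 2];
+//   int c = src_y2[i] - src_y2[i + 2];
+//   dst_sobelx[i] = min(255, abs(a + b + b + c));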
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
const uint8* src_y2, uint8* dst_sobelx, int width) {
__asm {
@@ -5637,7 +5009,6 @@ void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
sub edx, eax
pxor xmm5, xmm5 // constant 0
- align 4
convertloop:
movq xmm0, qword ptr [eax] // read 8 pixels from src_y0[0]
movq xmm1, qword ptr [eax + 2] // read 8 pixels from src_y0[2]
@@ -5661,9 +5032,9 @@ void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
psubw xmm1, xmm0
pmaxsw xmm0, xmm1
packuswb xmm0, xmm0
- sub ecx, 8
movq qword ptr [eax + edx], xmm0
lea eax, [eax + 8]
+ sub ecx, 8
jg convertloop
pop edi
@@ -5678,7 +5049,7 @@ void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
// -1 -2 -1
// 0 0 0
// 1 2 1
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
uint8* dst_sobely, int width) {
__asm {
@@ -5691,7 +5062,6 @@ void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
sub edx, eax
pxor xmm5, xmm5 // constant 0
- align 4
convertloop:
movq xmm0, qword ptr [eax] // read 8 pixels from src_y0[0]
movq xmm1, qword ptr [eax + esi] // read 8 pixels from src_y1[0]
@@ -5715,9 +5085,9 @@ void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
psubw xmm1, xmm0
pmaxsw xmm0, xmm1
packuswb xmm0, xmm0
- sub ecx, 8
movq qword ptr [eax + edx], xmm0
lea eax, [eax + 8]
+ sub ecx, 8
jg convertloop
pop esi
@@ -5732,7 +5102,7 @@ void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
// R = Sobel
// G = Sobel
// B = Sobel
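+// i.e. per pixel, roughly:
+//   int s = min(255, sobelx[i] + sobely[i]);
+//   dst[i] = 0xff000000u | (s << 16) | (s << 8) | s;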
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_argb, int width) {
__asm {
@@ -5745,10 +5115,9 @@ void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
pcmpeqb xmm5, xmm5 // alpha 255
pslld xmm5, 24 // 0xff000000
- align 4
convertloop:
- movdqa xmm0, [eax] // read 16 pixels src_sobelx
- movdqa xmm1, [eax + esi] // read 16 pixels src_sobely
+ movdqu xmm0, [eax] // read 16 pixels src_sobelx
+ movdqu xmm1, [eax + esi] // read 16 pixels src_sobely
lea eax, [eax + 16]
paddusb xmm0, xmm1 // sobel = sobelx + sobely
movdqa xmm2, xmm0 // GG
@@ -5764,12 +5133,12 @@ void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
punpckhwd xmm0, xmm0 // Last 4
por xmm3, xmm5 // GGGA
por xmm0, xmm5
- sub ecx, 16
- movdqa [edx], xmm1
- movdqa [edx + 16], xmm2
- movdqa [edx + 32], xmm3
- movdqa [edx + 48], xmm0
+ movdqu [edx], xmm1
+ movdqu [edx + 16], xmm2
+ movdqu [edx + 32], xmm3
+ movdqu [edx + 48], xmm0
lea edx, [edx + 64]
+ sub ecx, 16
jg convertloop
pop esi
@@ -5780,7 +5149,7 @@ void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
#ifdef HAS_SOBELTOPLANEROW_SSE2
// Adds Sobel X and Sobel Y and stores Sobel into a plane.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_y, int width) {
__asm {
@@ -5791,15 +5160,14 @@ void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
mov ecx, [esp + 4 + 16] // width
sub esi, eax
- align 4
convertloop:
- movdqa xmm0, [eax] // read 16 pixels src_sobelx
- movdqa xmm1, [eax + esi] // read 16 pixels src_sobely
+ movdqu xmm0, [eax] // read 16 pixels src_sobelx
+ movdqu xmm1, [eax + esi] // read 16 pixels src_sobely
lea eax, [eax + 16]
paddusb xmm0, xmm1 // sobel = sobelx + sobely
- sub ecx, 16
- movdqa [edx], xmm0
+ movdqu [edx], xmm0
lea edx, [edx + 16]
+ sub ecx, 16
jg convertloop
pop esi
@@ -5814,7 +5182,7 @@ void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
// R = Sobel X
// G = Sobel
// B = Sobel Y
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_argb, int width) {
__asm {
@@ -5826,10 +5194,9 @@ void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
sub esi, eax
pcmpeqb xmm5, xmm5 // alpha 255
- align 4
convertloop:
- movdqa xmm0, [eax] // read 16 pixels src_sobelx
- movdqa xmm1, [eax + esi] // read 16 pixels src_sobely
+ movdqu xmm0, [eax] // read 16 pixels src_sobelx
+ movdqu xmm1, [eax + esi] // read 16 pixels src_sobely
lea eax, [eax + 16]
movdqa xmm2, xmm0
paddusb xmm2, xmm1 // sobel = sobelx + sobely
@@ -5845,12 +5212,12 @@ void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
movdqa xmm7, xmm1 // YSXA
punpcklwd xmm7, xmm0 // Next 4
punpckhwd xmm1, xmm0 // Last 4
- sub ecx, 16
- movdqa [edx], xmm6
- movdqa [edx + 16], xmm4
- movdqa [edx + 32], xmm7
- movdqa [edx + 48], xmm1
+ movdqu [edx], xmm6
+ movdqu [edx + 16], xmm4
+ movdqu [edx + 32], xmm7
+ movdqu [edx + 48], xmm1
lea edx, [edx + 64]
+ sub ecx, 16
jg convertloop
pop esi
@@ -5871,8 +5238,7 @@ void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
// area is the number of pixels in the area being averaged.
// dst points to pixel to store result to.
// count is number of averaged pixels to produce.
-// Does 4 pixels at a time, requires CumulativeSum pointers to be 16 byte
-// aligned.
+// Does 4 pixels at a time.
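+// A sketch of the rectangle average taken from the cumulative sums (the
+// topright/botright names are illustrative; the loops below use a fixed
+// point reciprocal instead of the divide):
+//   int32 sum = topleft[i] - topright[i] - botleft[i] + botright[i];
+//   dst[i] = sum / area;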
void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
int width, int area, uint8* dst,
int count) {
@@ -5902,13 +5268,12 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
packssdw xmm5, xmm5 // 16 bit shorts
// 4 pixel loop small blocks.
- align 4
s4:
// top left
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + 32]
- movdqa xmm3, [eax + 48]
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ movdqu xmm2, [eax + 32]
+ movdqu xmm3, [eax + 48]
// - top right
psubd xmm0, [eax + edx * 4]
@@ -5945,13 +5310,12 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
jmp l4b
// 4 pixel loop
- align 4
l4:
// top left
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + 32]
- movdqa xmm3, [eax + 48]
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ movdqu xmm2, [eax + 32]
+ movdqu xmm3, [eax + 48]
// - top right
psubd xmm0, [eax + edx * 4]
@@ -5998,9 +5362,8 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
jl l1b
// 1 pixel loop
- align 4
l1:
- movdqa xmm0, [eax]
+ movdqu xmm0, [eax]
psubd xmm0, [eax + edx * 4]
lea eax, [eax + 16]
psubd xmm0, [esi]
@@ -6039,7 +5402,6 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
jne l4b
// 4 pixel loop
- align 4
l4:
movdqu xmm2, [eax] // 4 argb pixels 16 bytes.
lea eax, [eax + 16]
@@ -6056,26 +5418,26 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
punpckhwd xmm5, xmm1
paddd xmm0, xmm2
- movdqa xmm2, [esi] // previous row above.
+ movdqu xmm2, [esi] // previous row above.
paddd xmm2, xmm0
paddd xmm0, xmm3
- movdqa xmm3, [esi + 16]
+ movdqu xmm3, [esi + 16]
paddd xmm3, xmm0
paddd xmm0, xmm4
- movdqa xmm4, [esi + 32]
+ movdqu xmm4, [esi + 32]
paddd xmm4, xmm0
paddd xmm0, xmm5
- movdqa xmm5, [esi + 48]
+ movdqu xmm5, [esi + 48]
lea esi, [esi + 64]
paddd xmm5, xmm0
- movdqa [edx], xmm2
- movdqa [edx + 16], xmm3
- movdqa [edx + 32], xmm4
- movdqa [edx + 48], xmm5
+ movdqu [edx], xmm2
+ movdqu [edx + 16], xmm3
+ movdqu [edx + 32], xmm4
+ movdqu [edx + 48], xmm5
lea edx, [edx + 64]
sub ecx, 4
@@ -6086,7 +5448,6 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
jl l1b
// 1 pixel loop
- align 4
l1:
movd xmm2, dword ptr [eax] // 1 argb pixel 4 bytes.
lea eax, [eax + 4]
@@ -6108,7 +5469,7 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
#ifdef HAS_ARGBAFFINEROW_SSE2
// Copy ARGB pixels from source image with slope to a row of destination.
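+// Conceptually, per destination pixel, stepping the source coordinate by
+// the per-pixel slope (uv_dudv holds start u, v and steps du, dv):
+//   dst[i] = *(const uint32*)(src_argb + (int)v * src_argb_stride + (int)u * 4);
+//   u += du;  v += dv;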
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
LIBYUV_API
void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
uint8* dst_argb, const float* uv_dudv, int width) {
@@ -6141,7 +5502,6 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
addps xmm4, xmm4 // dudv *= 4
// 4 pixel loop
- align 4
l4:
cvttps2dq xmm0, xmm2 // x, y float to int first 2
cvttps2dq xmm1, xmm3 // x, y float to int next 2
@@ -6163,9 +5523,9 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
movd xmm0, [eax + edi] // read pixel 3
punpckldq xmm6, xmm0 // combine pixel 2 and 3
addps xmm3, xmm4 // x, y += dx, dy next 2
- sub ecx, 4
movq qword ptr 8[edx], xmm6
lea edx, [edx + 16]
+ sub ecx, 4
jge l4
l4b:
@@ -6173,7 +5533,6 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
jl l1b
// 1 pixel loop
- align 4
l1:
cvttps2dq xmm0, xmm2 // x, y float to int
packssdw xmm0, xmm0 // x, y as shorts
@@ -6181,9 +5540,9 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
addps xmm2, xmm7 // x, y += dx, dy
movd esi, xmm0
movd xmm0, [eax + esi] // copy a pixel
- sub ecx, 1
movd [edx], xmm0
lea edx, [edx + 4]
+ sub ecx, 1
jge l1
l1b:
pop edi
@@ -6194,217 +5553,9 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
#endif // HAS_ARGBAFFINEROW_SSE2
#ifdef HAS_INTERPOLATEROW_AVX2
-// Bilinear filter 16x2 -> 16x1
-__declspec(naked) __declspec(align(16))
+// Bilinear filter 32x2 -> 32x1
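+// Vertical blend of two rows by source_y_fraction f in [0..255], roughly:
+//   dst[i] = (src[i] * (256 - f) + src[i + stride] * f) >> 8;
+// with f == 0 and f == 128 dispatched to copy and pavgb fast paths below.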
+__declspec(naked)
void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) {
- __asm {
- push esi
- push edi
- mov edi, [esp + 8 + 4] // dst_ptr
- mov esi, [esp + 8 + 8] // src_ptr
- mov edx, [esp + 8 + 12] // src_stride
- mov ecx, [esp + 8 + 16] // dst_width
- mov eax, [esp + 8 + 20] // source_y_fraction (0..255)
- shr eax, 1
- // Dispatch to specialized filters if applicable.
- cmp eax, 0
- je xloop100 // 0 / 128. Blend 100 / 0.
- sub edi, esi
- cmp eax, 32
- je xloop75 // 32 / 128 is 0.25. Blend 75 / 25.
- cmp eax, 64
- je xloop50 // 64 / 128 is 0.50. Blend 50 / 50.
- cmp eax, 96
- je xloop25 // 96 / 128 is 0.75. Blend 25 / 75.
-
- vmovd xmm0, eax // high fraction 0..127
- neg eax
- add eax, 128
- vmovd xmm5, eax // low fraction 128..1
- vpunpcklbw xmm5, xmm5, xmm0
- vpunpcklwd xmm5, xmm5, xmm5
- vpxor ymm0, ymm0, ymm0
- vpermd ymm5, ymm0, ymm5
-
- align 4
- xloop:
- vmovdqu ymm0, [esi]
- vmovdqu ymm2, [esi + edx]
- vpunpckhbw ymm1, ymm0, ymm2 // mutates
- vpunpcklbw ymm0, ymm0, ymm2 // mutates
- vpmaddubsw ymm0, ymm0, ymm5
- vpmaddubsw ymm1, ymm1, ymm5
- vpsrlw ymm0, ymm0, 7
- vpsrlw ymm1, ymm1, 7
- vpackuswb ymm0, ymm0, ymm1 // unmutates
- sub ecx, 32
- vmovdqu [esi + edi], ymm0
- lea esi, [esi + 32]
- jg xloop
- jmp xloop99
-
- // Blend 25 / 75.
- align 4
- xloop25:
- vmovdqu ymm0, [esi]
- vpavgb ymm0, ymm0, [esi + edx]
- vpavgb ymm0, ymm0, [esi + edx]
- sub ecx, 32
- vmovdqu [esi + edi], ymm0
- lea esi, [esi + 32]
- jg xloop25
- jmp xloop99
-
- // Blend 50 / 50.
- align 4
- xloop50:
- vmovdqu ymm0, [esi]
- vpavgb ymm0, ymm0, [esi + edx]
- sub ecx, 32
- vmovdqu [esi + edi], ymm0
- lea esi, [esi + 32]
- jg xloop50
- jmp xloop99
-
- // Blend 75 / 25.
- align 4
- xloop75:
- vmovdqu ymm0, [esi + edx]
- vpavgb ymm0, ymm0, [esi]
- vpavgb ymm0, ymm0, [esi]
- sub ecx, 32
- vmovdqu [esi + edi], ymm0
- lea esi, [esi + 32]
- jg xloop75
- jmp xloop99
-
- // Blend 100 / 0 - Copy row unchanged.
- align 4
- xloop100:
- rep movsb
-
- xloop99:
- pop edi
- pop esi
- vzeroupper
- ret
- }
-}
-#endif // HAS_INTERPOLATEROW_AVX2
-
-#ifdef HAS_INTERPOLATEROW_SSSE3
-// Bilinear filter 16x2 -> 16x1
-__declspec(naked) __declspec(align(16))
-void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) {
- __asm {
- push esi
- push edi
- mov edi, [esp + 8 + 4] // dst_ptr
- mov esi, [esp + 8 + 8] // src_ptr
- mov edx, [esp + 8 + 12] // src_stride
- mov ecx, [esp + 8 + 16] // dst_width
- mov eax, [esp + 8 + 20] // source_y_fraction (0..255)
- sub edi, esi
- shr eax, 1
- // Dispatch to specialized filters if applicable.
- cmp eax, 0
- je xloop100 // 0 / 128. Blend 100 / 0.
- cmp eax, 32
- je xloop75 // 32 / 128 is 0.25. Blend 75 / 25.
- cmp eax, 64
- je xloop50 // 64 / 128 is 0.50. Blend 50 / 50.
- cmp eax, 96
- je xloop25 // 96 / 128 is 0.75. Blend 25 / 75.
-
- movd xmm0, eax // high fraction 0..127
- neg eax
- add eax, 128
- movd xmm5, eax // low fraction 128..1
- punpcklbw xmm5, xmm0
- punpcklwd xmm5, xmm5
- pshufd xmm5, xmm5, 0
-
- align 4
- xloop:
- movdqa xmm0, [esi]
- movdqa xmm2, [esi + edx]
- movdqa xmm1, xmm0
- punpcklbw xmm0, xmm2
- punpckhbw xmm1, xmm2
- pmaddubsw xmm0, xmm5
- pmaddubsw xmm1, xmm5
- psrlw xmm0, 7
- psrlw xmm1, 7
- packuswb xmm0, xmm1
- sub ecx, 16
- movdqa [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop
- jmp xloop99
-
- // Blend 25 / 75.
- align 4
- xloop25:
- movdqa xmm0, [esi]
- movdqa xmm1, [esi + edx]
- pavgb xmm0, xmm1
- pavgb xmm0, xmm1
- sub ecx, 16
- movdqa [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop25
- jmp xloop99
-
- // Blend 50 / 50.
- align 4
- xloop50:
- movdqa xmm0, [esi]
- movdqa xmm1, [esi + edx]
- pavgb xmm0, xmm1
- sub ecx, 16
- movdqa [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop50
- jmp xloop99
-
- // Blend 75 / 25.
- align 4
- xloop75:
- movdqa xmm1, [esi]
- movdqa xmm0, [esi + edx]
- pavgb xmm0, xmm1
- pavgb xmm0, xmm1
- sub ecx, 16
- movdqa [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop75
- jmp xloop99
-
- // Blend 100 / 0 - Copy row unchanged.
- align 4
- xloop100:
- movdqa xmm0, [esi]
- sub ecx, 16
- movdqa [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop100
-
- xloop99:
- pop edi
- pop esi
- ret
- }
-}
-#endif // HAS_INTERPOLATEROW_SSSE3
-
-#ifdef HAS_INTERPOLATEROW_SSE2
-// Bilinear filter 16x2 -> 16x1
-__declspec(naked) __declspec(align(16))
-void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) {
__asm {
@@ -6415,201 +5566,141 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
mov edx, [esp + 8 + 12] // src_stride
mov ecx, [esp + 8 + 16] // dst_width
mov eax, [esp + 8 + 20] // source_y_fraction (0..255)
- sub edi, esi
// Dispatch to specialized filters if applicable.
cmp eax, 0
je xloop100 // 0 / 256. Blend 100 / 0.
- cmp eax, 64
- je xloop75 // 64 / 256 is 0.25. Blend 75 / 25.
+ sub edi, esi
cmp eax, 128
- je xloop50 // 128 / 256 is 0.50. Blend 50 / 50.
- cmp eax, 192
- je xloop25 // 192 / 256 is 0.75. Blend 25 / 75.
+ je xloop50 // 128 / 256 is 0.50. Blend 50 / 50.
- movd xmm5, eax // xmm5 = y fraction
- punpcklbw xmm5, xmm5
- psrlw xmm5, 1
- punpcklwd xmm5, xmm5
- punpckldq xmm5, xmm5
- punpcklqdq xmm5, xmm5
- pxor xmm4, xmm4
+ vmovd xmm0, eax // high fraction 0..255
+ neg eax
+ add eax, 256
+ vmovd xmm5, eax // low fraction 256..1
+ vpunpcklbw xmm5, xmm5, xmm0
+ vpunpcklwd xmm5, xmm5, xmm5
+ vbroadcastss ymm5, xmm5
+
+ mov eax, 0x80808080 // 128 in each byte, for bias and rounding.
+ vmovd xmm4, eax
+ vbroadcastss ymm4, xmm4
- align 4
xloop:
- movdqa xmm0, [esi] // row0
- movdqa xmm2, [esi + edx] // row1
- movdqa xmm1, xmm0
- movdqa xmm3, xmm2
- punpcklbw xmm2, xmm4
- punpckhbw xmm3, xmm4
- punpcklbw xmm0, xmm4
- punpckhbw xmm1, xmm4
- psubw xmm2, xmm0 // row1 - row0
- psubw xmm3, xmm1
- paddw xmm2, xmm2 // 9 bits * 15 bits = 8.16
- paddw xmm3, xmm3
- pmulhw xmm2, xmm5 // scale diff
- pmulhw xmm3, xmm5
- paddw xmm0, xmm2 // sum rows
- paddw xmm1, xmm3
- packuswb xmm0, xmm1
- sub ecx, 16
- movdqa [esi + edi], xmm0
- lea esi, [esi + 16]
+ vmovdqu ymm0, [esi]
+ vmovdqu ymm2, [esi + edx]
+ vpunpckhbw ymm1, ymm0, ymm2 // mutates
+ vpunpcklbw ymm0, ymm0, ymm2
+ vpsubb ymm1, ymm1, ymm4 // bias to signed image
+ vpsubb ymm0, ymm0, ymm4
+ vpmaddubsw ymm1, ymm5, ymm1
+ vpmaddubsw ymm0, ymm5, ymm0
+ vpaddw ymm1, ymm1, ymm4 // unbias and round
+ vpaddw ymm0, ymm0, ymm4
+ vpsrlw ymm1, ymm1, 8
+ vpsrlw ymm0, ymm0, 8
+ vpackuswb ymm0, ymm0, ymm1 // unmutates
+ vmovdqu [esi + edi], ymm0
+ lea esi, [esi + 32]
+ sub ecx, 32
jg xloop
jmp xloop99
- // Blend 25 / 75.
- align 4
- xloop25:
- movdqa xmm0, [esi]
- movdqa xmm1, [esi + edx]
- pavgb xmm0, xmm1
- pavgb xmm0, xmm1
- sub ecx, 16
- movdqa [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop25
- jmp xloop99
+ // Blend 50 / 50.
+ xloop50:
+ vmovdqu ymm0, [esi]
+ vpavgb ymm0, ymm0, [esi + edx]
+ vmovdqu [esi + edi], ymm0
+ lea esi, [esi + 32]
+ sub ecx, 32
+ jg xloop50
+ jmp xloop99
- // Blend 50 / 50.
- align 4
- xloop50:
- movdqa xmm0, [esi]
- movdqa xmm1, [esi + edx]
- pavgb xmm0, xmm1
- sub ecx, 16
- movdqa [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop50
- jmp xloop99
-
- // Blend 75 / 25.
- align 4
- xloop75:
- movdqa xmm1, [esi]
- movdqa xmm0, [esi + edx]
- pavgb xmm0, xmm1
- pavgb xmm0, xmm1
- sub ecx, 16
- movdqa [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop75
- jmp xloop99
-
- // Blend 100 / 0 - Copy row unchanged.
- align 4
- xloop100:
- movdqa xmm0, [esi]
- sub ecx, 16
- movdqa [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop100
+ // Blend 100 / 0 - Copy row unchanged.
+ xloop100:
+ rep movsb
xloop99:
pop edi
pop esi
+ vzeroupper
ret
}
}
-#endif // HAS_INTERPOLATEROW_SSE2
+#endif // HAS_INTERPOLATEROW_AVX2
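// What the fraction setup and vpmaddubsw loop above compute per byte pair,
// as a scalar sketch (InterpolatePixel is a hypothetical helper, not a
// libyuv entry point). Biasing the bytes by -128 keeps pmaddubsw's signed
// operand in range; adding 0x8080 to each 16-bit sum restores the bias and
// rounds before the shift.
static __inline uint8 InterpolatePixel(uint8 row0, uint8 row1,
                                       int source_y_fraction) {
  // source_y_fraction is 1..255 here; 0 and 128 are dispatched to the
  // rep movsb copy and vpavgb paths instead.
  return (uint8)((row0 * (256 - source_y_fraction) +
                  row1 * source_y_fraction + 128) >> 8);
}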
// Bilinear filter 16x2 -> 16x1
-__declspec(naked) __declspec(align(16))
-void InterpolateRow_Unaligned_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) {
+// TODO(fbarchard): Consider allowing 256 using memcpy.
+__declspec(naked)
+void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
+ ptrdiff_t src_stride, int dst_width,
+ int source_y_fraction) {
__asm {
push esi
push edi
+
mov edi, [esp + 8 + 4] // dst_ptr
mov esi, [esp + 8 + 8] // src_ptr
mov edx, [esp + 8 + 12] // src_stride
mov ecx, [esp + 8 + 16] // dst_width
mov eax, [esp + 8 + 20] // source_y_fraction (0..255)
sub edi, esi
- shr eax, 1
// Dispatch to specialized filters if applicable.
cmp eax, 0
- je xloop100 // 0 / 128. Blend 100 / 0.
- cmp eax, 32
- je xloop75 // 32 / 128 is 0.25. Blend 75 / 25.
- cmp eax, 64
- je xloop50 // 64 / 128 is 0.50. Blend 50 / 50.
- cmp eax, 96
- je xloop25 // 96 / 128 is 0.75. Blend 25 / 75.
+ je xloop100 // 0 / 256. Blend 100 / 0.
+ cmp eax, 128
+ je xloop50 // 128 / 256 is 0.50. Blend 50 / 50.
- movd xmm0, eax // high fraction 0..127
+ movd xmm0, eax // high fraction 0..255
neg eax
- add eax, 128
- movd xmm5, eax // low fraction 128..1
+ add eax, 256
+ movd xmm5, eax // low fraction 255..1
punpcklbw xmm5, xmm0
punpcklwd xmm5, xmm5
pshufd xmm5, xmm5, 0
+ mov eax, 0x80808080 // 128 for biasing image to signed.
+ movd xmm4, eax
+ pshufd xmm4, xmm4, 0x00
- align 4
xloop:
movdqu xmm0, [esi]
movdqu xmm2, [esi + edx]
movdqu xmm1, xmm0
punpcklbw xmm0, xmm2
punpckhbw xmm1, xmm2
- pmaddubsw xmm0, xmm5
- pmaddubsw xmm1, xmm5
- psrlw xmm0, 7
- psrlw xmm1, 7
- packuswb xmm0, xmm1
- sub ecx, 16
- movdqu [esi + edi], xmm0
+ psubb xmm0, xmm4 // bias image by -128
+ psubb xmm1, xmm4
+ movdqa xmm2, xmm5
+ movdqa xmm3, xmm5
+ pmaddubsw xmm2, xmm0
+ pmaddubsw xmm3, xmm1
+ paddw xmm2, xmm4
+ paddw xmm3, xmm4
+ psrlw xmm2, 8
+ psrlw xmm3, 8
+ packuswb xmm2, xmm3
+ movdqu [esi + edi], xmm2
lea esi, [esi + 16]
+ sub ecx, 16
jg xloop
jmp xloop99
- // Blend 25 / 75.
- align 4
- xloop25:
- movdqu xmm0, [esi]
- movdqu xmm1, [esi + edx]
- pavgb xmm0, xmm1
- pavgb xmm0, xmm1
- sub ecx, 16
- movdqu [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop25
- jmp xloop99
-
// Blend 50 / 50.
- align 4
xloop50:
movdqu xmm0, [esi]
movdqu xmm1, [esi + edx]
pavgb xmm0, xmm1
- sub ecx, 16
movdqu [esi + edi], xmm0
lea esi, [esi + 16]
+ sub ecx, 16
jg xloop50
jmp xloop99
- // Blend 75 / 25.
- align 4
- xloop75:
- movdqu xmm1, [esi]
- movdqu xmm0, [esi + edx]
- pavgb xmm0, xmm1
- pavgb xmm0, xmm1
- sub ecx, 16
- movdqu [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop75
- jmp xloop99
-
// Blend 100 / 0 - Copy row unchanged.
- align 4
xloop100:
movdqu xmm0, [esi]
- sub ecx, 16
movdqu [esi + edi], xmm0
lea esi, [esi + 16]
+ sub ecx, 16
jg xloop100
xloop99:
@@ -6619,303 +5710,53 @@ void InterpolateRow_Unaligned_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
}
}
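// Taken together, the dispatch in the AVX2 and SSSE3 versions reduces to
// this scalar sketch (hypothetical helper; reuses InterpolatePixel from the
// note above and assumes <string.h> is available for memcpy):
static void InterpolateRowSketch_C(uint8* dst_ptr, const uint8* src_ptr,
                                   ptrdiff_t src_stride, int dst_width,
                                   int source_y_fraction) {
  int x;
  if (source_y_fraction == 0) {    // Blend 100 / 0: plain row copy.
    memcpy(dst_ptr, src_ptr, dst_width);
    return;
  }
  if (source_y_fraction == 128) {  // Blend 50 / 50: pavgb-style rounding.
    for (x = 0; x < dst_width; ++x) {
      dst_ptr[x] = (uint8)((src_ptr[x] + src_ptr[x + src_stride] + 1) >> 1);
    }
    return;
  }
  for (x = 0; x < dst_width; ++x) {  // General weighted blend.
    dst_ptr[x] = InterpolatePixel(src_ptr[x], src_ptr[x + src_stride],
                                  source_y_fraction);
  }
}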
-#ifdef HAS_INTERPOLATEROW_SSE2
-// Bilinear filter 16x2 -> 16x1
-__declspec(naked) __declspec(align(16))
-void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) {
- __asm {
- push esi
- push edi
- mov edi, [esp + 8 + 4] // dst_ptr
- mov esi, [esp + 8 + 8] // src_ptr
- mov edx, [esp + 8 + 12] // src_stride
- mov ecx, [esp + 8 + 16] // dst_width
- mov eax, [esp + 8 + 20] // source_y_fraction (0..255)
- sub edi, esi
- // Dispatch to specialized filters if applicable.
- cmp eax, 0
- je xloop100 // 0 / 256. Blend 100 / 0.
- cmp eax, 64
- je xloop75 // 64 / 256 is 0.25. Blend 75 / 25.
- cmp eax, 128
- je xloop50 // 128 / 256 is 0.50. Blend 50 / 50.
- cmp eax, 192
- je xloop25 // 192 / 256 is 0.75. Blend 25 / 75.
-
- movd xmm5, eax // xmm5 = y fraction
- punpcklbw xmm5, xmm5
- psrlw xmm5, 1
- punpcklwd xmm5, xmm5
- punpckldq xmm5, xmm5
- punpcklqdq xmm5, xmm5
- pxor xmm4, xmm4
-
- align 4
- xloop:
- movdqu xmm0, [esi] // row0
- movdqu xmm2, [esi + edx] // row1
- movdqu xmm1, xmm0
- movdqu xmm3, xmm2
- punpcklbw xmm2, xmm4
- punpckhbw xmm3, xmm4
- punpcklbw xmm0, xmm4
- punpckhbw xmm1, xmm4
- psubw xmm2, xmm0 // row1 - row0
- psubw xmm3, xmm1
- paddw xmm2, xmm2 // 9 bits * 15 bits = 8.16
- paddw xmm3, xmm3
- pmulhw xmm2, xmm5 // scale diff
- pmulhw xmm3, xmm5
- paddw xmm0, xmm2 // sum rows
- paddw xmm1, xmm3
- packuswb xmm0, xmm1
- sub ecx, 16
- movdqu [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop
- jmp xloop99
-
- // Blend 25 / 75.
- align 4
- xloop25:
- movdqu xmm0, [esi]
- movdqu xmm1, [esi + edx]
- pavgb xmm0, xmm1
- pavgb xmm0, xmm1
- sub ecx, 16
- movdqu [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop25
- jmp xloop99
-
- // Blend 50 / 50.
- align 4
- xloop50:
- movdqu xmm0, [esi]
- movdqu xmm1, [esi + edx]
- pavgb xmm0, xmm1
- sub ecx, 16
- movdqu [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop50
- jmp xloop99
-
- // Blend 75 / 25.
- align 4
- xloop75:
- movdqu xmm1, [esi]
- movdqu xmm0, [esi + edx]
- pavgb xmm0, xmm1
- pavgb xmm0, xmm1
- sub ecx, 16
- movdqu [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop75
- jmp xloop99
-
- // Blend 100 / 0 - Copy row unchanged.
- align 4
- xloop100:
- movdqu xmm0, [esi]
- sub ecx, 16
- movdqu [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop100
-
- xloop99:
- pop edi
- pop esi
- ret
- }
-}
-#endif // HAS_INTERPOLATEROW_SSE2
-
-__declspec(naked) __declspec(align(16))
-void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
- uint8* dst_uv, int pix) {
- __asm {
- push edi
- mov eax, [esp + 4 + 4] // src_uv
- mov edx, [esp + 4 + 8] // src_uv_stride
- mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // pix
- sub edi, eax
-
- align 4
- convertloop:
- movdqa xmm0, [eax]
- pavgb xmm0, [eax + edx]
- sub ecx, 16
- movdqa [eax + edi], xmm0
- lea eax, [eax + 16]
- jg convertloop
- pop edi
- ret
- }
-}
-
-#ifdef HAS_HALFROW_AVX2
-__declspec(naked) __declspec(align(16))
-void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride,
- uint8* dst_uv, int pix) {
- __asm {
- push edi
- mov eax, [esp + 4 + 4] // src_uv
- mov edx, [esp + 4 + 8] // src_uv_stride
- mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // pix
- sub edi, eax
-
- align 4
- convertloop:
- vmovdqu ymm0, [eax]
- vpavgb ymm0, ymm0, [eax + edx]
- sub ecx, 32
- vmovdqu [eax + edi], ymm0
- lea eax, [eax + 32]
- jg convertloop
-
- pop edi
- vzeroupper
- ret
- }
-}
-#endif // HAS_HALFROW_AVX2
-
-__declspec(naked) __declspec(align(16))
-void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
- uint32 selector, int pix) {
- __asm {
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_bayer
- movd xmm5, [esp + 12] // selector
- mov ecx, [esp + 16] // pix
- pshufd xmm5, xmm5, 0
-
- align 4
- wloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- lea eax, [eax + 32]
- pshufb xmm0, xmm5
- pshufb xmm1, xmm5
- punpckldq xmm0, xmm1
- sub ecx, 8
- movq qword ptr [edx], xmm0
- lea edx, [edx + 8]
- jg wloop
- ret
- }
-}
-
-// Specialized ARGB to Bayer that just isolates G channel.
-__declspec(naked) __declspec(align(16))
-void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer,
- uint32 selector, int pix) {
- __asm {
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_bayer
- // selector
- mov ecx, [esp + 16] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0x000000ff
- psrld xmm5, 24
-
- align 4
- wloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- lea eax, [eax + 32]
- psrld xmm0, 8 // Move green to bottom.
- psrld xmm1, 8
- pand xmm0, xmm5
- pand xmm1, xmm5
- packssdw xmm0, xmm1
- packuswb xmm0, xmm1
- sub ecx, 8
- movq qword ptr [edx], xmm0
- lea edx, [edx + 8]
- jg wloop
- ret
- }
-}
-
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix) {
+ const uint8* shuffler, int width) {
__asm {
mov eax, [esp + 4] // src_argb
mov edx, [esp + 8] // dst_argb
mov ecx, [esp + 12] // shuffler
- movdqa xmm5, [ecx]
- mov ecx, [esp + 16] // pix
+ movdqu xmm5, [ecx]
+ mov ecx, [esp + 16] // width
- align 4
- wloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- lea eax, [eax + 32]
- pshufb xmm0, xmm5
- pshufb xmm1, xmm5
- sub ecx, 8
- movdqa [edx], xmm0
- movdqa [edx + 16], xmm1
- lea edx, [edx + 32]
- jg wloop
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void ARGBShuffleRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix) {
- __asm {
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // shuffler
- movdqa xmm5, [ecx]
- mov ecx, [esp + 16] // pix
-
- align 4
wloop:
movdqu xmm0, [eax]
movdqu xmm1, [eax + 16]
lea eax, [eax + 32]
pshufb xmm0, xmm5
pshufb xmm1, xmm5
- sub ecx, 8
movdqu [edx], xmm0
movdqu [edx + 16], xmm1
lea edx, [edx + 32]
+ sub ecx, 8
jg wloop
ret
}
}
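// Scalar view of the pshufb above, in the spirit of ARGBShuffleRow_C
// (ARGBShuffleRowSketch_C is a hypothetical name): each of the first 4
// shuffler bytes names which source byte of a pixel feeds that destination
// byte; the SIMD table simply repeats the pattern across the register.
static void ARGBShuffleRowSketch_C(const uint8* src_argb, uint8* dst_argb,
                                   const uint8* shuffler, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_argb[0] = src_argb[shuffler[0]];
    dst_argb[1] = src_argb[shuffler[1]];
    dst_argb[2] = src_argb[shuffler[2]];
    dst_argb[3] = src_argb[shuffler[3]];
    src_argb += 4;
    dst_argb += 4;
  }
}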
#ifdef HAS_ARGBSHUFFLEROW_AVX2
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix) {
+ const uint8* shuffler, int width) {
__asm {
mov eax, [esp + 4] // src_argb
mov edx, [esp + 8] // dst_argb
mov ecx, [esp + 12] // shuffler
vbroadcastf128 ymm5, [ecx] // same shuffle in high as low.
- mov ecx, [esp + 16] // pix
+ mov ecx, [esp + 16] // width
- align 4
wloop:
vmovdqu ymm0, [eax]
vmovdqu ymm1, [eax + 32]
lea eax, [eax + 64]
vpshufb ymm0, ymm0, ymm5
vpshufb ymm1, ymm1, ymm5
- sub ecx, 16
vmovdqu [edx], ymm0
vmovdqu [edx + 32], ymm1
lea edx, [edx + 64]
+ sub ecx, 16
jg wloop
vzeroupper
@@ -6924,16 +5765,16 @@ void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
}
#endif // HAS_ARGBSHUFFLEROW_AVX2
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix) {
+ const uint8* shuffler, int width) {
__asm {
push ebx
push esi
mov eax, [esp + 8 + 4] // src_argb
mov edx, [esp + 8 + 8] // dst_argb
mov esi, [esp + 8 + 12] // shuffler
- mov ecx, [esp + 8 + 16] // pix
+ mov ecx, [esp + 8 + 16] // width
pxor xmm5, xmm5
mov ebx, [esi] // shuffler
@@ -6966,7 +5807,6 @@ void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
jg shuf_any1
jmp shuf99
- align 4
shuf_0123:
movdqu xmm0, [eax]
lea eax, [eax + 16]
@@ -6978,13 +5818,12 @@ void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
pshufhw xmm1, xmm1, 01Bh
pshuflw xmm1, xmm1, 01Bh
packuswb xmm0, xmm1
- sub ecx, 4
movdqu [edx], xmm0
lea edx, [edx + 16]
+ sub ecx, 4
jg shuf_0123
jmp shuf99
- align 4
shuf_0321:
movdqu xmm0, [eax]
lea eax, [eax + 16]
@@ -6996,13 +5835,12 @@ void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
pshufhw xmm1, xmm1, 039h
pshuflw xmm1, xmm1, 039h
packuswb xmm0, xmm1
- sub ecx, 4
movdqu [edx], xmm0
lea edx, [edx + 16]
+ sub ecx, 4
jg shuf_0321
jmp shuf99
- align 4
shuf_2103:
movdqu xmm0, [eax]
lea eax, [eax + 16]
@@ -7014,13 +5852,12 @@ void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
pshufhw xmm1, xmm1, 093h
pshuflw xmm1, xmm1, 093h
packuswb xmm0, xmm1
- sub ecx, 4
movdqu [edx], xmm0
lea edx, [edx + 16]
+ sub ecx, 4
jg shuf_2103
jmp shuf99
- align 4
shuf_3012:
movdqu xmm0, [eax]
lea eax, [eax + 16]
@@ -7032,9 +5869,9 @@ void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
pshufhw xmm1, xmm1, 0C6h
pshuflw xmm1, xmm1, 0C6h
packuswb xmm0, xmm1
- sub ecx, 4
movdqu [edx], xmm0
lea edx, [edx + 16]
+ sub ecx, 4
jg shuf_3012
shuf99:
@@ -7050,7 +5887,7 @@ void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
// UYVY - Macro-pixel = 2 image pixels
// U0Y0V0Y1
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void I422ToYUY2Row_SSE2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
@@ -7065,7 +5902,6 @@ void I422ToYUY2Row_SSE2(const uint8* src_y,
mov ecx, [esp + 8 + 20] // width
sub edx, esi
- align 4
convertloop:
movq xmm2, qword ptr [esi] // U
movq xmm3, qword ptr [esi + edx] // V
@@ -7088,7 +5924,7 @@ void I422ToYUY2Row_SSE2(const uint8* src_y,
}
}
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void I422ToUYVYRow_SSE2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
@@ -7103,7 +5939,6 @@ void I422ToUYVYRow_SSE2(const uint8* src_y,
mov ecx, [esp + 8 + 20] // width
sub edx, esi
- align 4
convertloop:
movq xmm2, qword ptr [esi] // U
movq xmm3, qword ptr [esi + edx] // V
@@ -7127,7 +5962,7 @@ void I422ToUYVYRow_SSE2(const uint8* src_y,
}
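// The two packers above in scalar form (PackMacroPixelSketch is a
// hypothetical helper). One U/V pair covers two image pixels in both
// 4:2:2 byte orders: YUY2 packs Y0 U0 Y1 V0, UYVY packs U0 Y0 V0 Y1.
static void PackMacroPixelSketch(uint8 y0, uint8 y1, uint8 u, uint8 v,
                                 uint8* yuy2, uint8* uyvy) {
  yuy2[0] = y0; yuy2[1] = u;  yuy2[2] = y1; yuy2[3] = v;
  uyvy[0] = u;  uyvy[1] = y0; uyvy[2] = v;  uyvy[3] = y1;
}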
#ifdef HAS_ARGBPOLYNOMIALROW_SSE2
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGBPolynomialRow_SSE2(const uint8* src_argb,
uint8* dst_argb, const float* poly,
int width) {
@@ -7140,7 +5975,6 @@ void ARGBPolynomialRow_SSE2(const uint8* src_argb,
pxor xmm3, xmm3 // 0 constant for zero extending bytes to ints.
// 2 pixel loop.
- align 4
convertloop:
// pmovzxbd xmm0, dword ptr [eax] // BGRA pixel
// pmovzxbd xmm4, dword ptr [eax + 4] // BGRA pixel
@@ -7176,9 +6010,9 @@ void ARGBPolynomialRow_SSE2(const uint8* src_argb,
cvttps2dq xmm4, xmm4
packuswb xmm0, xmm4
packuswb xmm0, xmm0
- sub ecx, 2
movq qword ptr [edx], xmm0
lea edx, [edx + 8]
+ sub ecx, 2
jg convertloop
pop esi
ret
@@ -7187,7 +6021,7 @@ void ARGBPolynomialRow_SSE2(const uint8* src_argb,
#endif // HAS_ARGBPOLYNOMIALROW_SSE2
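// Per-channel cubic evaluated by the 2 pixel loop above. poly holds 16
// floats, grouped as 4 constant, 4 linear, 4 quadratic and 4 cubic
// coefficients in B, G, R, A order, so channel c uses poly[c], poly[c+4],
// poly[c+8], poly[c+12]. A hedged scalar sketch (hypothetical helper);
// the SIMD code gets the 0..255 clamp from packuswb saturation.
static uint8 PolynomialChannelSketch(uint8 v, const float* poly, int c) {
  float f = (float)v;
  float r = poly[c] + poly[c + 4] * f + poly[c + 8] * f * f +
            poly[c + 12] * f * f * f;
  if (r < 0.f) r = 0.f;
  if (r > 255.f) r = 255.f;
  return (uint8)r;  // cvttps2dq also truncates toward zero.
}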
#ifdef HAS_ARGBPOLYNOMIALROW_AVX2
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGBPolynomialRow_AVX2(const uint8* src_argb,
uint8* dst_argb, const float* poly,
int width) {
@@ -7202,7 +6036,6 @@ void ARGBPolynomialRow_AVX2(const uint8* src_argb,
mov ecx, [esp + 16] /* width */
// 2 pixel loop.
- align 4
convertloop:
vpmovzxbd ymm0, qword ptr [eax] // 2 BGRA pixels
lea eax, [eax + 8]
@@ -7216,9 +6049,9 @@ void ARGBPolynomialRow_AVX2(const uint8* src_argb,
vpackusdw ymm0, ymm0, ymm0 // b0g0r0a0_00000000_b0g0r0a0_00000000
vpermq ymm0, ymm0, 0xd8 // b0g0r0a0_b0g0r0a0_00000000_00000000
vpackuswb xmm0, xmm0, xmm0 // bgrabgra_00000000_00000000_00000000
- sub ecx, 2
vmovq qword ptr [edx], xmm0
lea edx, [edx + 8]
+ sub ecx, 2
jg convertloop
vzeroupper
ret
@@ -7228,7 +6061,7 @@ void ARGBPolynomialRow_AVX2(const uint8* src_argb,
#ifdef HAS_ARGBCOLORTABLEROW_X86
// Transform ARGB pixels with color table.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb,
int width) {
__asm {
@@ -7238,7 +6071,6 @@ void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb,
mov ecx, [esp + 4 + 12] /* width */
// 1 pixel loop.
- align 4
convertloop:
movzx edx, byte ptr [eax]
lea eax, [eax + 4]
@@ -7263,7 +6095,7 @@ void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb,
#ifdef HAS_RGBCOLORTABLEROW_X86
// Transform RGB pixels with color table.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) {
__asm {
push esi
@@ -7272,7 +6104,6 @@ void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) {
mov ecx, [esp + 4 + 12] /* width */
// 1 pixel loop.
- align 4
convertloop:
movzx edx, byte ptr [eax]
lea eax, [eax + 4]
@@ -7295,7 +6126,7 @@ void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) {
#ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3
// Transform RGB pixels with luma table.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
int width,
const uint8* luma, uint32 lumacoeff) {
@@ -7314,9 +6145,8 @@ void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
pxor xmm5, xmm5
// 4 pixel loop.
- align 4
convertloop:
- movdqu xmm0, qword ptr [eax] // generate luma ptr
+ movdqu xmm0, xmmword ptr [eax] // generate luma ptr
pmaddubsw xmm0, xmm3
phaddw xmm0, xmm0
pand xmm0, xmm4 // mask out low bits
@@ -7381,9 +6211,9 @@ void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
movzx edx, byte ptr [eax + 15] // copy alpha.
mov byte ptr [edi + 15], dl
- sub ecx, 4
lea eax, [eax + 16]
lea edi, [edi + 16]
+ sub ecx, 4
jg convertloop
pop edi
@@ -7394,7 +6224,7 @@ void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
#endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
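// Scalar sketch of one pixel of the luma table lookup above, following the
// C fallback's arithmetic (LumaPixelSketch is a hypothetical helper): a
// weighted luma, with the B, G, R weights packed in the low 3 bytes of
// lumacoeff, selects a 256-byte row of 'luma', and B, G, R are remapped
// through that row while alpha is copied unchanged.
static void LumaPixelSketch(const uint8* src, uint8* dst,
                            const uint8* luma, uint32 lumacoeff) {
  uint32 bc = lumacoeff & 0xff;
  uint32 gc = (lumacoeff >> 8) & 0xff;
  uint32 rc = (lumacoeff >> 16) & 0xff;
  const uint8* row =
      luma + ((src[0] * bc + src[1] * gc + src[2] * rc) & 0x7F00u);
  dst[0] = row[src[0]];
  dst[1] = row[src[1]];
  dst[2] = row[src[2]];
  dst[3] = src[3];  // alpha
}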
#endif // defined(_M_X64)
-#endif // !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER)
+#endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64))
#ifdef __cplusplus
} // extern "C"
diff --git a/TMessagesProj/jni/libyuv/source/row_x86.asm b/TMessagesProj/jni/libyuv/source/row_x86.asm
deleted file mode 100644
index 0cb326f8e..000000000
--- a/TMessagesProj/jni/libyuv/source/row_x86.asm
+++ /dev/null
@@ -1,146 +0,0 @@
-;
-; Copyright 2012 The LibYuv Project Authors. All rights reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-%ifdef __YASM_VERSION_ID__
-%if __YASM_VERSION_ID__ < 01020000h
-%error AVX2 is supported only by yasm 1.2.0 or later.
-%endif
-%endif
-%include "x86inc.asm"
-
-SECTION .text
-
-; cglobal numeric constants are parameters, gpr regs, mm regs
-
-; void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix)
-
-%macro YUY2TOYROW 2-3
-cglobal %1ToYRow%3, 3, 3, 3, src_yuy2, dst_y, pix
-%ifidn %1,YUY2
- pcmpeqb m2, m2, m2 ; generate mask 0x00ff00ff
- psrlw m2, m2, 8
-%endif
-
- ALIGN 4
-.convertloop:
- mov%2 m0, [src_yuy2q]
- mov%2 m1, [src_yuy2q + mmsize]
- lea src_yuy2q, [src_yuy2q + mmsize * 2]
-%ifidn %1,YUY2
- pand m0, m0, m2 ; YUY2 even bytes are Y
- pand m1, m1, m2
-%else
- psrlw m0, m0, 8 ; UYVY odd bytes are Y
- psrlw m1, m1, 8
-%endif
- packuswb m0, m0, m1
-%if cpuflag(AVX2)
- vpermq m0, m0, 0xd8
-%endif
- sub pixd, mmsize
- mov%2 [dst_yq], m0
- lea dst_yq, [dst_yq + mmsize]
- jg .convertloop
- REP_RET
-%endmacro
-
-; TODO(fbarchard): Remove MMX. Add SSSE3 pshufb version.
-INIT_MMX MMX
-YUY2TOYROW YUY2,a,
-YUY2TOYROW YUY2,u,_Unaligned
-YUY2TOYROW UYVY,a,
-YUY2TOYROW UYVY,u,_Unaligned
-INIT_XMM SSE2
-YUY2TOYROW YUY2,a,
-YUY2TOYROW YUY2,u,_Unaligned
-YUY2TOYROW UYVY,a,
-YUY2TOYROW UYVY,u,_Unaligned
-INIT_YMM AVX2
-YUY2TOYROW YUY2,a,
-YUY2TOYROW UYVY,a,
-
-; void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix)
-
-%macro SplitUVRow 1-2
-cglobal SplitUVRow%2, 4, 4, 5, src_uv, dst_u, dst_v, pix
- pcmpeqb m4, m4, m4 ; generate mask 0x00ff00ff
- psrlw m4, m4, 8
- sub dst_vq, dst_uq
-
- ALIGN 4
-.convertloop:
- mov%1 m0, [src_uvq]
- mov%1 m1, [src_uvq + mmsize]
- lea src_uvq, [src_uvq + mmsize * 2]
- psrlw m2, m0, 8 ; odd bytes
- psrlw m3, m1, 8
- pand m0, m0, m4 ; even bytes
- pand m1, m1, m4
- packuswb m0, m0, m1
- packuswb m2, m2, m3
-%if cpuflag(AVX2)
- vpermq m0, m0, 0xd8
- vpermq m2, m2, 0xd8
-%endif
- mov%1 [dst_uq], m0
- mov%1 [dst_uq + dst_vq], m2
- lea dst_uq, [dst_uq + mmsize]
- sub pixd, mmsize
- jg .convertloop
- REP_RET
-%endmacro
-
-INIT_MMX MMX
-SplitUVRow a,
-SplitUVRow u,_Unaligned
-INIT_XMM SSE2
-SplitUVRow a,
-SplitUVRow u,_Unaligned
-INIT_YMM AVX2
-SplitUVRow a,
-
-; void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-; int width);
-
-%macro MergeUVRow_ 1-2
-cglobal MergeUVRow_%2, 4, 4, 3, src_u, src_v, dst_uv, pix
- sub src_vq, src_uq
-
- ALIGN 4
-.convertloop:
- mov%1 m0, [src_uq]
- mov%1 m1, [src_vq]
- lea src_uq, [src_uq + mmsize]
- punpcklbw m2, m0, m1 // first 8 UV pairs
- punpckhbw m0, m0, m1 // next 8 UV pairs
-%if cpuflag(AVX2)
- vperm2i128 m1, m2, m0, 0x20 // low 128 of ymm2 and low 128 of ymm0
- vperm2i128 m2, m2, m0, 0x31 // high 128 of ymm2 and high 128 of ymm0
- mov%1 [dst_uvq], m1
- mov%1 [dst_uvq + mmsize], m2
-%else
- mov%1 [dst_uvq], m2
- mov%1 [dst_uvq + mmsize], m0
-%endif
- lea dst_uvq, [dst_uvq + mmsize * 2]
- sub pixd, mmsize
- jg .convertloop
- REP_RET
-%endmacro
-
-INIT_MMX MMX
-MergeUVRow_ a,
-MergeUVRow_ u,_Unaligned
-INIT_XMM SSE2
-MergeUVRow_ a,
-MergeUVRow_ u,_Unaligned
-INIT_YMM AVX2
-MergeUVRow_ a,
-
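// The deleted YASM macros expanded to row functions whose per-element
// behavior matches the C fallbacks in row_common.cc; a sketch with
// hypothetical names, using libyuv's uint8 typedef:
static void SplitUVRowSketch_C(const uint8* src_uv, uint8* dst_u,
                               uint8* dst_v, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    dst_u[x] = src_uv[0];  // even bytes are U
    dst_v[x] = src_uv[1];  // odd bytes are V
    src_uv += 2;
  }
}
static void MergeUVRowSketch_C(const uint8* src_u, const uint8* src_v,
                               uint8* dst_uv, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    dst_uv[0] = src_u[x];
    dst_uv[1] = src_v[x];
    dst_uv += 2;
  }
}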
diff --git a/TMessagesProj/jni/libyuv/source/scale.cc b/TMessagesProj/jni/libyuv/source/scale.cc
index 5b33b5f04..595314f35 100644
--- a/TMessagesProj/jni/libyuv/source/scale.cc
+++ b/TMessagesProj/jni/libyuv/source/scale.cc
@@ -23,9 +23,6 @@ namespace libyuv {
extern "C" {
#endif
-// Remove this macro if OVERREAD is safe.
-#define AVOID_OVERREAD 1
-
static __inline int Abs(int v) {
return v >= 0 ? v : -v;
}
@@ -44,9 +41,8 @@ static void ScalePlaneDown2(int src_width, int src_height,
int y;
void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) =
- filtering == kFilterNone ? ScaleRowDown2_C :
- (filtering == kFilterLinear ? ScaleRowDown2Linear_C :
- ScaleRowDown2Box_C);
+ filtering == kFilterNone ? ScaleRowDown2_C :
+ (filtering == kFilterLinear ? ScaleRowDown2Linear_C : ScaleRowDown2Box_C);
int row_stride = src_stride << 1;
if (!filtering) {
src_ptr += src_stride; // Point to odd rows.
@@ -54,23 +50,42 @@ static void ScalePlaneDown2(int src_width, int src_height,
}
#if defined(HAS_SCALEROWDOWN2_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
- ScaleRowDown2 = filtering ? ScaleRowDown2Box_NEON : ScaleRowDown2_NEON;
- }
-#elif defined(HAS_SCALEROWDOWN2_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
- ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Unaligned_SSE2 :
- (filtering == kFilterLinear ? ScaleRowDown2Linear_Unaligned_SSE2 :
- ScaleRowDown2Box_Unaligned_SSE2);
- if (IS_ALIGNED(src_ptr, 16) &&
- IS_ALIGNED(src_stride, 16) && IS_ALIGNED(row_stride, 16) &&
- IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
- ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 :
- (filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 :
- ScaleRowDown2Box_SSE2);
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_NEON :
+ (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_NEON :
+ ScaleRowDown2Box_Any_NEON);
+ if (IS_ALIGNED(dst_width, 16)) {
+ ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_NEON :
+ (filtering == kFilterLinear ? ScaleRowDown2Linear_NEON :
+ ScaleRowDown2Box_NEON);
}
}
-#elif defined(HAS_SCALEROWDOWN2_MIPS_DSPR2)
+#endif
+#if defined(HAS_SCALEROWDOWN2_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_SSSE3 :
+ (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSSE3 :
+ ScaleRowDown2Box_Any_SSSE3);
+ if (IS_ALIGNED(dst_width, 16)) {
+ ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSSE3 :
+ (filtering == kFilterLinear ? ScaleRowDown2Linear_SSSE3 :
+ ScaleRowDown2Box_SSSE3);
+ }
+ }
+#endif
+#if defined(HAS_SCALEROWDOWN2_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_AVX2 :
+ (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_AVX2 :
+ ScaleRowDown2Box_Any_AVX2);
+ if (IS_ALIGNED(dst_width, 32)) {
+ ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_AVX2 :
+ (filtering == kFilterLinear ? ScaleRowDown2Linear_AVX2 :
+ ScaleRowDown2Box_AVX2);
+ }
+ }
+#endif
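+// Dispatch pattern used throughout these blocks: take the _Any_ variant
+// first (it handles any width, running SIMD on full vector groups and C
+// code on the remainder), then promote to the pure-SIMD variant once
+// dst_width is a multiple of the vector width (16 for SSSE3, 32 for AVX2).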
+#if defined(HAS_SCALEROWDOWN2_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
@@ -112,21 +127,15 @@ static void ScalePlaneDown2_16(int src_width, int src_height,
ScaleRowDown2 = filtering ? ScaleRowDown2Box_16_NEON :
ScaleRowDown2_16_NEON;
}
-#elif defined(HAS_SCALEROWDOWN2_16_SSE2)
+#endif
+#if defined(HAS_SCALEROWDOWN2_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
- ScaleRowDown2 = filtering == kFilterNone ?
- ScaleRowDown2_Unaligned_16_SSE2 :
- (filtering == kFilterLinear ? ScaleRowDown2Linear_Unaligned_16_SSE2 :
- ScaleRowDown2Box_Unaligned_16_SSE2);
- if (IS_ALIGNED(src_ptr, 16) &&
- IS_ALIGNED(src_stride, 16) && IS_ALIGNED(row_stride, 16) &&
- IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
- ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_16_SSE2 :
- (filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2 :
- ScaleRowDown2Box_16_SSE2);
- }
+ ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_16_SSE2 :
+ (filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2 :
+ ScaleRowDown2Box_16_SSE2);
}
-#elif defined(HAS_SCALEROWDOWN2_16_MIPS_DSPR2)
+#endif
+#if defined(HAS_SCALEROWDOWN2_16_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
@@ -165,16 +174,33 @@ static void ScalePlaneDown4(int src_width, int src_height,
src_stride = 0;
}
#if defined(HAS_SCALEROWDOWN4_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
- ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ScaleRowDown4 = filtering ?
+ ScaleRowDown4Box_Any_NEON : ScaleRowDown4_Any_NEON;
+ if (IS_ALIGNED(dst_width, 8)) {
+ ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
+ }
}
-#elif defined(HAS_SCALEROWDOWN4_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) &&
- IS_ALIGNED(dst_width, 8) && IS_ALIGNED(row_stride, 16) &&
- IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
- ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSE2 : ScaleRowDown4_SSE2;
+#endif
+#if defined(HAS_SCALEROWDOWN4_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ScaleRowDown4 = filtering ?
+ ScaleRowDown4Box_Any_SSSE3 : ScaleRowDown4_Any_SSSE3;
+ if (IS_ALIGNED(dst_width, 8)) {
+ ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSSE3 : ScaleRowDown4_SSSE3;
+ }
}
-#elif defined(HAS_SCALEROWDOWN4_MIPS_DSPR2)
+#endif
+#if defined(HAS_SCALEROWDOWN4_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ScaleRowDown4 = filtering ?
+ ScaleRowDown4Box_Any_AVX2 : ScaleRowDown4_Any_AVX2;
+ if (IS_ALIGNED(dst_width, 16)) {
+ ScaleRowDown4 = filtering ? ScaleRowDown4Box_AVX2 : ScaleRowDown4_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_SCALEROWDOWN4_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
@@ -212,14 +238,14 @@ static void ScalePlaneDown4_16(int src_width, int src_height,
ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_NEON :
ScaleRowDown4_16_NEON;
}
-#elif defined(HAS_SCALEROWDOWN4_16_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) &&
- IS_ALIGNED(dst_width, 8) && IS_ALIGNED(row_stride, 16) &&
- IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
+#endif
+#if defined(HAS_SCALEROWDOWN4_16_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_SSE2 :
ScaleRowDown4_16_SSE2;
}
-#elif defined(HAS_SCALEROWDOWN4_16_MIPS_DSPR2)
+#endif
+#if defined(HAS_SCALEROWDOWN4_16_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
@@ -260,25 +286,42 @@ static void ScalePlaneDown34(int src_width, int src_height,
ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
}
#if defined(HAS_SCALEROWDOWN34_NEON)
- if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
+ if (TestCpuFlag(kCpuHasNEON)) {
if (!filtering) {
- ScaleRowDown34_0 = ScaleRowDown34_NEON;
- ScaleRowDown34_1 = ScaleRowDown34_NEON;
+ ScaleRowDown34_0 = ScaleRowDown34_Any_NEON;
+ ScaleRowDown34_1 = ScaleRowDown34_Any_NEON;
} else {
- ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
- ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
+ ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_NEON;
+ ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_NEON;
+ }
+ if (dst_width % 24 == 0) {
+ if (!filtering) {
+ ScaleRowDown34_0 = ScaleRowDown34_NEON;
+ ScaleRowDown34_1 = ScaleRowDown34_NEON;
+ } else {
+ ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
+ ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
+ }
}
}
#endif
#if defined(HAS_SCALEROWDOWN34_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
- IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
if (!filtering) {
- ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
- ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
+ ScaleRowDown34_0 = ScaleRowDown34_Any_SSSE3;
+ ScaleRowDown34_1 = ScaleRowDown34_Any_SSSE3;
} else {
- ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
- ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
+ ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_SSSE3;
+ ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_SSSE3;
+ }
+ if (dst_width % 24 == 0) {
+ if (!filtering) {
+ ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
+ ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
+ } else {
+ ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
+ ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
+ }
}
}
#endif
@@ -351,8 +394,7 @@ static void ScalePlaneDown34_16(int src_width, int src_height,
}
#endif
#if defined(HAS_SCALEROWDOWN34_16_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
- IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
+ if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_16_SSSE3;
ScaleRowDown34_1 = ScaleRowDown34_16_SSSE3;
@@ -435,28 +477,47 @@ static void ScalePlaneDown38(int src_width, int src_height,
ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
}
+
#if defined(HAS_SCALEROWDOWN38_NEON)
- if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
+ if (TestCpuFlag(kCpuHasNEON)) {
if (!filtering) {
- ScaleRowDown38_3 = ScaleRowDown38_NEON;
- ScaleRowDown38_2 = ScaleRowDown38_NEON;
+ ScaleRowDown38_3 = ScaleRowDown38_Any_NEON;
+ ScaleRowDown38_2 = ScaleRowDown38_Any_NEON;
} else {
- ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
- ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
+ ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_NEON;
+ ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_NEON;
+ }
+ if (dst_width % 12 == 0) {
+ if (!filtering) {
+ ScaleRowDown38_3 = ScaleRowDown38_NEON;
+ ScaleRowDown38_2 = ScaleRowDown38_NEON;
+ } else {
+ ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
+ ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
+ }
}
}
-#elif defined(HAS_SCALEROWDOWN38_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
- IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
+#endif
+#if defined(HAS_SCALEROWDOWN38_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
if (!filtering) {
+ ScaleRowDown38_3 = ScaleRowDown38_Any_SSSE3;
+ ScaleRowDown38_2 = ScaleRowDown38_Any_SSSE3;
+ } else {
+ ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_SSSE3;
+ ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_SSSE3;
+ }
+ if (dst_width % 12 == 0 && !filtering) {
ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
- } else {
+ }
+ if (dst_width % 6 == 0 && filtering) {
ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
}
}
-#elif defined(HAS_SCALEROWDOWN38_MIPS_DSPR2)
+#endif
+#if defined(HAS_SCALEROWDOWN38_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
@@ -522,9 +583,9 @@ static void ScalePlaneDown38_16(int src_width, int src_height,
ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_NEON;
}
}
-#elif defined(HAS_SCALEROWDOWN38_16_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
- IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
+#endif
+#if defined(HAS_SCALEROWDOWN38_16_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
if (!filtering) {
ScaleRowDown38_3 = ScaleRowDown38_16_SSSE3;
ScaleRowDown38_2 = ScaleRowDown38_16_SSSE3;
@@ -533,7 +594,8 @@ static void ScalePlaneDown38_16(int src_width, int src_height,
ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_SSSE3;
}
}
-#elif defined(HAS_SCALEROWDOWN38_16_MIPS_DSPR2)
+#endif
+#if defined(HAS_SCALEROWDOWN38_16_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
@@ -570,65 +632,7 @@ static void ScalePlaneDown38_16(int src_width, int src_height,
}
}
-static __inline uint32 SumBox(int iboxwidth, int iboxheight,
- ptrdiff_t src_stride, const uint8* src_ptr) {
- uint32 sum = 0u;
- int y;
- assert(iboxwidth > 0);
- assert(iboxheight > 0);
- for (y = 0; y < iboxheight; ++y) {
- int x;
- for (x = 0; x < iboxwidth; ++x) {
- sum += src_ptr[x];
- }
- src_ptr += src_stride;
- }
- return sum;
-}
-
-static __inline uint32 SumBox_16(int iboxwidth, int iboxheight,
- ptrdiff_t src_stride, const uint16* src_ptr) {
- uint32 sum = 0u;
- int y;
- assert(iboxwidth > 0);
- assert(iboxheight > 0);
- for (y = 0; y < iboxheight; ++y) {
- int x;
- for (x = 0; x < iboxwidth; ++x) {
- sum += src_ptr[x];
- }
- src_ptr += src_stride;
- }
- return sum;
-}
-
-static void ScalePlaneBoxRow_C(int dst_width, int boxheight,
- int x, int dx, ptrdiff_t src_stride,
- const uint8* src_ptr, uint8* dst_ptr) {
- int i;
- int boxwidth;
- for (i = 0; i < dst_width; ++i) {
- int ix = x >> 16;
- x += dx;
- boxwidth = (x >> 16) - ix;
- *dst_ptr++ = SumBox(boxwidth, boxheight, src_stride, src_ptr + ix) /
- (boxwidth * boxheight);
- }
-}
-
-static void ScalePlaneBoxRow_16_C(int dst_width, int boxheight,
- int x, int dx, ptrdiff_t src_stride,
- const uint16* src_ptr, uint16* dst_ptr) {
- int i;
- int boxwidth;
- for (i = 0; i < dst_width; ++i) {
- int ix = x >> 16;
- x += dx;
- boxwidth = (x >> 16) - ix;
- *dst_ptr++ = SumBox_16(boxwidth, boxheight, src_stride, src_ptr + ix) /
- (boxwidth * boxheight);
- }
-}
+#define MIN1(x) ((x) < 1 ? 1 : (x))
static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) {
uint32 sum = 0u;
@@ -654,15 +658,15 @@ static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx,
const uint16* src_ptr, uint8* dst_ptr) {
int i;
int scaletbl[2];
- int minboxwidth = (dx >> 16);
+ int minboxwidth = dx >> 16;
int* scaleptr = scaletbl - minboxwidth;
int boxwidth;
- scaletbl[0] = 65536 / (minboxwidth * boxheight);
- scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight);
+ scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
+ scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
for (i = 0; i < dst_width; ++i) {
int ix = x >> 16;
x += dx;
- boxwidth = (x >> 16) - ix;
+ boxwidth = MIN1((x >> 16) - ix);
*dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16;
}
}
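// Worked example of the 16.16 fixed-point walk in ScaleAddCols2_C above:
// scaling 100 source columns down to 30 gives dx = (100 << 16) / 30 =
// 0x35555, so boxwidth alternates between 3 and 4; scaletbl[] pre-divides
// 65536 by each possible box area, leaving one multiply and one shift per
// output pixel.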
@@ -671,25 +675,36 @@ static void ScaleAddCols2_16_C(int dst_width, int boxheight, int x, int dx,
const uint32* src_ptr, uint16* dst_ptr) {
int i;
int scaletbl[2];
- int minboxwidth = (dx >> 16);
+ int minboxwidth = dx >> 16;
int* scaleptr = scaletbl - minboxwidth;
int boxwidth;
- scaletbl[0] = 65536 / (minboxwidth * boxheight);
- scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight);
+ scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
+ scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
for (i = 0; i < dst_width; ++i) {
int ix = x >> 16;
x += dx;
- boxwidth = (x >> 16) - ix;
- *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + ix) *
- scaleptr[boxwidth] >> 16;
+ boxwidth = MIN1((x >> 16) - ix);
+ *dst_ptr++ =
+ SumPixels_16(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16;
+ }
+}
+
+static void ScaleAddCols0_C(int dst_width, int boxheight, int x, int,
+ const uint16* src_ptr, uint8* dst_ptr) {
+ int scaleval = 65536 / boxheight;
+ int i;
+ src_ptr += (x >> 16);
+ for (i = 0; i < dst_width; ++i) {
+ *dst_ptr++ = src_ptr[i] * scaleval >> 16;
}
}
static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx,
const uint16* src_ptr, uint8* dst_ptr) {
- int boxwidth = (dx >> 16);
+ int boxwidth = MIN1(dx >> 16);
int scaleval = 65536 / (boxwidth * boxheight);
int i;
+ x >>= 16;
for (i = 0; i < dst_width; ++i) {
*dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16;
x += boxwidth;
@@ -698,7 +713,7 @@ static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx,
static void ScaleAddCols1_16_C(int dst_width, int boxheight, int x, int dx,
const uint32* src_ptr, uint16* dst_ptr) {
- int boxwidth = (dx >> 16);
+ int boxwidth = MIN1(dx >> 16);
int scaleval = 65536 / (boxwidth * boxheight);
int i;
for (i = 0; i < dst_width; ++i) {
@@ -718,7 +733,7 @@ static void ScalePlaneBox(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_ptr, uint8* dst_ptr) {
- int j;
+ int j, k;
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
int y = 0;
@@ -728,10 +743,40 @@ static void ScalePlaneBox(int src_width, int src_height,
ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
&x, &y, &dx, &dy);
src_width = Abs(src_width);
- // TODO(fbarchard): Remove this and make AddRows handle boxheight 1.
- if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) {
- uint8* dst = dst_ptr;
- int j;
+ {
+ // Allocate a row buffer of uint16.
+ align_buffer_64(row16, src_width * 2);
+ void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
+ const uint16* src_ptr, uint8* dst_ptr) =
+ (dx & 0xffff) ? ScaleAddCols2_C :
+ ((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C);
+ void (*ScaleAddRow)(const uint8* src_ptr, uint16* dst_ptr, int src_width) =
+ ScaleAddRow_C;
+#if defined(HAS_SCALEADDROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ScaleAddRow = ScaleAddRow_Any_SSE2;
+ if (IS_ALIGNED(src_width, 16)) {
+ ScaleAddRow = ScaleAddRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_SCALEADDROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ScaleAddRow = ScaleAddRow_Any_AVX2;
+ if (IS_ALIGNED(src_width, 32)) {
+ ScaleAddRow = ScaleAddRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_SCALEADDROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ScaleAddRow = ScaleAddRow_Any_NEON;
+ if (IS_ALIGNED(src_width, 16)) {
+ ScaleAddRow = ScaleAddRow_NEON;
+ }
+ }
+#endif
+
for (j = 0; j < dst_height; ++j) {
int boxheight;
int iy = y >> 16;
@@ -740,46 +785,13 @@ static void ScalePlaneBox(int src_width, int src_height,
if (y > max_y) {
y = max_y;
}
- boxheight = (y >> 16) - iy;
- ScalePlaneBoxRow_C(dst_width, boxheight,
- x, dx, src_stride,
- src, dst);
- dst += dst_stride;
- }
- return;
- }
- {
- // Allocate a row buffer of uint16.
- align_buffer_64(row16, src_width * 2);
- void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
- const uint16* src_ptr, uint8* dst_ptr) =
- (dx & 0xffff) ? ScaleAddCols2_C: ScaleAddCols1_C;
- void (*ScaleAddRows)(const uint8* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int src_width, int src_height) = ScaleAddRows_C;
-
-#if defined(HAS_SCALEADDROWS_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) &&
-#ifdef AVOID_OVERREAD
- IS_ALIGNED(src_width, 16) &&
-#endif
- IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
- ScaleAddRows = ScaleAddRows_SSE2;
- }
-#endif
-
- for (j = 0; j < dst_height; ++j) {
- int boxheight;
- int iy = y >> 16;
- const uint8* src = src_ptr + iy * src_stride;
- y += dy;
- if (y > (src_height << 16)) {
- y = (src_height << 16);
+ boxheight = MIN1((y >> 16) - iy);
+ memset(row16, 0, src_width * 2);
+ for (k = 0; k < boxheight; ++k) {
+ ScaleAddRow(src, (uint16*)(row16), src_width);
+ src += src_stride;
}
- boxheight = (y >> 16) - iy;
- ScaleAddRows(src, src_stride, (uint16*)(row16),
- src_width, boxheight);
- ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16),
- dst_ptr);
+ ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16), dst_ptr);
dst_ptr += dst_stride;
}
free_aligned_buffer_64(row16);
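// ScaleAddRow has a simpler contract than the ScaleAddRows it replaces:
// it accumulates a single source row into the uint16 row buffer (the C
// version is just dst_ptr[x] += src_ptr[x]), so the memset plus boxheight
// iterations above handle every box height, including the MIN1-clamped
// height of 1 that previously needed the separate ScalePlaneBoxRow_C path.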
@@ -790,7 +802,7 @@ static void ScalePlaneBox_16(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint16* src_ptr, uint16* dst_ptr) {
- int j;
+ int j, k;
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
int y = 0;
@@ -800,10 +812,21 @@ static void ScalePlaneBox_16(int src_width, int src_height,
ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
&x, &y, &dx, &dy);
src_width = Abs(src_width);
- // TODO(fbarchard): Remove this and make AddRows handle boxheight 1.
- if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) {
- uint16* dst = dst_ptr;
- int j;
+ {
+ // Allocate a row buffer of uint32.
+ align_buffer_64(row32, src_width * 4);
+ void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
+ const uint32* src_ptr, uint16* dst_ptr) =
+ (dx & 0xffff) ? ScaleAddCols2_16_C : ScaleAddCols1_16_C;
+ void (*ScaleAddRow)(const uint16* src_ptr, uint32* dst_ptr, int src_width) =
+ ScaleAddRow_16_C;
+
+#if defined(HAS_SCALEADDROW_16_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) {
+ ScaleAddRow = ScaleAddRow_16_SSE2;
+ }
+#endif
+
for (j = 0; j < dst_height; ++j) {
int boxheight;
int iy = y >> 16;
@@ -812,46 +835,13 @@ static void ScalePlaneBox_16(int src_width, int src_height,
if (y > max_y) {
y = max_y;
}
- boxheight = (y >> 16) - iy;
- ScalePlaneBoxRow_16_C(dst_width, boxheight,
- x, dx, src_stride,
- src, dst);
- dst += dst_stride;
- }
- return;
- }
- {
- // Allocate a row buffer of uint32.
- align_buffer_64(row32, src_width * 4);
- void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
- const uint32* src_ptr, uint16* dst_ptr) =
- (dx & 0xffff) ? ScaleAddCols2_16_C: ScaleAddCols1_16_C;
- void (*ScaleAddRows)(const uint16* src_ptr, ptrdiff_t src_stride,
- uint32* dst_ptr, int src_width, int src_height) = ScaleAddRows_16_C;
-
-#if defined(HAS_SCALEADDROWS_16_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) &&
-#ifdef AVOID_OVERREAD
- IS_ALIGNED(src_width, 16) &&
-#endif
- IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
- ScaleAddRows = ScaleAddRows_16_SSE2;
- }
-#endif
-
- for (j = 0; j < dst_height; ++j) {
- int boxheight;
- int iy = y >> 16;
- const uint16* src = src_ptr + iy * src_stride;
- y += dy;
- if (y > (src_height << 16)) {
- y = (src_height << 16);
+ boxheight = MIN1((y >> 16) - iy);
+ memset(row32, 0, src_width * 4);
+ for (k = 0; k < boxheight; ++k) {
+ ScaleAddRow(src, (uint32*)(row32), src_width);
+ src += src_stride;
}
- boxheight = (y >> 16) - iy;
- ScaleAddRows(src, src_stride, (uint32*)(row32),
- src_width, boxheight);
- ScaleAddCols(dst_width, boxheight, x, dx, (uint32*)(row32),
- dst_ptr);
+ ScaleAddCols(dst_width, boxheight, x, dx, (uint32*)(row32), dst_ptr);
dst_ptr += dst_stride;
}
free_aligned_buffer_64(row32);
@@ -885,30 +875,16 @@ void ScalePlaneBilinearDown(int src_width, int src_height,
&x, &y, &dx, &dy);
src_width = Abs(src_width);
-#if defined(HAS_INTERPOLATEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && src_width >= 16) {
- InterpolateRow = InterpolateRow_Any_SSE2;
- if (IS_ALIGNED(src_width, 16)) {
- InterpolateRow = InterpolateRow_Unaligned_SSE2;
- if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
- InterpolateRow = InterpolateRow_SSE2;
- }
- }
- }
-#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 16) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(src_width, 16)) {
- InterpolateRow = InterpolateRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
- InterpolateRow = InterpolateRow_SSSE3;
- }
+ InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && src_width >= 32) {
+ if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;
if (IS_ALIGNED(src_width, 32)) {
InterpolateRow = InterpolateRow_AVX2;
@@ -916,7 +892,7 @@ void ScalePlaneBilinearDown(int src_width, int src_height,
}
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && src_width >= 16) {
+ if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow = InterpolateRow_Any_NEON;
if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_NEON;
@@ -924,7 +900,7 @@ void ScalePlaneBilinearDown(int src_width, int src_height,
}
#endif
#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && src_width >= 4) {
+ if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
if (IS_ALIGNED(src_width, 4)) {
InterpolateRow = InterpolateRow_MIPS_DSPR2;
@@ -937,6 +913,14 @@ void ScalePlaneBilinearDown(int src_width, int src_height,
if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
ScaleFilterCols = ScaleFilterCols_SSSE3;
}
+#endif
+#if defined(HAS_SCALEFILTERCOLS_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
+ ScaleFilterCols = ScaleFilterCols_Any_NEON;
+ if (IS_ALIGNED(dst_width, 8)) {
+ ScaleFilterCols = ScaleFilterCols_NEON;
+ }
+ }
#endif
if (y > max_y) {
y = max_y;
@@ -988,29 +972,23 @@ void ScalePlaneBilinearDown_16(int src_width, int src_height,
src_width = Abs(src_width);
#if defined(HAS_INTERPOLATEROW_16_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && src_width >= 16) {
+ if (TestCpuFlag(kCpuHasSSE2)) {
InterpolateRow = InterpolateRow_Any_16_SSE2;
if (IS_ALIGNED(src_width, 16)) {
- InterpolateRow = InterpolateRow_Unaligned_16_SSE2;
- if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
- InterpolateRow = InterpolateRow_16_SSE2;
- }
+ InterpolateRow = InterpolateRow_16_SSE2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 16) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_16_SSSE3;
if (IS_ALIGNED(src_width, 16)) {
- InterpolateRow = InterpolateRow_Unaligned_16_SSSE3;
- if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
- InterpolateRow = InterpolateRow_16_SSSE3;
- }
+ InterpolateRow = InterpolateRow_16_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && src_width >= 32) {
+ if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_16_AVX2;
if (IS_ALIGNED(src_width, 32)) {
InterpolateRow = InterpolateRow_16_AVX2;
@@ -1018,7 +996,7 @@ void ScalePlaneBilinearDown_16(int src_width, int src_height,
}
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
- if (TestCpuFlag(kCpuHasNEON) && src_width >= 16) {
+ if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow = InterpolateRow_Any_16_NEON;
if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_16_NEON;
@@ -1026,7 +1004,7 @@ void ScalePlaneBilinearDown_16(int src_width, int src_height,
}
#endif
#if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && src_width >= 4) {
+ if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
if (IS_ALIGNED(src_width, 4)) {
InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
@@ -1080,36 +1058,22 @@ void ScalePlaneBilinearUp(int src_width, int src_height,
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
InterpolateRow_C;
void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx) =
- filtering ? ScaleFilterCols_C : ScaleCols_C;
+ int dst_width, int x, int dx) =
+ filtering ? ScaleFilterCols_C : ScaleCols_C;
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
&x, &y, &dx, &dy);
src_width = Abs(src_width);
-#if defined(HAS_INTERPOLATEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 16) {
- InterpolateRow = InterpolateRow_Any_SSE2;
- if (IS_ALIGNED(dst_width, 16)) {
- InterpolateRow = InterpolateRow_Unaligned_SSE2;
- if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
- InterpolateRow = InterpolateRow_SSE2;
- }
- }
- }
-#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 16) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width, 16)) {
- InterpolateRow = InterpolateRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
- InterpolateRow = InterpolateRow_SSSE3;
- }
+ InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 32) {
+ if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;
if (IS_ALIGNED(dst_width, 32)) {
InterpolateRow = InterpolateRow_AVX2;
@@ -1117,7 +1081,7 @@ void ScalePlaneBilinearUp(int src_width, int src_height,
}
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && dst_width >= 16) {
+ if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow = InterpolateRow_Any_NEON;
if (IS_ALIGNED(dst_width, 16)) {
InterpolateRow = InterpolateRow_NEON;
@@ -1125,7 +1089,7 @@ void ScalePlaneBilinearUp(int src_width, int src_height,
}
#endif
#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 4) {
+ if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_MIPS_DSPR2;
@@ -1140,13 +1104,19 @@ void ScalePlaneBilinearUp(int src_width, int src_height,
if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
ScaleFilterCols = ScaleFilterCols_SSSE3;
}
+#endif
+#if defined(HAS_SCALEFILTERCOLS_NEON)
+ if (filtering && TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
+ ScaleFilterCols = ScaleFilterCols_Any_NEON;
+ if (IS_ALIGNED(dst_width, 8)) {
+ ScaleFilterCols = ScaleFilterCols_NEON;
+ }
+ }
#endif
if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
ScaleFilterCols = ScaleColsUp2_C;
#if defined(HAS_SCALECOLS_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
- IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
- IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
+ if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleFilterCols = ScaleColsUp2_SSE2;
}
#endif
@@ -1160,7 +1130,7 @@ void ScalePlaneBilinearUp(int src_width, int src_height,
const uint8* src = src_ptr + yi * src_stride;
// Allocate 2 row buffers.
- const int kRowSize = (dst_width + 15) & ~15;
+ const int kRowSize = (dst_width + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
uint8* rowptr = row;
@@ -1219,36 +1189,30 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height,
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
InterpolateRow_16_C;
void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr,
- int dst_width, int x, int dx) =
- filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;
+ int dst_width, int x, int dx) =
+ filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
&x, &y, &dx, &dy);
src_width = Abs(src_width);
#if defined(HAS_INTERPOLATEROW_16_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 16) {
+ if (TestCpuFlag(kCpuHasSSE2)) {
InterpolateRow = InterpolateRow_Any_16_SSE2;
if (IS_ALIGNED(dst_width, 16)) {
- InterpolateRow = InterpolateRow_Unaligned_16_SSE2;
- if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
- InterpolateRow = InterpolateRow_16_SSE2;
- }
+ InterpolateRow = InterpolateRow_16_SSE2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 16) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_16_SSSE3;
if (IS_ALIGNED(dst_width, 16)) {
- InterpolateRow = InterpolateRow_Unaligned_16_SSSE3;
- if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
- InterpolateRow = InterpolateRow_16_SSSE3;
- }
+ InterpolateRow = InterpolateRow_16_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 32) {
+ if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_16_AVX2;
if (IS_ALIGNED(dst_width, 32)) {
InterpolateRow = InterpolateRow_16_AVX2;
@@ -1256,7 +1220,7 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height,
}
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
- if (TestCpuFlag(kCpuHasNEON) && dst_width >= 16) {
+ if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow = InterpolateRow_Any_16_NEON;
if (IS_ALIGNED(dst_width, 16)) {
InterpolateRow = InterpolateRow_16_NEON;
@@ -1264,7 +1228,7 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height,
}
#endif
#if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 4) {
+ if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
@@ -1283,9 +1247,7 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height,
if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
ScaleFilterCols = ScaleColsUp2_16_C;
#if defined(HAS_SCALECOLS_16_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
- IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
- IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
+ if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleFilterCols = ScaleColsUp2_16_SSE2;
}
#endif
@@ -1299,7 +1261,7 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height,
const uint16* src = src_ptr + yi * src_stride;
// Allocate 2 row buffers.
- const int kRowSize = (dst_width + 15) & ~15;
+ const int kRowSize = (dst_width + 31) & ~31;
align_buffer_64(row, kRowSize * 4);
uint16* rowptr = (uint16*)row;
@@ -1366,17 +1328,14 @@ static void ScalePlaneSimple(int src_width, int src_height,
if (src_width * 2 == dst_width && x < 0x8000) {
ScaleCols = ScaleColsUp2_C;
#if defined(HAS_SCALECOLS_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
- IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
- IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
+ if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleCols = ScaleColsUp2_SSE2;
}
#endif
}
for (i = 0; i < dst_height; ++i) {
- ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride,
- dst_width, x, dx);
+ ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
dst_ptr += dst_stride;
y += dy;
}
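
The loop above walks source rows in 16.16 fixed point: y >> 16 selects the row and y += dy advances by the ratio computed via FixedDiv. A standalone sketch of the stepping (the real initial y comes from ScaleSlope; starting at 0 here just illustrates the walk):

    /* Map 9 destination rows onto 3 source rows with 16.16 stepping. */
    #include <stdio.h>

    int main(void) {
      int src_height = 3, dst_height = 9;
      int dy = (src_height << 16) / dst_height;  /* ~0.333 in 16.16 */
      int y = 0;
      for (int i = 0; i < dst_height; ++i, y += dy)
        printf("dst row %d <- src row %d\n", i, y >> 16);
      return 0;
    }
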
@@ -1401,9 +1360,7 @@ static void ScalePlaneSimple_16(int src_width, int src_height,
if (src_width * 2 == dst_width && x < 0x8000) {
ScaleCols = ScaleColsUp2_16_C;
#if defined(HAS_SCALECOLS_16_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
- IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
- IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
+ if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleCols = ScaleColsUp2_16_SSE2;
}
#endif
@@ -1428,8 +1385,7 @@ void ScalePlane(const uint8* src, int src_stride,
enum FilterMode filtering) {
// Simplify filtering when possible.
filtering = ScaleFilterReduce(src_width, src_height,
- dst_width, dst_height,
- filtering);
+ dst_width, dst_height, filtering);
// Negative height means invert the image.
if (src_height < 0) {
@@ -1445,9 +1401,9 @@ void ScalePlane(const uint8* src, int src_stride,
CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
return;
}
- if (dst_width == src_width) {
+ if (dst_width == src_width && filtering != kFilterBox) {
int dy = FixedDiv(src_height, dst_height);
- // Arbitrary scale vertically, but unscaled vertically.
+ // Arbitrary scale vertically, but unscaled horizontally.
ScalePlaneVertical(src_height,
dst_width, dst_height,
src_stride, dst_stride, src, dst,
@@ -1478,7 +1434,7 @@ void ScalePlane(const uint8* src, int src_stride,
return;
}
if (4 * dst_width == src_width && 4 * dst_height == src_height &&
- filtering != kFilterBilinear) {
+ (filtering == kFilterBox || filtering == kFilterNone)) {
// optimized, 1/4
ScalePlaneDown4(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst, filtering);
@@ -1512,8 +1468,7 @@ void ScalePlane_16(const uint16* src, int src_stride,
enum FilterMode filtering) {
// Simplify filtering when possible.
filtering = ScaleFilterReduce(src_width, src_height,
- dst_width, dst_height,
- filtering);
+ dst_width, dst_height, filtering);
// Negative height means invert the image.
if (src_height < 0) {
@@ -1606,6 +1561,7 @@ int I420Scale(const uint8* src_y, int src_stride_y,
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
+ src_width > 32768 || src_height > 32768 ||
!dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {
return -1;
}
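
The new 32768 bound lines up with the scaler's 16.16 fixed-point coordinates in signed 32-bit ints: a 32768-wide plane still fits exactly, while anything wider overflows. Worked out:

    /* 16.16 coordinate range for src_width = 32768:
     *   max integer part:  32767 << 16  = 2147418112
     *   max fraction:                   +      65535
     *   max coordinate:                 = 2147483647 == INT_MAX
     * src_width = 32769 would need 32768 << 16 = 2^31, which no longer
     * fits a signed 32-bit int. */
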
@@ -1637,6 +1593,7 @@ int I420Scale_16(const uint16* src_y, int src_stride_y,
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
+ src_width > 32768 || src_height > 32768 ||
!dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {
return -1;
}
diff --git a/TMessagesProj/jni/libyuv/source/scale_any.cc b/TMessagesProj/jni/libyuv/source/scale_any.cc
new file mode 100644
index 000000000..1da4dacde
--- /dev/null
+++ b/TMessagesProj/jni/libyuv/source/scale_any.cc
@@ -0,0 +1,200 @@
+/*
+ * Copyright 2015 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/scale.h"
+#include "libyuv/scale_row.h"
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// Definition for ScaleFilterCols, ScaleARGBCols and ScaleARGBFilterCols
+#define CANY(NAMEANY, TERP_SIMD, TERP_C, BPP, MASK) \
+ void NAMEANY(uint8* dst_ptr, const uint8* src_ptr, \
+ int dst_width, int x, int dx) { \
+ int n = dst_width & ~MASK; \
+ if (n > 0) { \
+ TERP_SIMD(dst_ptr, src_ptr, n, x, dx); \
+ } \
+ TERP_C(dst_ptr + n * BPP, src_ptr, \
+ dst_width & MASK, x + n * dx, dx); \
+ }
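
Hand-expanding one instantiation makes the wrapper pattern concrete; this is roughly what CANY(ScaleFilterCols_Any_NEON, ScaleFilterCols_NEON, ScaleFilterCols_C, 1, 7) produces:

    void ScaleFilterCols_Any_NEON(uint8* dst_ptr, const uint8* src_ptr,
                                  int dst_width, int x, int dx) {
      int n = dst_width & ~7;        /* largest multiple of 8 */
      if (n > 0) {
        ScaleFilterCols_NEON(dst_ptr, src_ptr, n, x, dx);
      }
      /* The C fallback finishes the 0..7 leftover columns, with x advanced
       * past the pixels the SIMD path already produced. */
      ScaleFilterCols_C(dst_ptr + n, src_ptr, dst_width & 7, x + n * dx, dx);
    }
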
+
+#ifdef HAS_SCALEFILTERCOLS_NEON
+CANY(ScaleFilterCols_Any_NEON, ScaleFilterCols_NEON, ScaleFilterCols_C, 1, 7)
+#endif
+#ifdef HAS_SCALEARGBCOLS_NEON
+CANY(ScaleARGBCols_Any_NEON, ScaleARGBCols_NEON, ScaleARGBCols_C, 4, 7)
+#endif
+#ifdef HAS_SCALEARGBFILTERCOLS_NEON
+CANY(ScaleARGBFilterCols_Any_NEON, ScaleARGBFilterCols_NEON,
+ ScaleARGBFilterCols_C, 4, 3)
+#endif
+#undef CANY
+
+// Fixed scale down.
+#define SDANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
+ void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, \
+ uint8* dst_ptr, int dst_width) { \
+ int r = (int)((unsigned int)dst_width % (MASK + 1)); \
+ int n = dst_width - r; \
+ if (n > 0) { \
+ SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \
+ } \
+ SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \
+ dst_ptr + n * BPP, r); \
+ }
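
A worked split for ScaleRowDown34_Any_SSSE3 (FACTOR written as 4 / 3, BPP 1, MASK 23) shows how the remainder math plays out; note FACTOR is substituted textually, so (n * 4 / 3) multiplies before dividing and stays exact whenever n is a multiple of 24:

    /* dst_width = 50:
     *   r = 50 % 24 = 2, n = 48
     *   SIMD emits 48 outputs from 48 * 4 / 3 = 64 source pixels,
     *   then ScaleRowDown34_C finishes 2 outputs at src_ptr + 64. */
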
+
+#ifdef HAS_SCALEROWDOWN2_SSSE3
+SDANY(ScaleRowDown2_Any_SSSE3, ScaleRowDown2_SSSE3, ScaleRowDown2_C, 2, 1, 15)
+SDANY(ScaleRowDown2Linear_Any_SSSE3, ScaleRowDown2Linear_SSSE3,
+ ScaleRowDown2Linear_C, 2, 1, 15)
+SDANY(ScaleRowDown2Box_Any_SSSE3, ScaleRowDown2Box_SSSE3, ScaleRowDown2Box_C,
+ 2, 1, 15)
+#endif
+#ifdef HAS_SCALEROWDOWN2_AVX2
+SDANY(ScaleRowDown2_Any_AVX2, ScaleRowDown2_AVX2, ScaleRowDown2_C, 2, 1, 31)
+SDANY(ScaleRowDown2Linear_Any_AVX2, ScaleRowDown2Linear_AVX2,
+ ScaleRowDown2Linear_C, 2, 1, 31)
+SDANY(ScaleRowDown2Box_Any_AVX2, ScaleRowDown2Box_AVX2, ScaleRowDown2Box_C,
+ 2, 1, 31)
+#endif
+#ifdef HAS_SCALEROWDOWN2_NEON
+SDANY(ScaleRowDown2_Any_NEON, ScaleRowDown2_NEON, ScaleRowDown2_C, 2, 1, 15)
+SDANY(ScaleRowDown2Linear_Any_NEON, ScaleRowDown2Linear_NEON,
+ ScaleRowDown2Linear_C, 2, 1, 15)
+SDANY(ScaleRowDown2Box_Any_NEON, ScaleRowDown2Box_NEON,
+ ScaleRowDown2Box_C, 2, 1, 15)
+#endif
+#ifdef HAS_SCALEROWDOWN4_SSSE3
+SDANY(ScaleRowDown4_Any_SSSE3, ScaleRowDown4_SSSE3, ScaleRowDown4_C, 4, 1, 7)
+SDANY(ScaleRowDown4Box_Any_SSSE3, ScaleRowDown4Box_SSSE3, ScaleRowDown4Box_C,
+ 4, 1, 7)
+#endif
+#ifdef HAS_SCALEROWDOWN4_AVX2
+SDANY(ScaleRowDown4_Any_AVX2, ScaleRowDown4_AVX2, ScaleRowDown4_C, 4, 1, 15)
+SDANY(ScaleRowDown4Box_Any_AVX2, ScaleRowDown4Box_AVX2, ScaleRowDown4Box_C,
+ 4, 1, 15)
+#endif
+#ifdef HAS_SCALEROWDOWN4_NEON
+SDANY(ScaleRowDown4_Any_NEON, ScaleRowDown4_NEON, ScaleRowDown4_C, 4, 1, 7)
+SDANY(ScaleRowDown4Box_Any_NEON, ScaleRowDown4Box_NEON, ScaleRowDown4Box_C,
+ 4, 1, 7)
+#endif
+#ifdef HAS_SCALEROWDOWN34_SSSE3
+SDANY(ScaleRowDown34_Any_SSSE3, ScaleRowDown34_SSSE3,
+ ScaleRowDown34_C, 4 / 3, 1, 23)
+SDANY(ScaleRowDown34_0_Box_Any_SSSE3, ScaleRowDown34_0_Box_SSSE3,
+ ScaleRowDown34_0_Box_C, 4 / 3, 1, 23)
+SDANY(ScaleRowDown34_1_Box_Any_SSSE3, ScaleRowDown34_1_Box_SSSE3,
+ ScaleRowDown34_1_Box_C, 4 / 3, 1, 23)
+#endif
+#ifdef HAS_SCALEROWDOWN34_NEON
+SDANY(ScaleRowDown34_Any_NEON, ScaleRowDown34_NEON,
+ ScaleRowDown34_C, 4 / 3, 1, 23)
+SDANY(ScaleRowDown34_0_Box_Any_NEON, ScaleRowDown34_0_Box_NEON,
+ ScaleRowDown34_0_Box_C, 4 / 3, 1, 23)
+SDANY(ScaleRowDown34_1_Box_Any_NEON, ScaleRowDown34_1_Box_NEON,
+ ScaleRowDown34_1_Box_C, 4 / 3, 1, 23)
+#endif
+#ifdef HAS_SCALEROWDOWN38_SSSE3
+SDANY(ScaleRowDown38_Any_SSSE3, ScaleRowDown38_SSSE3,
+ ScaleRowDown38_C, 8 / 3, 1, 11)
+SDANY(ScaleRowDown38_3_Box_Any_SSSE3, ScaleRowDown38_3_Box_SSSE3,
+ ScaleRowDown38_3_Box_C, 8 / 3, 1, 5)
+SDANY(ScaleRowDown38_2_Box_Any_SSSE3, ScaleRowDown38_2_Box_SSSE3,
+ ScaleRowDown38_2_Box_C, 8 / 3, 1, 5)
+#endif
+#ifdef HAS_SCALEROWDOWN38_NEON
+SDANY(ScaleRowDown38_Any_NEON, ScaleRowDown38_NEON,
+ ScaleRowDown38_C, 8 / 3, 1, 11)
+SDANY(ScaleRowDown38_3_Box_Any_NEON, ScaleRowDown38_3_Box_NEON,
+ ScaleRowDown38_3_Box_C, 8 / 3, 1, 11)
+SDANY(ScaleRowDown38_2_Box_Any_NEON, ScaleRowDown38_2_Box_NEON,
+ ScaleRowDown38_2_Box_C, 8 / 3, 1, 11)
+#endif
+
+#ifdef HAS_SCALEARGBROWDOWN2_SSE2
+SDANY(ScaleARGBRowDown2_Any_SSE2, ScaleARGBRowDown2_SSE2,
+ ScaleARGBRowDown2_C, 2, 4, 3)
+SDANY(ScaleARGBRowDown2Linear_Any_SSE2, ScaleARGBRowDown2Linear_SSE2,
+ ScaleARGBRowDown2Linear_C, 2, 4, 3)
+SDANY(ScaleARGBRowDown2Box_Any_SSE2, ScaleARGBRowDown2Box_SSE2,
+ ScaleARGBRowDown2Box_C, 2, 4, 3)
+#endif
+#ifdef HAS_SCALEARGBROWDOWN2_NEON
+SDANY(ScaleARGBRowDown2_Any_NEON, ScaleARGBRowDown2_NEON,
+ ScaleARGBRowDown2_C, 2, 4, 7)
+SDANY(ScaleARGBRowDown2Linear_Any_NEON, ScaleARGBRowDown2Linear_NEON,
+ ScaleARGBRowDown2Linear_C, 2, 4, 7)
+SDANY(ScaleARGBRowDown2Box_Any_NEON, ScaleARGBRowDown2Box_NEON,
+ ScaleARGBRowDown2Box_C, 2, 4, 7)
+#endif
+#undef SDANY
+
+// Scale down by even scale factor.
+#define SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK) \
+ void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, int src_stepx, \
+ uint8* dst_ptr, int dst_width) { \
+ int r = (int)((unsigned int)dst_width % (MASK + 1)); \
+ int n = dst_width - r; \
+ if (n > 0) { \
+ SCALEROWDOWN_SIMD(src_ptr, src_stride, src_stepx, dst_ptr, n); \
+ } \
+ SCALEROWDOWN_C(src_ptr + (n * src_stepx) * BPP, src_stride, \
+ src_stepx, dst_ptr + n * BPP, r); \
+ }
+
+#ifdef HAS_SCALEARGBROWDOWNEVEN_SSE2
+SDAANY(ScaleARGBRowDownEven_Any_SSE2, ScaleARGBRowDownEven_SSE2,
+ ScaleARGBRowDownEven_C, 4, 3)
+SDAANY(ScaleARGBRowDownEvenBox_Any_SSE2, ScaleARGBRowDownEvenBox_SSE2,
+ ScaleARGBRowDownEvenBox_C, 4, 3)
+#endif
+#ifdef HAS_SCALEARGBROWDOWNEVEN_NEON
+SDAANY(ScaleARGBRowDownEven_Any_NEON, ScaleARGBRowDownEven_NEON,
+ ScaleARGBRowDownEven_C, 4, 3)
+SDAANY(ScaleARGBRowDownEvenBox_Any_NEON, ScaleARGBRowDownEvenBox_NEON,
+ ScaleARGBRowDownEvenBox_C, 4, 3)
+#endif
+
+// Accumulate rows of pixels for the box filter scale down.
+#define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK) \
+ void NAMEANY(const uint8* src_ptr, uint16* dst_ptr, int src_width) { \
+ int n = src_width & ~MASK; \
+ if (n > 0) { \
+ SCALEADDROW_SIMD(src_ptr, dst_ptr, n); \
+ } \
+ SCALEADDROW_C(src_ptr + n, dst_ptr + n, src_width & MASK); \
+ }
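
SAANY splits on the source width, and because dst_ptr is a uint16*, "dst_ptr + n" already advances n accumulator slots. For example, ScaleAddRow_Any_SSE2 (MASK 15) with src_width = 70:

    /* n = 70 & ~15 = 64 bytes via ScaleAddRow_SSE2,
     * remaining 70 & 15 = 6 bytes via ScaleAddRow_C at src_ptr + 64. */
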
+
+#ifdef HAS_SCALEADDROW_SSE2
+SAANY(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, ScaleAddRow_C, 15)
+#endif
+#ifdef HAS_SCALEADDROW_AVX2
+SAANY(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, ScaleAddRow_C, 31)
+#endif
+#ifdef HAS_SCALEADDROW_NEON
+SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15)
+#endif
+#undef SAANY
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
+
+
+
+
+
diff --git a/TMessagesProj/jni/libyuv/source/scale_argb.cc b/TMessagesProj/jni/libyuv/source/scale_argb.cc
index e339cd7c7..adddf9db5 100644
--- a/TMessagesProj/jni/libyuv/source/scale_argb.cc
+++ b/TMessagesProj/jni/libyuv/source/scale_argb.cc
@@ -53,18 +53,27 @@ static void ScaleARGBDown2(int src_width, int src_height,
}
#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
- IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
- ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 :
- (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 :
- ScaleARGBRowDown2Box_SSE2);
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_Any_SSE2 :
+ (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_SSE2 :
+ ScaleARGBRowDown2Box_Any_SSE2);
+ if (IS_ALIGNED(dst_width, 4)) {
+ ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 :
+ (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 :
+ ScaleARGBRowDown2Box_SSE2);
+ }
}
-#elif defined(HAS_SCALEARGBROWDOWN2_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
- IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
- ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Box_NEON :
- ScaleARGBRowDown2_NEON;
+#endif
+#if defined(HAS_SCALEARGBROWDOWN2_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_Any_NEON :
+ (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_NEON :
+ ScaleARGBRowDown2Box_Any_NEON);
+ if (IS_ALIGNED(dst_width, 8)) {
+ ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_NEON :
+ (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_NEON :
+ ScaleARGBRowDown2Box_NEON);
+ }
}
#endif
@@ -88,7 +97,7 @@ static void ScaleARGBDown4Box(int src_width, int src_height,
int x, int dx, int y, int dy) {
int j;
// Allocate 2 rows of ARGB.
- const int kRowSize = (dst_width * 2 * 4 + 15) & ~15;
+ const int kRowSize = (dst_width * 2 * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
int row_stride = src_stride * (dy >> 16);
void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
@@ -98,17 +107,22 @@ static void ScaleARGBDown4Box(int src_width, int src_height,
assert(dx == 65536 * 4); // Test scale factor of 4.
assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4.
#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
- IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
- ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2;
- }
-#elif defined(HAS_SCALEARGBROWDOWN2_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
- IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
- ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_SSE2;
+ if (IS_ALIGNED(dst_width, 4)) {
+ ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2;
+ }
}
#endif
+#if defined(HAS_SCALEARGBROWDOWN2_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_NEON;
+ if (IS_ALIGNED(dst_width, 8)) {
+ ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
+ }
+ }
+#endif
+
for (j = 0; j < dst_height; ++j) {
ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride,
@@ -139,16 +153,23 @@ static void ScaleARGBDownEven(int src_width, int src_height,
assert(IS_ALIGNED(src_height, 2));
src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
- ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 :
- ScaleARGBRowDownEven_SSE2;
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2 :
+ ScaleARGBRowDownEven_Any_SSE2;
+ if (IS_ALIGNED(dst_width, 4)) {
+ ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 :
+ ScaleARGBRowDownEven_SSE2;
+ }
}
-#elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 4) &&
- IS_ALIGNED(src_argb, 4)) {
- ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON :
- ScaleARGBRowDownEven_NEON;
+#endif
+#if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_NEON :
+ ScaleARGBRowDownEven_Any_NEON;
+ if (IS_ALIGNED(dst_width, 4)) {
+ ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON :
+ ScaleARGBRowDownEven_NEON;
+ }
}
#endif
@@ -189,30 +210,16 @@ static void ScaleARGBBilinearDown(int src_width, int src_height,
clip_src_width = (int)(xr - xl) * 4; // Width aligned to 4.
src_argb += xl * 4;
x -= (int)(xl << 16);
-#if defined(HAS_INTERPOLATEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && clip_src_width >= 16) {
- InterpolateRow = InterpolateRow_Any_SSE2;
- if (IS_ALIGNED(clip_src_width, 16)) {
- InterpolateRow = InterpolateRow_Unaligned_SSE2;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
- InterpolateRow = InterpolateRow_SSE2;
- }
- }
- }
-#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && clip_src_width >= 16) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(clip_src_width, 16)) {
- InterpolateRow = InterpolateRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
- InterpolateRow = InterpolateRow_SSSE3;
- }
+ InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && clip_src_width >= 32) {
+ if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;
if (IS_ALIGNED(clip_src_width, 32)) {
InterpolateRow = InterpolateRow_AVX2;
@@ -220,15 +227,15 @@ static void ScaleARGBBilinearDown(int src_width, int src_height,
}
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && clip_src_width >= 16) {
+ if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow = InterpolateRow_Any_NEON;
if (IS_ALIGNED(clip_src_width, 16)) {
InterpolateRow = InterpolateRow_NEON;
}
}
#endif
-#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && clip_src_width >= 4 &&
+#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
+ if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) {
InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
if (IS_ALIGNED(clip_src_width, 4)) {
@@ -240,6 +247,14 @@ static void ScaleARGBBilinearDown(int src_width, int src_height,
if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
}
+#endif
+#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
+ if (IS_ALIGNED(dst_width, 4)) {
+ ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
+ }
+ }
#endif
// TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
// Allocate a row of ARGB.
@@ -285,30 +300,16 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
int dst_width, int x, int dx) =
filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
const int max_y = (src_height - 1) << 16;
-#if defined(HAS_INTERPOLATEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
- InterpolateRow = InterpolateRow_Any_SSE2;
- if (IS_ALIGNED(dst_width, 4)) {
- InterpolateRow = InterpolateRow_Unaligned_SSE2;
- if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
- InterpolateRow = InterpolateRow_SSE2;
- }
- }
- }
-#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width, 4)) {
- InterpolateRow = InterpolateRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
- InterpolateRow = InterpolateRow_SSSE3;
- }
+ InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) {
+ if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;
if (IS_ALIGNED(dst_width, 8)) {
InterpolateRow = InterpolateRow_AVX2;
@@ -316,15 +317,15 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
}
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
+ if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow = InterpolateRow_Any_NEON;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_NEON;
}
}
#endif
-#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
+#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
+ if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
InterpolateRow = InterpolateRow_MIPS_DSPR2;
}
@@ -338,17 +339,31 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
}
#endif
+#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
+ if (filtering && TestCpuFlag(kCpuHasNEON)) {
+ ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
+ if (IS_ALIGNED(dst_width, 4)) {
+ ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
+ }
+ }
+#endif
#if defined(HAS_SCALEARGBCOLS_SSE2)
if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
ScaleARGBFilterCols = ScaleARGBCols_SSE2;
}
+#endif
+#if defined(HAS_SCALEARGBCOLS_NEON)
+ if (!filtering && TestCpuFlag(kCpuHasNEON)) {
+ ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
+ if (IS_ALIGNED(dst_width, 8)) {
+ ScaleARGBFilterCols = ScaleARGBCols_NEON;
+ }
+ }
#endif
if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
ScaleARGBFilterCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
- IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
+ if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
}
#endif
@@ -363,7 +378,7 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
const uint8* src = src_argb + yi * src_stride;
// Allocate 2 rows of ARGB.
- const int kRowSize = (dst_width * 4 + 15) & ~15;
+ const int kRowSize = (dst_width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
uint8* rowptr = row;
@@ -427,18 +442,15 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
uint8* rgb_buf,
int width) = I422ToARGBRow_C;
#if defined(HAS_I422TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 8) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(src_width, 8)) {
- I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- I422ToARGBRow = I422ToARGBRow_SSSE3;
- }
+ I422ToARGBRow = I422ToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_I422TOARGBROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && src_width >= 16) {
+ if (TestCpuFlag(kCpuHasAVX2)) {
I422ToARGBRow = I422ToARGBRow_Any_AVX2;
if (IS_ALIGNED(src_width, 16)) {
I422ToARGBRow = I422ToARGBRow_AVX2;
@@ -446,7 +458,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
}
#endif
#if defined(HAS_I422TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && src_width >= 8) {
+ if (TestCpuFlag(kCpuHasNEON)) {
I422ToARGBRow = I422ToARGBRow_Any_NEON;
if (IS_ALIGNED(src_width, 8)) {
I422ToARGBRow = I422ToARGBRow_NEON;
@@ -466,30 +478,16 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
InterpolateRow_C;
-#if defined(HAS_INTERPOLATEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
- InterpolateRow = InterpolateRow_Any_SSE2;
- if (IS_ALIGNED(dst_width, 4)) {
- InterpolateRow = InterpolateRow_Unaligned_SSE2;
- if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- InterpolateRow = InterpolateRow_SSE2;
- }
- }
- }
-#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width, 4)) {
- InterpolateRow = InterpolateRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- InterpolateRow = InterpolateRow_SSSE3;
- }
+ InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) {
+ if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;
if (IS_ALIGNED(dst_width, 8)) {
InterpolateRow = InterpolateRow_AVX2;
@@ -497,15 +495,15 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
}
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
+ if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow = InterpolateRow_Any_NEON;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_NEON;
}
}
#endif
-#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
+#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
+ if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
InterpolateRow = InterpolateRow_MIPS_DSPR2;
}
@@ -523,17 +521,31 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
}
#endif
+#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
+ if (filtering && TestCpuFlag(kCpuHasNEON)) {
+ ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
+ if (IS_ALIGNED(dst_width, 4)) {
+ ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
+ }
+ }
+#endif
#if defined(HAS_SCALEARGBCOLS_SSE2)
if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
ScaleARGBFilterCols = ScaleARGBCols_SSE2;
}
+#endif
+#if defined(HAS_SCALEARGBCOLS_NEON)
+ if (!filtering && TestCpuFlag(kCpuHasNEON)) {
+ ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
+ if (IS_ALIGNED(dst_width, 8)) {
+ ScaleARGBFilterCols = ScaleARGBCols_NEON;
+ }
+ }
#endif
if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
ScaleARGBFilterCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
- IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
+ if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
}
#endif
@@ -551,7 +563,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
const uint8* src_row_v = src_v + uv_yi * src_stride_v;
// Allocate 2 rows of ARGB.
- const int kRowSize = (dst_width * 4 + 15) & ~15;
+ const int kRowSize = (dst_width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
// Allocate 1 row of ARGB for source conversion.
@@ -636,13 +648,19 @@ static void ScaleARGBSimple(int src_width, int src_height,
if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
ScaleARGBCols = ScaleARGBCols_SSE2;
}
+#endif
+#if defined(HAS_SCALEARGBCOLS_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ScaleARGBCols = ScaleARGBCols_Any_NEON;
+ if (IS_ALIGNED(dst_width, 8)) {
+ ScaleARGBCols = ScaleARGBCols_NEON;
+ }
+ }
#endif
if (src_width * 2 == dst_width && x < 0x8000) {
ScaleARGBCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
- IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
+ if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleARGBCols = ScaleARGBColsUp2_SSE2;
}
#endif
@@ -776,6 +794,7 @@ int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
if (!src_argb || src_width == 0 || src_height == 0 ||
!dst_argb || dst_width <= 0 || dst_height <= 0 ||
clip_x < 0 || clip_y < 0 ||
+ clip_width > 32768 || clip_height > 32768 ||
(clip_x + clip_width) > dst_width ||
(clip_y + clip_height) > dst_height) {
return -1;
@@ -794,6 +813,7 @@ int ARGBScale(const uint8* src_argb, int src_stride_argb,
int dst_width, int dst_height,
enum FilterMode filtering) {
if (!src_argb || src_width == 0 || src_height == 0 ||
+ src_width > 32768 || src_height > 32768 ||
!dst_argb || dst_width <= 0 || dst_height <= 0) {
return -1;
}
@@ -803,6 +823,37 @@ int ARGBScale(const uint8* src_argb, int src_stride_argb,
return 0;
}
+// Scale with YUV conversion to ARGB and clipping.
+LIBYUV_API
+int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y,
+ const uint8* src_u, int src_stride_u,
+ const uint8* src_v, int src_stride_v,
+ uint32 src_fourcc,
+ int src_width, int src_height,
+ uint8* dst_argb, int dst_stride_argb,
+ uint32 dst_fourcc,
+ int dst_width, int dst_height,
+ int clip_x, int clip_y, int clip_width, int clip_height,
+ enum FilterMode filtering) {
+
+ uint8* argb_buffer = (uint8*)malloc(src_width * src_height * 4);
+ int r;
+ I420ToARGB(src_y, src_stride_y,
+ src_u, src_stride_u,
+ src_v, src_stride_v,
+ argb_buffer, src_width * 4,
+ src_width, src_height);
+
+ r = ARGBScaleClip(argb_buffer, src_width * 4,
+ src_width, src_height,
+ dst_argb, dst_stride_argb,
+ dst_width, dst_height,
+ clip_x, clip_y, clip_width, clip_height,
+ filtering);
+ free(argb_buffer);
+ return r;
+}
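
A minimal call sketch follows; the plane pointers and strides are assumed inputs, and, as the body above shows, src_fourcc/dst_fourcc are accepted but the conversion is fixed to I420ToARGB. Note the intermediate buffer is a full src_width * src_height ARGB frame, so large sources allocate accordingly:

    uint8* dst = (uint8*)malloc(640 * 360 * 4);
    int r = YUVToARGBScaleClip(y_plane, y_stride,
                               u_plane, u_stride,
                               v_plane, v_stride,
                               FOURCC_I420, 1280, 720,
                               dst, 640 * 4,
                               FOURCC_ARGB, 640, 360,
                               0, 0, 640, 360,
                               kFilterBilinear);
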
+
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
diff --git a/TMessagesProj/jni/libyuv/source/scale_common.cc b/TMessagesProj/jni/libyuv/source/scale_common.cc
index e4b2acc41..f5c908d0b 100644
--- a/TMessagesProj/jni/libyuv/source/scale_common.cc
+++ b/TMessagesProj/jni/libyuv/source/scale_common.cc
@@ -621,39 +621,31 @@ void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
}
}
-void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int src_width, int src_height) {
+void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
int x;
assert(src_width > 0);
- assert(src_height > 0);
- for (x = 0; x < src_width; ++x) {
- const uint8* s = src_ptr + x;
- unsigned int sum = 0u;
- int y;
- for (y = 0; y < src_height; ++y) {
- sum += s[0];
- s += src_stride;
- }
- // TODO(fbarchard): Consider limitting height to 256 to avoid overflow.
- dst_ptr[x] = sum < 65535u ? sum : 65535u;
+ for (x = 0; x < src_width - 1; x += 2) {
+ dst_ptr[0] += src_ptr[0];
+ dst_ptr[1] += src_ptr[1];
+ src_ptr += 2;
+ dst_ptr += 2;
+ }
+ if (src_width & 1) {
+ dst_ptr[0] += src_ptr[0];
}
}
-void ScaleAddRows_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
- uint32* dst_ptr, int src_width, int src_height) {
+void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width) {
int x;
assert(src_width > 0);
- assert(src_height > 0);
- for (x = 0; x < src_width; ++x) {
- const uint16* s = src_ptr + x;
- unsigned int sum = 0u;
- int y;
- for (y = 0; y < src_height; ++y) {
- sum += s[0];
- s += src_stride;
- }
- // No risk of overflow here now
- dst_ptr[x] = sum;
+ for (x = 0; x < src_width - 1; x += 2) {
+ dst_ptr[0] += src_ptr[0];
+ dst_ptr[1] += src_ptr[1];
+ src_ptr += 2;
+ dst_ptr += 2;
+ }
+ if (src_width & 1) {
+ dst_ptr[0] += src_ptr[0];
}
}
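
The old ScaleAddRows walked a column of rows internally; the replacement accumulates a single row, so the box filter caller clears the accumulator once and adds rows itself. A sketch of that driving loop (SumRows and boxheight are illustrative names, not libyuv API):

    #include <string.h>  /* memset */

    static void SumRows(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint16* accum, int src_width, int boxheight) {
      memset(accum, 0, src_width * sizeof(uint16));
      for (int k = 0; k < boxheight; ++k)
        ScaleAddRow_C(src_ptr + k * src_stride, accum, src_width);
      /* With 8-bit sources the uint16 sums are safe for
       * boxheight <= 257 (257 * 255 = 65535). */
    }
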
@@ -884,32 +876,16 @@ void ScalePlaneVertical(int src_height,
assert(dst_width > 0);
assert(dst_height > 0);
src_argb += (x >> 16) * bpp;
-#if defined(HAS_INTERPOLATEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && dst_width_bytes >= 16) {
- InterpolateRow = InterpolateRow_Any_SSE2;
- if (IS_ALIGNED(dst_width_bytes, 16)) {
- InterpolateRow = InterpolateRow_Unaligned_SSE2;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
- InterpolateRow = InterpolateRow_SSE2;
- }
- }
- }
-#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && dst_width_bytes >= 16) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width_bytes, 16)) {
- InterpolateRow = InterpolateRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
- InterpolateRow = InterpolateRow_SSSE3;
- }
+ InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && dst_width_bytes >= 32) {
+ if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;
if (IS_ALIGNED(dst_width_bytes, 32)) {
InterpolateRow = InterpolateRow_AVX2;
@@ -917,15 +893,15 @@ void ScalePlaneVertical(int src_height,
}
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && dst_width_bytes >= 16) {
+ if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow = InterpolateRow_Any_NEON;
if (IS_ALIGNED(dst_width_bytes, 16)) {
InterpolateRow = InterpolateRow_NEON;
}
}
#endif
-#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width_bytes >= 4 &&
+#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
+ if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
@@ -967,31 +943,23 @@ void ScalePlaneVertical_16(int src_height,
assert(dst_height > 0);
src_argb += (x >> 16) * wpp;
#if defined(HAS_INTERPOLATEROW_16_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && dst_width_bytes >= 16) {
+ if (TestCpuFlag(kCpuHasSSE2)) {
InterpolateRow = InterpolateRow_Any_16_SSE2;
if (IS_ALIGNED(dst_width_bytes, 16)) {
- InterpolateRow = InterpolateRow_Unaligned_16_SSE2;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
- InterpolateRow = InterpolateRow_16_SSE2;
- }
+ InterpolateRow = InterpolateRow_16_SSE2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && dst_width_bytes >= 16) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_16_SSSE3;
if (IS_ALIGNED(dst_width_bytes, 16)) {
- InterpolateRow = InterpolateRow_Unaligned_16_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
- InterpolateRow = InterpolateRow_16_SSSE3;
- }
+ InterpolateRow = InterpolateRow_16_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && dst_width_bytes >= 32) {
+ if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_16_AVX2;
if (IS_ALIGNED(dst_width_bytes, 32)) {
InterpolateRow = InterpolateRow_16_AVX2;
@@ -999,15 +967,15 @@ void ScalePlaneVertical_16(int src_height,
}
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
- if (TestCpuFlag(kCpuHasNEON) && dst_width_bytes >= 16) {
+ if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow = InterpolateRow_Any_16_NEON;
if (IS_ALIGNED(dst_width_bytes, 16)) {
InterpolateRow = InterpolateRow_16_NEON;
}
}
#endif
-#if defined(HAS_INTERPOLATEROWS_16_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width_bytes >= 4 &&
+#if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
+ if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
@@ -1046,10 +1014,6 @@ enum FilterMode ScaleFilterReduce(int src_width, int src_height,
if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) {
filtering = kFilterBilinear;
}
- // If scaling to larger, switch from Box to Bilinear.
- if (dst_width >= src_width || dst_height >= src_height) {
- filtering = kFilterBilinear;
- }
}
if (filtering == kFilterBilinear) {
if (src_height == 1) {
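
Dropping the second rule means a requested box filter is now honored when scaling up (the earlier ScalePlane change excludes kFilterBox from the vertical-only fast path for the same reason). The surviving reduction, as a sketch:

    if (filtering == kFilterBox) {
      /* Less than 2x downscale: box degenerates to bilinear. */
      if (dst_width * 2 >= src_width && dst_height * 2 >= src_height)
        filtering = kFilterBilinear;
      /* Upscale no longer falls back to bilinear; box is kept. */
    }
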
diff --git a/TMessagesProj/jni/libyuv/source/scale_posix.cc b/TMessagesProj/jni/libyuv/source/scale_gcc.cc
similarity index 69%
rename from TMessagesProj/jni/libyuv/source/scale_posix.cc
rename to TMessagesProj/jni/libyuv/source/scale_gcc.cc
index 352e66782..a1ae4e277 100644
--- a/TMessagesProj/jni/libyuv/source/scale_posix.cc
+++ b/TMessagesProj/jni/libyuv/source/scale_gcc.cc
@@ -9,6 +9,7 @@
*/
#include "libyuv/row.h"
+#include "libyuv/scale_row.h"
#ifdef __cplusplus
namespace libyuv {
@@ -16,7 +17,8 @@ extern "C" {
#endif
// This module is for GCC x86 and x64.
-#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
+#if !defined(LIBYUV_DISABLE_X86) && \
+ (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
// Offsets for source bytes 0 to 9
static uvec8 kShuf0 =
@@ -96,112 +98,8 @@ static uvec16 kScaleAb2 =
// Generated using gcc disassembly on Visual C object file:
// objdump -D yuvscaler.obj >yuvscaler.txt
-void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "psrlw $0x8,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1"
-#endif
- );
-}
-
-void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
-
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10, 0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "psrlw $0x8,%%xmm0 \n"
- "movdqa %%xmm1,%%xmm3 \n"
- "psrlw $0x8,%%xmm1 \n"
- "pand %%xmm5,%%xmm2 \n"
- "pand %%xmm5,%%xmm3 \n"
- "pavgw %%xmm2,%%xmm0 \n"
- "pavgw %%xmm3,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm5"
-#endif
- );
-}
-
-void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
-
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- MEMOPREG(movdqa,0x00,0,3,1,xmm2) // movdqa (%0,%3,1),%%xmm2
- BUNDLEALIGN
- MEMOPREG(movdqa,0x10,0,3,1,xmm3) // movdqa 0x10(%0,%3,1),%%xmm3
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pavgb %%xmm2,%%xmm0 \n"
- "pavgb %%xmm3,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "psrlw $0x8,%%xmm0 \n"
- "movdqa %%xmm1,%%xmm3 \n"
- "psrlw $0x8,%%xmm1 \n"
- "pand %%xmm5,%%xmm2 \n"
- "pand %%xmm5,%%xmm3 \n"
- "pavgw %%xmm2,%%xmm0 \n"
- "pavgw %%xmm3,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- : "r"((intptr_t)(src_stride)) // %3
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
- );
-}
-
-void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
+void ScaleRowDown2_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width) {
asm volatile (
LABELALIGN
"1: \n"
@@ -218,35 +116,28 @@ void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(dst_width) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1"
-#endif
+ :: "memory", "cc", "xmm0", "xmm1"
);
}
-void ScaleRowDown2Linear_Unaligned_SSE2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
+void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width) {
asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
+ "pcmpeqb %%xmm4,%%xmm4 \n"
+ "psrlw $0xf,%%xmm4 \n"
+ "packuswb %%xmm4,%%xmm4 \n"
+ "pxor %%xmm5,%%xmm5 \n"
LABELALIGN
"1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
+ "movdqu " MEMACCESS2(0x10, 0) ",%%xmm1 \n"
"lea " MEMLEA(0x20,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "psrlw $0x8,%%xmm0 \n"
- "movdqa %%xmm1,%%xmm3 \n"
- "psrlw $0x8,%%xmm1 \n"
- "pand %%xmm5,%%xmm2 \n"
- "pand %%xmm5,%%xmm3 \n"
- "pavgw %%xmm2,%%xmm0 \n"
- "pavgw %%xmm3,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm1 \n"
+ "pavgw %%xmm5,%%xmm0 \n"
+ "pavgw %%xmm5,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
"movdqu %%xmm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x10,1) ",%1 \n"
"sub $0x10,%2 \n"
@@ -254,40 +145,36 @@ void ScaleRowDown2Linear_Unaligned_SSE2(const uint8* src_ptr,
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(dst_width) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm5"
-#endif
+ :: "memory", "cc", "xmm0", "xmm1", "xmm4", "xmm5"
);
}
-void ScaleRowDown2Box_Unaligned_SSE2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
+void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width) {
asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
+ "pcmpeqb %%xmm4,%%xmm4 \n"
+ "psrlw $0xf,%%xmm4 \n"
+ "packuswb %%xmm4,%%xmm4 \n"
+ "pxor %%xmm5,%%xmm5 \n"
LABELALIGN
"1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
"movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2
- BUNDLEALIGN
MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3
"lea " MEMLEA(0x20,0) ",%0 \n"
- "pavgb %%xmm2,%%xmm0 \n"
- "pavgb %%xmm3,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "psrlw $0x8,%%xmm0 \n"
- "movdqa %%xmm1,%%xmm3 \n"
- "psrlw $0x8,%%xmm1 \n"
- "pand %%xmm5,%%xmm2 \n"
- "pand %%xmm5,%%xmm3 \n"
- "pavgw %%xmm2,%%xmm0 \n"
- "pavgw %%xmm3,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm1 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm4,%%xmm3 \n"
+ "paddw %%xmm2,%%xmm0 \n"
+ "paddw %%xmm3,%%xmm1 \n"
+ "psrlw $0x1,%%xmm0 \n"
+ "psrlw $0x1,%%xmm1 \n"
+ "pavgw %%xmm5,%%xmm0 \n"
+ "pavgw %%xmm5,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
"movdqu %%xmm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x10,1) ",%1 \n"
"sub $0x10,%2 \n"
@@ -296,17 +183,110 @@ void ScaleRowDown2Box_Unaligned_SSE2(const uint8* src_ptr,
"+r"(dst_ptr), // %1
"+r"(dst_width) // %2
: "r"((intptr_t)(src_stride)) // %3
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
}
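
The rewritten 2x2 box kernel trades the old pavgb/pand cascade for pmaddubsw with all-ones byte weights (xmm4), which sums horizontal pairs into words; paddw then adds the second row, and psrlw $1 followed by pavgw against zero rounds the 4-pixel sum. A scalar equivalent (uint8/uint16 are libyuv's basic_types typedefs):

    static uint8 Box2x2(uint8 a, uint8 b, uint8 c, uint8 d) {
      uint16 sum = (uint16)(a + b + c + d);  /* pmaddubsw + paddw, max 1020 */
      sum >>= 1;                             /* psrlw $1 */
      return (uint8)((sum + 1) >> 1);        /* pavgw with zero */
    }
    /* Halving then round-halving equals (a + b + c + d + 2) >> 2 for all inputs. */
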
-void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+#ifdef HAS_SCALEROWDOWN2_AVX2
+void ScaleRowDown2_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width) {
+ asm volatile (
+ LABELALIGN
+ "1: \n"
+ "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
+ "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
+ "lea " MEMLEA(0x40,0) ",%0 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0," MEMACCESS(1) " \n"
+ "lea " MEMLEA(0x20,1) ",%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ :: "memory", "cc", "xmm0", "xmm1"
+ );
+}
+
+void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width) {
+ asm volatile (
+ "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
+ "vpsrlw $0xf,%%ymm4,%%ymm4 \n"
+ "vpackuswb %%ymm4,%%ymm4,%%ymm4 \n"
+ "vpxor %%ymm5,%%ymm5,%%ymm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
+ "vmovdqu " MEMACCESS2(0x20, 0) ",%%ymm1 \n"
+ "lea " MEMLEA(0x40,0) ",%0 \n"
+ "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
+ "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
+ "vpavgw %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpavgw %%ymm5,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0," MEMACCESS(1) " \n"
+ "lea " MEMLEA(0x20,1) ",%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ :: "memory", "cc", "xmm0", "xmm1", "xmm4", "xmm5"
+ );
+}
+
+void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width) {
+ asm volatile (
+ "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
+ "vpsrlw $0xf,%%ymm4,%%ymm4 \n"
+ "vpackuswb %%ymm4,%%ymm4,%%ymm4 \n"
+ "vpxor %%ymm5,%%ymm5,%%ymm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
+ "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
+ MEMOPREG(vmovdqu,0x00,0,3,1,ymm2) // vmovdqu (%0,%3,1),%%ymm2
+ MEMOPREG(vmovdqu,0x20,0,3,1,ymm3) // vmovdqu 0x20(%0,%3,1),%%ymm3
+ "lea " MEMLEA(0x40,0) ",%0 \n"
+ "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
+ "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
+ "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
+ "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
+ "vpaddw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpaddw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vpsrlw $0x1,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x1,%%ymm1,%%ymm1 \n"
+ "vpavgw %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpavgw %%ymm5,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0," MEMACCESS(1) " \n"
+ "lea " MEMLEA(0x20,1) ",%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ : "r"((intptr_t)(src_stride)) // %3
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
+ );
+}
+#endif // HAS_SCALEROWDOWN2_AVX2
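
The vpermq $0xd8 that follows each vpackuswb in the AVX2 kernels is a lane fixup: 256-bit pack instructions operate per 128-bit lane, so the packed qwords come out interleaved as [A0 B0 A1 B1], and selecting qwords 0,2,1,3 (imm 0xd8) restores [A0 A1 B0 B1]. The same pairing in intrinsics (lo/hi are illustrative):

    #include <immintrin.h>

    __m256i pack_linear(__m256i lo, __m256i hi) {
      __m256i p = _mm256_packus_epi16(lo, hi);    /* lane-interleaved */
      return _mm256_permute4x64_epi64(p, 0xd8);   /* linear byte order */
    }
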
+
+void ScaleRowDown4_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
asm volatile (
"pcmpeqb %%xmm5,%%xmm5 \n"
@@ -315,8 +295,8 @@ void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
+ "movdqu " MEMACCESS(0) ",%%xmm0 \n"
+ "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
"lea " MEMLEA(0x20,0) ",%0 \n"
"pand %%xmm5,%%xmm0 \n"
"pand %%xmm5,%%xmm1 \n"
@@ -330,55 +310,50 @@ void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(dst_width) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm5"
-#endif
+ :: "memory", "cc", "xmm0", "xmm1", "xmm5"
);
}
-void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
intptr_t stridex3 = 0;
asm volatile (
- "pcmpeqb %%xmm7,%%xmm7 \n"
- "psrlw $0x8,%%xmm7 \n"
+ "pcmpeqb %%xmm4,%%xmm4 \n"
+ "psrlw $0xf,%%xmm4 \n"
+ "movdqa %%xmm4,%%xmm5 \n"
+ "packuswb %%xmm4,%%xmm4 \n"
+ "psllw $0x3,%%xmm5 \n"
"lea " MEMLEA4(0x00,4,4,2) ",%3 \n"
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- MEMOPREG(movdqa,0x00,0,4,1,xmm2) // movdqa (%0,%4,1),%%xmm2
- BUNDLEALIGN
- MEMOPREG(movdqa,0x10,0,4,1,xmm3) // movdqa 0x10(%0,%4,1),%%xmm3
- "pavgb %%xmm2,%%xmm0 \n"
- "pavgb %%xmm3,%%xmm1 \n"
- MEMOPREG(movdqa,0x00,0,4,2,xmm2) // movdqa (%0,%4,2),%%xmm2
- BUNDLEALIGN
- MEMOPREG(movdqa,0x10,0,4,2,xmm3) // movdqa 0x10(%0,%4,2),%%xmm3
- MEMOPREG(movdqa,0x00,0,3,1,xmm4) // movdqa (%0,%3,1),%%xmm4
- MEMOPREG(movdqa,0x10,0,3,1,xmm5) // movdqa 0x10(%0,%3,1),%%xmm5
+ "movdqu " MEMACCESS(0) ",%%xmm0 \n"
+ "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
+ MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2
+ MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm1 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm4,%%xmm3 \n"
+ "paddw %%xmm2,%%xmm0 \n"
+ "paddw %%xmm3,%%xmm1 \n"
+ MEMOPREG(movdqu,0x00,0,4,2,xmm2) // movdqu (%0,%4,2),%%xmm2
+ MEMOPREG(movdqu,0x10,0,4,2,xmm3) // movdqu 0x10(%0,%4,2),%%xmm3
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm4,%%xmm3 \n"
+ "paddw %%xmm2,%%xmm0 \n"
+ "paddw %%xmm3,%%xmm1 \n"
+ MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2
+ MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3
"lea " MEMLEA(0x20,0) ",%0 \n"
- "pavgb %%xmm4,%%xmm2 \n"
- "pavgb %%xmm2,%%xmm0 \n"
- "pavgb %%xmm5,%%xmm3 \n"
- "pavgb %%xmm3,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "psrlw $0x8,%%xmm0 \n"
- "movdqa %%xmm1,%%xmm3 \n"
- "psrlw $0x8,%%xmm1 \n"
- "pand %%xmm7,%%xmm2 \n"
- "pand %%xmm7,%%xmm3 \n"
- "pavgw %%xmm2,%%xmm0 \n"
- "pavgw %%xmm3,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "psrlw $0x8,%%xmm0 \n"
- "pand %%xmm7,%%xmm2 \n"
- "pavgw %%xmm2,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm4,%%xmm3 \n"
+ "paddw %%xmm2,%%xmm0 \n"
+ "paddw %%xmm3,%%xmm1 \n"
+ "phaddw %%xmm1,%%xmm0 \n"
+ "paddw %%xmm5,%%xmm0 \n"
+ "psrlw $0x4,%%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
"movq %%xmm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x8,1) ",%1 \n"
"sub $0x8,%2 \n"
@@ -388,16 +363,98 @@ void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
"+r"(dst_width), // %2
"+r"(stridex3) // %3
: "r"((intptr_t)(src_stride)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm7"
-#endif
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
+
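
The 4x4 box kernel above builds 16-pixel sums in words (four pmaddubsw pair-sums accumulated across four rows, then phaddw to fold adjacent words), and xmm5 holds 8 in each word, so the tail computes a correctly rounded 16-pixel average:

    /* sum16 <= 16 * 255 = 4080; e.g. an all-255 block gives
     * (4080 + 8) >> 4 = 255. */
    static uint8 Box4x4Round(unsigned sum16) {
      return (uint8)((sum16 + 8) >> 4);  /* paddw %%xmm5 ; psrlw $4 */
    }
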
+#ifdef HAS_SCALEROWDOWN4_AVX2
+void ScaleRowDown4_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width) {
+ asm volatile (
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+ "vpsrld $0x18,%%ymm5,%%ymm5 \n"
+ "vpslld $0x10,%%ymm5,%%ymm5 \n"
+ LABELALIGN
+ "1: \n"
+ "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
+ "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
+ "lea " MEMLEA(0x40,0) ",%0 \n"
+ "vpand %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpand %%ymm5,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%xmm0," MEMACCESS(1) " \n"
+ "lea " MEMLEA(0x10,1) ",%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ :: "memory", "cc", "xmm0", "xmm1", "xmm5"
+ );
+}
+
+void ScaleRowDown4Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width) {
+ asm volatile (
+ "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
+ "vpsrlw $0xf,%%ymm4,%%ymm4 \n"
+ "vpsllw $0x3,%%ymm4,%%ymm5 \n"
+ "vpackuswb %%ymm4,%%ymm4,%%ymm4 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
+ "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
+ MEMOPREG(vmovdqu,0x00,0,3,1,ymm2) // vmovdqu (%0,%3,1),%%ymm2
+ MEMOPREG(vmovdqu,0x20,0,3,1,ymm3) // vmovdqu 0x20(%0,%3,1),%%ymm3
+ "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
+ "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
+ "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
+ "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
+ "vpaddw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpaddw %%ymm3,%%ymm1,%%ymm1 \n"
+ MEMOPREG(vmovdqu,0x00,0,3,2,ymm2) // vmovdqu (%0,%3,2),%%ymm2
+ MEMOPREG(vmovdqu,0x20,0,3,2,ymm3) // vmovdqu 0x20(%0,%3,2),%%ymm3
+ "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
+ "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
+ "vpaddw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpaddw %%ymm3,%%ymm1,%%ymm1 \n"
+ MEMOPREG(vmovdqu,0x00,0,4,1,ymm2) // vmovdqu (%0,%4,1),%%ymm2
+ MEMOPREG(vmovdqu,0x20,0,4,1,ymm3) // vmovdqu 0x20(%0,%4,1),%%ymm3
+ "lea " MEMLEA(0x40,0) ",%0 \n"
+ "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
+ "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
+ "vpaddw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpaddw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vphaddw %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpaddw %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x4,%%ymm0,%%ymm0 \n"
+ "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%xmm0," MEMACCESS(1) " \n"
+ "lea " MEMLEA(0x10,1) ",%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ : "r"((intptr_t)(src_stride)), // %3
+ "r"((intptr_t)(src_stride * 3)) // %4
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+ );
+}
+#endif // HAS_SCALEROWDOWN4_AVX2
+
void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
asm volatile (
@@ -412,8 +469,8 @@ void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
asm volatile (
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm2 \n"
+ "movdqu " MEMACCESS(0) ",%%xmm0 \n"
+ "movdqu " MEMACCESS2(0x10,0) ",%%xmm2 \n"
"lea " MEMLEA(0x20,0) ",%0 \n"
"movdqa %%xmm2,%%xmm1 \n"
"palignr $0x8,%%xmm0,%%xmm1 \n"
@@ -429,11 +486,7 @@ void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(dst_width) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
+ :: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
@@ -461,8 +514,8 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
asm volatile (
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm6 \n"
- MEMOPREG(movdqa,0x00,0,3,1,xmm7) // movdqa (%0,%3),%%xmm7
+ "movdqu " MEMACCESS(0) ",%%xmm6 \n"
+ MEMOPREG(movdqu,0x00,0,3,1,xmm7) // movdqu (%0,%3),%%xmm7
"pavgb %%xmm7,%%xmm6 \n"
"pshufb %%xmm2,%%xmm6 \n"
"pmaddubsw %%xmm5,%%xmm6 \n"
@@ -479,9 +532,8 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
"psrlw $0x2,%%xmm6 \n"
"packuswb %%xmm6,%%xmm6 \n"
"movq %%xmm6," MEMACCESS2(0x8,1) " \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm6 \n"
- BUNDLEALIGN
- MEMOPREG(movdqa,0x10,0,3,1,xmm7) // movdqa 0x10(%0,%3),%%xmm7
+ "movdqu " MEMACCESS2(0x10,0) ",%%xmm6 \n"
+ MEMOPREG(movdqu,0x10,0,3,1,xmm7) // movdqu 0x10(%0,%3),%%xmm7
"lea " MEMLEA(0x20,0) ",%0 \n"
"pavgb %%xmm7,%%xmm6 \n"
"pshufb %%xmm4,%%xmm6 \n"
@@ -498,13 +550,8 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
"+r"(dst_width) // %2
: "r"((intptr_t)(src_stride)), // %3
"m"(kMadd21) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
);
}
@@ -533,8 +580,8 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
asm volatile (
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm6 \n"
- MEMOPREG(movdqa,0x00,0,3,1,xmm7) // movdqa (%0,%3,1),%%xmm7
+ "movdqu " MEMACCESS(0) ",%%xmm6 \n"
+ MEMOPREG(movdqu,0x00,0,3,1,xmm7) // movdqu (%0,%3,1),%%xmm7
"pavgb %%xmm6,%%xmm7 \n"
"pavgb %%xmm7,%%xmm6 \n"
"pshufb %%xmm2,%%xmm6 \n"
@@ -553,8 +600,8 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
"psrlw $0x2,%%xmm6 \n"
"packuswb %%xmm6,%%xmm6 \n"
"movq %%xmm6," MEMACCESS2(0x8,1) " \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm6 \n"
- MEMOPREG(movdqa,0x10,0,3,1,xmm7) // movdqa 0x10(%0,%3,1),%%xmm7
+ "movdqu " MEMACCESS2(0x10,0) ",%%xmm6 \n"
+ MEMOPREG(movdqu,0x10,0,3,1,xmm7) // movdqu 0x10(%0,%3,1),%%xmm7
"lea " MEMLEA(0x20,0) ",%0 \n"
"pavgb %%xmm6,%%xmm7 \n"
"pavgb %%xmm7,%%xmm6 \n"
@@ -572,13 +619,8 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
"+r"(dst_width) // %2
: "r"((intptr_t)(src_stride)), // %3
"m"(kMadd21) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
);
}
@@ -590,8 +632,8 @@ void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
+ "movdqu " MEMACCESS(0) ",%%xmm0 \n"
+ "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
"lea " MEMLEA(0x20,0) ",%0 \n"
"pshufb %%xmm4,%%xmm0 \n"
"pshufb %%xmm5,%%xmm1 \n"
@@ -607,10 +649,7 @@ void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
"+r"(dst_width) // %2
: "m"(kShuf38a), // %3
"m"(kShuf38b) // %4
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm4", "xmm5"
-#endif
+ : "memory", "cc", "xmm0", "xmm1", "xmm4", "xmm5"
);
}
@@ -631,9 +670,10 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
asm volatile (
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(pavgb,0x00,0,3,1,xmm0) // pavgb (%0,%3,1),%%xmm0
+ "movdqu " MEMACCESS(0) ",%%xmm0 \n"
+ MEMOPREG(movdqu,0x00,0,3,1,xmm1) // movdqu (%0,%3,1),%%xmm1
"lea " MEMLEA(0x10,0) ",%0 \n"
+ "pavgb %%xmm1,%%xmm0 \n"
"movdqa %%xmm0,%%xmm1 \n"
"pshufb %%xmm2,%%xmm1 \n"
"movdqa %%xmm0,%%xmm6 \n"
@@ -643,23 +683,18 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
"paddusw %%xmm0,%%xmm1 \n"
"pmulhuw %%xmm5,%%xmm1 \n"
"packuswb %%xmm1,%%xmm1 \n"
- "sub $0x6,%2 \n"
"movd %%xmm1," MEMACCESS(1) " \n"
"psrlq $0x10,%%xmm1 \n"
"movd %%xmm1," MEMACCESS2(0x2,1) " \n"
"lea " MEMLEA(0x6,1) ",%1 \n"
+ "sub $0x6,%2 \n"
"jg 1b \n"
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(dst_width) // %2
: "r"((intptr_t)(src_stride)) // %3
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
-#endif
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
);
}
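
// [Editor's sketch, not part of this patch] Scalar model of
// ScaleRowDown38_2_Box_SSSE3 above: 8 source columns shrink to 3 outputs.
// pavgb first averages the two rows with rounding; the shuffle/paddusw/
// pmulhuw stages then average groups of 3, 3 and 2 columns using the
// kScaleAb2 constants (65536/3, 65536/3, 65536/2). Names are illustrative.
static void ScaleRowDown38_2_Box_Sketch(const uint8* src_ptr,
                                        ptrdiff_t src_stride,
                                        uint8* dst_ptr, int dst_width) {
  int i, k;
  for (i = 0; i < dst_width; i += 3) {
    int a[8];
    for (k = 0; k < 8; ++k) {  // pavgb: rounded average of the two rows
      a[k] = (src_ptr[k] + src_ptr[src_stride + k] + 1) >> 1;
    }
    dst_ptr[i + 0] = (uint8)(((a[0] + a[1] + a[2]) * (65536 / 3)) >> 16);
    dst_ptr[i + 1] = (uint8)(((a[3] + a[4] + a[5]) * (65536 / 3)) >> 16);
    dst_ptr[i + 2] = (uint8)(((a[6] + a[7]) * (65536 / 2)) >> 16);
    src_ptr += 8;
  }
}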
@@ -679,8 +714,8 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
asm volatile (
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movdqa,0x00,0,3,1,xmm6) // movdqa (%0,%3,1),%%xmm6
+ "movdqu " MEMACCESS(0) ",%%xmm0 \n"
+ MEMOPREG(movdqu,0x00,0,3,1,xmm6) // movdqu (%0,%3,1),%%xmm6
"movhlps %%xmm0,%%xmm1 \n"
"movhlps %%xmm6,%%xmm7 \n"
"punpcklbw %%xmm5,%%xmm0 \n"
@@ -689,7 +724,7 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
"punpcklbw %%xmm5,%%xmm7 \n"
"paddusw %%xmm6,%%xmm0 \n"
"paddusw %%xmm7,%%xmm1 \n"
- MEMOPREG(movdqa,0x00,0,3,2,xmm6) // movdqa (%0,%3,2),%%xmm6
+ MEMOPREG(movdqu,0x00,0,3,2,xmm6) // movdqu (%0,%3,2),%%xmm6
"lea " MEMLEA(0x10,0) ",%0 \n"
"movhlps %%xmm6,%%xmm7 \n"
"punpcklbw %%xmm5,%%xmm6 \n"
@@ -711,80 +746,81 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
"paddusw %%xmm7,%%xmm6 \n"
"pmulhuw %%xmm4,%%xmm6 \n"
"packuswb %%xmm6,%%xmm6 \n"
- "sub $0x6,%2 \n"
"movd %%xmm6," MEMACCESS(1) " \n"
"psrlq $0x10,%%xmm6 \n"
"movd %%xmm6," MEMACCESS2(0x2,1) " \n"
"lea " MEMLEA(0x6,1) ",%1 \n"
+ "sub $0x6,%2 \n"
"jg 1b \n"
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(dst_width) // %2
: "r"((intptr_t)(src_stride)) // %3
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
);
}
-void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int src_width, int src_height) {
- int tmp_height = 0;
- intptr_t tmp_src = 0;
+// Reads 16 bytes and accumulates to 16 shorts at a time.
+void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
asm volatile (
- "pxor %%xmm4,%%xmm4 \n"
- "sub $0x1,%5 \n"
+ "pxor %%xmm5,%%xmm5 \n"
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "mov %0,%3 \n"
- "add %6,%0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm4,%%xmm0 \n"
- "punpckhbw %%xmm4,%%xmm1 \n"
- "mov %5,%2 \n"
- "test %2,%2 \n"
- "je 3f \n"
-
- LABELALIGN
- "2: \n"
- "movdqa " MEMACCESS(0) ",%%xmm2 \n"
- "add %6,%0 \n"
- "movdqa %%xmm2,%%xmm3 \n"
- "punpcklbw %%xmm4,%%xmm2 \n"
- "punpckhbw %%xmm4,%%xmm3 \n"
+ "movdqu " MEMACCESS(0) ",%%xmm3 \n"
+ "lea " MEMLEA(0x10,0) ",%0 \n" // src_ptr += 16
+ "movdqu " MEMACCESS(1) ",%%xmm0 \n"
+ "movdqu " MEMACCESS2(0x10,1) ",%%xmm1 \n"
+ "movdqa %%xmm3,%%xmm2 \n"
+ "punpcklbw %%xmm5,%%xmm2 \n"
+ "punpckhbw %%xmm5,%%xmm3 \n"
"paddusw %%xmm2,%%xmm0 \n"
"paddusw %%xmm3,%%xmm1 \n"
- "sub $0x1,%2 \n"
- "jg 2b \n"
-
- LABELALIGN
- "3: \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "movdqa %%xmm1," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x10,3) ",%0 \n"
+ "movdqu %%xmm0," MEMACCESS(1) " \n"
+ "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
"lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x10,%4 \n"
+ "sub $0x10,%2 \n"
"jg 1b \n"
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
- "+r"(tmp_height), // %2
- "+r"(tmp_src), // %3
- "+r"(src_width), // %4
- "+rm"(src_height) // %5
- : "rm"((intptr_t)(src_stride)) // %6
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
-#endif
+ "+r"(src_width) // %2
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
}
+
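// [Editor's sketch, not part of this patch] What ScaleAddRow_SSE2 (and the
// AVX2 variant below) compute per call: one row of source bytes added into
// a row of 16-bit accumulators. The scalar form wraps where the SIMD code
// saturates (paddusw/vpaddusw); names are illustrative.
static void ScaleAddRow_Sketch(const uint8* src_ptr, uint16* dst_ptr,
                               int src_width) {
  int x;
  for (x = 0; x < src_width; ++x) {
    dst_ptr[x] += src_ptr[x];
  }
}
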
+#ifdef HAS_SCALEADDROW_AVX2
+// Reads 32 bytes and accumulates to 32 shorts at a time.
+void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
+ asm volatile (
+ "vpxor %%ymm5,%%ymm5,%%ymm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu " MEMACCESS(0) ",%%ymm3 \n"
+ "lea " MEMLEA(0x20,0) ",%0 \n" // src_ptr += 32
+ "vpermq $0xd8,%%ymm3,%%ymm3 \n"
+ "vpunpcklbw %%ymm5,%%ymm3,%%ymm2 \n"
+ "vpunpckhbw %%ymm5,%%ymm3,%%ymm3 \n"
+ "vpaddusw " MEMACCESS(1) ",%%ymm2,%%ymm0 \n"
+ "vpaddusw " MEMACCESS2(0x20,1) ",%%ymm3,%%ymm1 \n"
+ "vmovdqu %%ymm0," MEMACCESS(1) " \n"
+ "vmovdqu %%ymm1," MEMACCESS2(0x20,1) " \n"
+ "lea " MEMLEA(0x40,1) ",%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(src_width) // %2
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
+ );
+}
+#endif // HAS_SCALEADDROW_AVX2
+
// Bilinear column filtering. SSSE3 version.
void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx) {
@@ -813,7 +849,6 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
MEMOPARG(movzwl,0x00,1,3,1,k2) // movzwl (%1,%3,1),%k2
"movd %k2,%%xmm0 \n"
"psrlw $0x9,%%xmm1 \n"
- BUNDLEALIGN
MEMOPARG(movzwl,0x00,1,4,1,k2) // movzwl (%1,%4,1),%k2
"movd %k2,%%xmm4 \n"
"pshufb %%xmm5,%%xmm1 \n"
@@ -853,13 +888,8 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
"+rm"(dst_width) // %5
: "rm"(x), // %6
"rm"(dx) // %7
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
-#endif
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
);
}
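
// [Editor's sketch, not part of this patch] Scalar model of the bilinear
// column filter above. x and dx are 16.16 fixed point; the SIMD path keeps
// only the top 7 fraction bits (psrlw $0x9), so each output is roughly
// a + (b - a) * f / 128. Names are illustrative.
static void ScaleFilterCols_Sketch(uint8* dst_ptr, const uint8* src_ptr,
                                   int dst_width, int x, int dx) {
  int j;
  for (j = 0; j < dst_width; ++j) {
    int xi = x >> 16;         // integer source position
    int f = (x >> 9) & 0x7f;  // 7-bit fraction
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[j] = (uint8)(a + (((b - a) * f) >> 7));
    x += dx;
  }
}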
@@ -870,25 +900,21 @@ void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
asm volatile (
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(1) ",%%xmm0 \n"
+ "movdqu " MEMACCESS(1) ",%%xmm0 \n"
"lea " MEMLEA(0x10,1) ",%1 \n"
"movdqa %%xmm0,%%xmm1 \n"
"punpcklbw %%xmm0,%%xmm0 \n"
"punpckhbw %%xmm1,%%xmm1 \n"
- "sub $0x20,%2 \n"
- "movdqa %%xmm0," MEMACCESS(0) " \n"
- "movdqa %%xmm1," MEMACCESS2(0x10,0) " \n"
+ "movdqu %%xmm0," MEMACCESS(0) " \n"
+ "movdqu %%xmm1," MEMACCESS2(0x10,0) " \n"
"lea " MEMLEA(0x20,0) ",%0 \n"
+ "sub $0x20,%2 \n"
"jg 1b \n"
: "+r"(dst_ptr), // %0
"+r"(src_ptr), // %1
"+r"(dst_width) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1"
-#endif
+ :: "memory", "cc", "xmm0", "xmm1"
);
}
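
// [Editor's sketch, not part of this patch] ScaleColsUp2_SSE2 above is
// plain pixel doubling (punpcklbw/punpckhbw of a register with itself):
static void ScaleColsUp2_Sketch(uint8* dst_ptr, const uint8* src_ptr,
                                int dst_width) {
  int i;
  for (i = 0; i < dst_width / 2; ++i) {
    dst_ptr[2 * i] = dst_ptr[2 * i + 1] = src_ptr[i];
  }
}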
@@ -898,22 +924,18 @@ void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
asm volatile (
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
+ "movdqu " MEMACCESS(0) ",%%xmm0 \n"
+ "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
"lea " MEMLEA(0x20,0) ",%0 \n"
"shufps $0xdd,%%xmm1,%%xmm0 \n"
- "sub $0x4,%2 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
+ "movdqu %%xmm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x10,1) ",%1 \n"
+ "sub $0x4,%2 \n"
"jg 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
"+r"(dst_width) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1"
-#endif
+ :: "memory", "cc", "xmm0", "xmm1"
);
}
@@ -923,25 +945,21 @@ void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
asm volatile (
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
+ "movdqu " MEMACCESS(0) ",%%xmm0 \n"
+ "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
"lea " MEMLEA(0x20,0) ",%0 \n"
"movdqa %%xmm0,%%xmm2 \n"
"shufps $0x88,%%xmm1,%%xmm0 \n"
"shufps $0xdd,%%xmm1,%%xmm2 \n"
"pavgb %%xmm2,%%xmm0 \n"
- "sub $0x4,%2 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
+ "movdqu %%xmm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x10,1) ",%1 \n"
+ "sub $0x4,%2 \n"
"jg 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
"+r"(dst_width) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1"
-#endif
+ :: "memory", "cc", "xmm0", "xmm1"
);
}
@@ -951,11 +969,10 @@ void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
asm volatile (
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- BUNDLEALIGN
- MEMOPREG(movdqa,0x00,0,3,1,xmm2) // movdqa (%0,%3,1),%%xmm2
- MEMOPREG(movdqa,0x10,0,3,1,xmm3) // movdqa 0x10(%0,%3,1),%%xmm3
+ "movdqu " MEMACCESS(0) ",%%xmm0 \n"
+ "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
+ MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2
+ MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3
"lea " MEMLEA(0x20,0) ",%0 \n"
"pavgb %%xmm2,%%xmm0 \n"
"pavgb %%xmm3,%%xmm1 \n"
@@ -963,29 +980,23 @@ void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
"shufps $0x88,%%xmm1,%%xmm0 \n"
"shufps $0xdd,%%xmm1,%%xmm2 \n"
"pavgb %%xmm2,%%xmm0 \n"
- "sub $0x4,%2 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
+ "movdqu %%xmm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x10,1) ",%1 \n"
+ "sub $0x4,%2 \n"
"jg 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
"+r"(dst_width) // %2
: "r"((intptr_t)(src_stride)) // %3
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3"
-#endif
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm3"
);
}
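
// [Editor's sketch, not part of this patch] Scalar model of
// ScaleARGBRowDown2Box_SSE2 above, mirroring its rounding-average
// structure: pavgb ((a + b + 1) >> 1) applied vertically, then
// horizontally, per channel. Names are illustrative.
static void ScaleARGBRowDown2Box_Sketch(const uint8* src_argb,
                                        ptrdiff_t src_stride,
                                        uint8* dst_argb, int dst_width) {
  int i, c;
  for (i = 0; i < dst_width; ++i) {
    for (c = 0; c < 4; ++c) {  // B, G, R, A
      int t = (src_argb[c] + src_argb[src_stride + c] + 1) >> 1;
      int u = (src_argb[4 + c] + src_argb[src_stride + 4 + c] + 1) >> 1;
      dst_argb[c] = (uint8)((t + u + 1) >> 1);
    }
    src_argb += 8;  // consume two source pixels
    dst_argb += 4;  // produce one destination pixel
  }
}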
// Reads 4 pixels at a time.
// Alignment requirement: none; stores now use unaligned movdqu.
void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
- int src_stepx,
- uint8* dst_argb, int dst_width) {
+ int src_stepx, uint8* dst_argb, int dst_width) {
intptr_t src_stepx_x4 = (intptr_t)(src_stepx);
intptr_t src_stepx_x12 = 0;
asm volatile (
@@ -996,29 +1007,22 @@ void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
"movd " MEMACCESS(0) ",%%xmm0 \n"
MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1
"punpckldq %%xmm1,%%xmm0 \n"
- BUNDLEALIGN
MEMOPREG(movd,0x00,0,1,2,xmm2) // movd (%0,%1,2),%%xmm2
MEMOPREG(movd,0x00,0,4,1,xmm3) // movd (%0,%4,1),%%xmm3
"lea " MEMLEA4(0x00,0,1,4) ",%0 \n"
"punpckldq %%xmm3,%%xmm2 \n"
"punpcklqdq %%xmm2,%%xmm0 \n"
- "sub $0x4,%3 \n"
- "movdqa %%xmm0," MEMACCESS(2) " \n"
+ "movdqu %%xmm0," MEMACCESS(2) " \n"
"lea " MEMLEA(0x10,2) ",%2 \n"
+ "sub $0x4,%3 \n"
"jg 1b \n"
: "+r"(src_argb), // %0
"+r"(src_stepx_x4), // %1
"+r"(dst_argb), // %2
"+r"(dst_width), // %3
"+r"(src_stepx_x12) // %4
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3"
-#endif
+ :: "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm3"
);
}
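
// [Editor's sketch, not part of this patch] ScaleARGBRowDownEven_SSE2
// above gathers every src_stepx-th ARGB pixel as a whole 32-bit unit
// (src_stride is unused by the row function). Names are illustrative.
static void ScaleARGBRowDownEven_Sketch(const uint8* src_argb, int src_stepx,
                                        uint8* dst_argb, int dst_width) {
  const uint32* src = (const uint32*)(src_argb);
  uint32* dst = (uint32*)(dst_argb);
  int i;
  for (i = 0; i < dst_width; ++i) {
    dst[i] = src[i * src_stepx];
  }
}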
@@ -1040,11 +1044,9 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
"movq " MEMACCESS(0) ",%%xmm0 \n"
MEMOPREG(movhps,0x00,0,1,1,xmm0) // movhps (%0,%1,1),%%xmm0
MEMOPREG(movq,0x00,0,1,2,xmm1) // movq (%0,%1,2),%%xmm1
- BUNDLEALIGN
MEMOPREG(movhps,0x00,0,4,1,xmm1) // movhps (%0,%4,1),%%xmm1
"lea " MEMLEA4(0x00,0,1,4) ",%0 \n"
"movq " MEMACCESS(5) ",%%xmm2 \n"
- BUNDLEALIGN
MEMOPREG(movhps,0x00,5,1,1,xmm2) // movhps (%5,%1,1),%%xmm2
MEMOPREG(movq,0x00,5,1,2,xmm3) // movq (%5,%1,2),%%xmm3
MEMOPREG(movhps,0x00,5,4,1,xmm3) // movhps (%5,%4,1),%%xmm3
@@ -1055,9 +1057,9 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
"shufps $0x88,%%xmm1,%%xmm0 \n"
"shufps $0xdd,%%xmm1,%%xmm2 \n"
"pavgb %%xmm2,%%xmm0 \n"
- "sub $0x4,%3 \n"
- "movdqa %%xmm0," MEMACCESS(2) " \n"
+ "movdqu %%xmm0," MEMACCESS(2) " \n"
"lea " MEMLEA(0x10,2) ",%2 \n"
+ "sub $0x4,%3 \n"
"jg 1b \n"
: "+r"(src_argb), // %0
"+r"(src_stepx_x4), // %1
@@ -1065,14 +1067,8 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
"+rm"(dst_width), // %3
"+r"(src_stepx_x12), // %4
"+r"(row1) // %5
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3"
-#endif
+ :: "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm3"
);
}
@@ -1111,15 +1107,14 @@ void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
"pextrw $0x3,%%xmm2,%k1 \n"
"punpckldq %%xmm4,%%xmm1 \n"
"punpcklqdq %%xmm1,%%xmm0 \n"
- "sub $0x4,%4 \n"
"movdqu %%xmm0," MEMACCESS(2) " \n"
"lea " MEMLEA(0x10,2) ",%2 \n"
+ "sub $0x4,%4 \n"
"jge 40b \n"
"49: \n"
"test $0x2,%4 \n"
"je 29f \n"
- BUNDLEALIGN
MEMOPREG(movd,0x00,3,0,4,xmm0) // movd (%3,%0,4),%%xmm0
MEMOPREG(movd,0x00,3,1,4,xmm1) // movd (%3,%1,4),%%xmm1
"pextrw $0x5,%%xmm2,%k0 \n"
@@ -1139,13 +1134,8 @@ void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
"+r"(dst_width) // %4
: "rm"(x), // %5
"rm"(dx) // %6
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
-#endif
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
);
}
@@ -1156,28 +1146,22 @@ void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
asm volatile (
LABELALIGN
"1: \n"
- "movdqa " MEMACCESS(1) ",%%xmm0 \n"
+ "movdqu " MEMACCESS(1) ",%%xmm0 \n"
"lea " MEMLEA(0x10,1) ",%1 \n"
"movdqa %%xmm0,%%xmm1 \n"
"punpckldq %%xmm0,%%xmm0 \n"
"punpckhdq %%xmm1,%%xmm1 \n"
- "sub $0x8,%2 \n"
- "movdqa %%xmm0," MEMACCESS(0) " \n"
- "movdqa %%xmm1," MEMACCESS2(0x10,0) " \n"
+ "movdqu %%xmm0," MEMACCESS(0) " \n"
+ "movdqu %%xmm1," MEMACCESS2(0x10,0) " \n"
"lea " MEMLEA(0x20,0) ",%0 \n"
+ "sub $0x8,%2 \n"
"jg 1b \n"
: "+r"(dst_argb), // %0
"+r"(src_argb), // %1
"+r"(dst_width) // %2
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1"
-#endif
+ :: "memory", "cc", NACL_R14
+ "xmm0", "xmm1"
);
}
@@ -1225,7 +1209,6 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
"paddd %%xmm3,%%xmm2 \n"
MEMOPREG(movq,0x00,1,3,4,xmm0) // movq (%1,%3,4),%%xmm0
"psrlw $0x9,%%xmm1 \n"
- BUNDLEALIGN
MEMOPREG(movhps,0x00,1,4,4,xmm0) // movhps (%1,%4,4),%%xmm0
"pshufb %%xmm5,%%xmm1 \n"
"pshufb %%xmm4,%%xmm0 \n"
@@ -1245,7 +1228,6 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
"add $0x1,%2 \n"
"jl 99f \n"
"psrlw $0x9,%%xmm2 \n"
- BUNDLEALIGN
MEMOPREG(movq,0x00,1,3,4,xmm0) // movq (%1,%3,4),%%xmm0
"pshufb %%xmm5,%%xmm2 \n"
"pshufb %%xmm4,%%xmm0 \n"
@@ -1264,13 +1246,8 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
"+r"(x1) // %4
: "rm"(x), // %5
"rm"(dx) // %6
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
-#endif
+ : "memory", "cc", NACL_R14
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
);
}
diff --git a/TMessagesProj/jni/libyuv/source/scale_mips.cc b/TMessagesProj/jni/libyuv/source/scale_mips.cc
index 3eb4f27c4..2298a74b9 100644
--- a/TMessagesProj/jni/libyuv/source/scale_mips.cc
+++ b/TMessagesProj/jni/libyuv/source/scale_mips.cc
@@ -31,7 +31,6 @@ void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
"beqz $t9, 2f \n"
" nop \n"
- ".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
@@ -90,7 +89,6 @@ void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
"bltz $t9, 2f \n"
" nop \n"
- ".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
@@ -188,7 +186,6 @@ void ScaleRowDown4_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
"beqz $t9, 2f \n"
" nop \n"
- ".p2align 2 \n"
"1: \n"
"lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4|
@@ -248,7 +245,6 @@ void ScaleRowDown4Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
"srl $t9, %[dst_width], 1 \n"
"andi $t8, %[dst_width], 1 \n"
- ".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t1, 0(%[s1]) \n" // |7|6|5|4|
@@ -319,7 +315,6 @@ void ScaleRowDown34_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
- ".p2align 2 \n"
"1: \n"
"lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4|
@@ -368,7 +363,6 @@ void ScaleRowDown34_0_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
".set noreorder \n"
"repl.ph $t3, 3 \n" // 0x00030003
- ".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
"lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0|
@@ -425,7 +419,6 @@ void ScaleRowDown34_1_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
".set noreorder \n"
"repl.ph $t2, 3 \n" // 0x00030003
- ".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
"lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0|
@@ -477,7 +470,6 @@ void ScaleRowDown38_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
".set push \n"
".set noreorder \n"
- ".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
@@ -528,7 +520,6 @@ void ScaleRowDown38_2_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
".set push \n"
".set noreorder \n"
- ".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
"lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4|
@@ -586,7 +577,6 @@ void ScaleRowDown38_3_Box_MIPS_DSPR2(const uint8* src_ptr,
".set push \n"
".set noreorder \n"
- ".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
"lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4|
diff --git a/TMessagesProj/jni/libyuv/source/scale_neon.cc b/TMessagesProj/jni/libyuv/source/scale_neon.cc
index 1b8a5ba58..10856cf84 100644
--- a/TMessagesProj/jni/libyuv/source/scale_neon.cc
+++ b/TMessagesProj/jni/libyuv/source/scale_neon.cc
@@ -16,7 +16,8 @@ extern "C" {
#endif
// This module is for GCC Neon.
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
+ !defined(__aarch64__)
// NEON downscalers with interpolation.
// Provided by Fritz Koenig
@@ -25,7 +26,6 @@ extern "C" {
void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
// load even pixels into q0, odd into q1
MEMACCESS(0)
@@ -42,13 +42,35 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
);
}
+// Read 32x1 average down and write 16x1.
+void ScaleRowDown2Linear_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst, int dst_width) {
+ asm volatile (
+ "1: \n"
+ MEMACCESS(0)
+ "vld1.8 {q0, q1}, [%0]! \n" // load pixels and post inc
+ "subs %2, %2, #16 \n" // 16 processed per loop
+ "vpaddl.u8 q0, q0 \n" // add adjacent
+ "vpaddl.u8 q1, q1 \n"
+ "vrshrn.u16 d0, q0, #1 \n" // downshift, round and pack
+ "vrshrn.u16 d1, q1, #1 \n"
+ MEMACCESS(1)
+ "vst1.8 {q0}, [%1]! \n"
+ "bgt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst), // %1
+ "+r"(dst_width) // %2
+ :
+ : "q0", "q1" // Clobber List
+ );
+}
+
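// [Editor's sketch, not part of this patch] Scalar equivalent of
// ScaleRowDown2Linear_NEON above: vpaddl.u8 followed by vrshrn.u16 #1 is a
// rounded average of each horizontal byte pair.
static void ScaleRowDown2Linear_Sketch(const uint8* src_ptr, uint8* dst,
                                       int dst_width) {
  int i;
  for (i = 0; i < dst_width; ++i) {
    dst[i] = (uint8)((src_ptr[2 * i] + src_ptr[2 * i + 1] + 1) >> 1);
  }
}
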
// Read 32x2 average down and write 16x1.
void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
asm volatile (
// change the stride to row 2 pointer
"add %1, %0 \n"
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {q0, q1}, [%0]! \n" // load row 1 and post inc
@@ -76,7 +98,6 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
@@ -98,7 +119,6 @@ void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
const uint8* src_ptr2 = src_ptr + src_stride * 2;
const uint8* src_ptr3 = src_ptr + src_stride * 3;
asm volatile (
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {q0}, [%0]! \n" // load up 16x4
@@ -137,7 +157,6 @@ void ScaleRowDown34_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
@@ -160,7 +179,6 @@ void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
asm volatile (
"vmov.u8 d24, #3 \n"
"add %3, %0 \n"
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
@@ -220,7 +238,6 @@ void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
asm volatile (
"vmov.u8 d24, #3 \n"
"add %3, %0 \n"
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
@@ -275,7 +292,6 @@ void ScaleRowDown38_NEON(const uint8* src_ptr,
asm volatile (
MEMACCESS(3)
"vld1.8 {q3}, [%3] \n"
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {d0, d1, d2, d3}, [%0]! \n"
@@ -309,7 +325,6 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
MEMACCESS(7)
"vld1.8 {q15}, [%7] \n"
"add %3, %0 \n"
- ".p2align 2 \n"
"1: \n"
// d0 = 00 40 01 41 02 42 03 43
@@ -425,7 +440,6 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
MEMACCESS(5)
"vld1.8 {q14}, [%5] \n"
"add %3, %0 \n"
- ".p2align 2 \n"
"1: \n"
// d0 = 00 40 01 41 02 42 03 43
@@ -516,6 +530,110 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
);
}
+void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint16* dst_ptr, int src_width, int src_height) {
+ const uint8* src_tmp = NULL;
+ asm volatile (
+ "1: \n"
+ "mov %0, %1 \n"
+ "mov r12, %5 \n"
+ "veor q2, q2, q2 \n"
+ "veor q3, q3, q3 \n"
+ "2: \n"
+ // load 16 pixels into q0
+ MEMACCESS(0)
+ "vld1.8 {q0}, [%0], %3 \n"
+ "vaddw.u8 q3, q3, d1 \n"
+ "vaddw.u8 q2, q2, d0 \n"
+ "subs r12, r12, #1 \n"
+ "bgt 2b \n"
+ MEMACCESS(2)
+ "vst1.16 {q2, q3}, [%2]! \n" // store pixels
+ "add %1, %1, #16 \n"
+ "subs %4, %4, #16 \n" // 16 processed per loop
+ "bgt 1b \n"
+ : "+r"(src_tmp), // %0
+ "+r"(src_ptr), // %1
+ "+r"(dst_ptr), // %2
+ "+r"(src_stride), // %3
+ "+r"(src_width), // %4
+ "+r"(src_height) // %5
+ :
+ : "memory", "cc", "r12", "q0", "q1", "q2", "q3" // Clobber List
+ );
+}
+
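// [Editor's sketch, not part of this patch] Scalar model of
// ScaleAddRows_NEON above: each output is the 16-bit sum of src_height
// bytes down a column (assumes the sum fits in 16 bits, as it does for
// the box filter's scale factors). Names are illustrative.
static void ScaleAddRows_Sketch(const uint8* src_ptr, ptrdiff_t src_stride,
                                uint16* dst_ptr, int src_width,
                                int src_height) {
  int x, y;
  for (x = 0; x < src_width; ++x) {
    uint16 sum = 0;
    for (y = 0; y < src_height; ++y) {
      sum = (uint16)(sum + src_ptr[x + y * src_stride]);
    }
    dst_ptr[x] = sum;
  }
}
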
+// TODO(Yang Zhang): Investigate less load instructions for
+// the x/dx stepping
+#define LOAD2_DATA8_LANE(n) \
+ "lsr %5, %3, #16 \n" \
+ "add %6, %1, %5 \n" \
+ "add %3, %3, %4 \n" \
+ MEMACCESS(6) \
+ "vld2.8 {d6["#n"], d7["#n"]}, [%6] \n"
+
+void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
+ int dst_width, int x, int dx) {
+ int dx_offset[4] = {0, 1, 2, 3};
+ int* tmp = dx_offset;
+ const uint8* src_tmp = src_ptr;
+ asm volatile (
+ "vdup.32 q0, %3 \n" // x
+ "vdup.32 q1, %4 \n" // dx
+ "vld1.32 {q2}, [%5] \n" // 0 1 2 3
+ "vshl.i32 q3, q1, #2 \n" // 4 * dx
+ "vmul.s32 q1, q1, q2 \n"
+ // x , x + 1 * dx, x + 2 * dx, x + 3 * dx
+ "vadd.s32 q1, q1, q0 \n"
+ // x + 4 * dx, x + 5 * dx, x + 6 * dx, x + 7 * dx
+ "vadd.s32 q2, q1, q3 \n"
+ "vshl.i32 q0, q3, #1 \n" // 8 * dx
+ "1: \n"
+ LOAD2_DATA8_LANE(0)
+ LOAD2_DATA8_LANE(1)
+ LOAD2_DATA8_LANE(2)
+ LOAD2_DATA8_LANE(3)
+ LOAD2_DATA8_LANE(4)
+ LOAD2_DATA8_LANE(5)
+ LOAD2_DATA8_LANE(6)
+ LOAD2_DATA8_LANE(7)
+ "vmov q10, q1 \n"
+ "vmov q11, q2 \n"
+ "vuzp.16 q10, q11 \n"
+ "vmovl.u8 q8, d6 \n"
+ "vmovl.u8 q9, d7 \n"
+ "vsubl.s16 q11, d18, d16 \n"
+ "vsubl.s16 q12, d19, d17 \n"
+ "vmovl.u16 q13, d20 \n"
+ "vmovl.u16 q10, d21 \n"
+ "vmul.s32 q11, q11, q13 \n"
+ "vmul.s32 q12, q12, q10 \n"
+ "vshrn.s32 d18, q11, #16 \n"
+ "vshrn.s32 d19, q12, #16 \n"
+ "vadd.s16 q8, q8, q9 \n"
+ "vmovn.s16 d6, q8 \n"
+
+ MEMACCESS(0)
+ "vst1.8 {d6}, [%0]! \n" // store pixels
+ "vadd.s32 q1, q1, q0 \n"
+ "vadd.s32 q2, q2, q0 \n"
+ "subs %2, %2, #8 \n" // 8 processed per loop
+ "bgt 1b \n"
+ : "+r"(dst_ptr), // %0
+ "+r"(src_ptr), // %1
+ "+r"(dst_width), // %2
+ "+r"(x), // %3
+ "+r"(dx), // %4
+ "+r"(tmp), // %5
+ "+r"(src_tmp) // %6
+ :
+ : "memory", "cc", "q0", "q1", "q2", "q3",
+ "q8", "q9", "q10", "q11", "q12", "q13"
+ );
+}
+
+#undef LOAD2_DATA8_LANE
+
// 16x2 -> 16x1
void ScaleFilterRows_NEON(uint8* dst_ptr,
const uint8* src_ptr, ptrdiff_t src_stride,
@@ -618,7 +736,6 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
asm volatile (
- ".p2align 2 \n"
"1: \n"
// load even pixels into q0, odd into q1
MEMACCESS(0)
@@ -639,12 +756,39 @@ void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
);
}
+void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ptrdiff_t src_stride,
+ uint8* dst_argb, int dst_width) {
+ asm volatile (
+ "1: \n"
+ MEMACCESS(0)
+ "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
+ MEMACCESS(0)
+ "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
+ "subs %2, %2, #8 \n" // 8 processed per loop
+ "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
+ "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
+ "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
+ "vpaddl.u8 q3, q3 \n" // A 16 bytes -> 8 shorts.
+ "vrshrn.u16 d0, q0, #1 \n" // downshift, round and pack
+ "vrshrn.u16 d1, q1, #1 \n"
+ "vrshrn.u16 d2, q2, #1 \n"
+ "vrshrn.u16 d3, q3, #1 \n"
+ MEMACCESS(1)
+ "vst4.8 {d0, d1, d2, d3}, [%1]! \n"
+ "bgt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(dst_width) // %2
+ :
+ : "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List
+ );
+}
+
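// [Editor's sketch, not part of this patch] The vld4/vpaddl/vrshrn sequence
// above is, per channel, the same rounded pair-average as the luma version:
static void ScaleARGBRowDown2Linear_Sketch(const uint8* src_argb,
                                           uint8* dst_argb, int dst_width) {
  int i, c;
  for (i = 0; i < dst_width; ++i) {
    for (c = 0; c < 4; ++c) {  // B, G, R, A
      dst_argb[4 * i + c] =
          (uint8)((src_argb[8 * i + c] + src_argb[8 * i + 4 + c] + 1) >> 1);
    }
  }
}
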
void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
asm volatile (
// change the stride to row 2 pointer
"add %1, %1, %0 \n"
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
@@ -685,7 +829,6 @@ void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx, uint8* dst_argb, int dst_width) {
asm volatile (
"mov r12, %3, lsl #2 \n"
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.32 {d0[0]}, [%0], r12 \n"
@@ -715,7 +858,6 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
asm volatile (
"mov r12, %4, lsl #2 \n"
"add %1, %1, %0 \n"
- ".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {d0}, [%0], r12 \n" // Read 4 2x2 blocks -> 2x1
@@ -756,7 +898,118 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
);
}
-#endif // __ARM_NEON__
+// TODO(Yang Zhang): Investigate less load instructions for
+// the x/dx stepping
+#define LOAD1_DATA32_LANE(dn, n) \
+ "lsr %5, %3, #16 \n" \
+ "add %6, %1, %5, lsl #2 \n" \
+ "add %3, %3, %4 \n" \
+ MEMACCESS(6) \
+ "vld1.32 {"#dn"["#n"]}, [%6] \n"
+
+void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb,
+ int dst_width, int x, int dx) {
+ int tmp = 0;
+ const uint8* src_tmp = src_argb;
+ asm volatile (
+ "1: \n"
+ LOAD1_DATA32_LANE(d0, 0)
+ LOAD1_DATA32_LANE(d0, 1)
+ LOAD1_DATA32_LANE(d1, 0)
+ LOAD1_DATA32_LANE(d1, 1)
+ LOAD1_DATA32_LANE(d2, 0)
+ LOAD1_DATA32_LANE(d2, 1)
+ LOAD1_DATA32_LANE(d3, 0)
+ LOAD1_DATA32_LANE(d3, 1)
+
+ MEMACCESS(0)
+ "vst1.32 {q0, q1}, [%0]! \n" // store pixels
+ "subs %2, %2, #8 \n" // 8 processed per loop
+ "bgt 1b \n"
+ : "+r"(dst_argb), // %0
+ "+r"(src_argb), // %1
+ "+r"(dst_width), // %2
+ "+r"(x), // %3
+ "+r"(dx), // %4
+ "+r"(tmp), // %5
+ "+r"(src_tmp) // %6
+ :
+ : "memory", "cc", "q0", "q1"
+ );
+}
+
+#undef LOAD1_DATA32_LANE
+
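// [Editor's sketch, not part of this patch] LOAD1_DATA32_LANE above
// implements, one lane at a time, this nearest-neighbor gather in 16.16
// fixed point:
static void ScaleARGBCols_Sketch(uint8* dst_argb, const uint8* src_argb,
                                 int dst_width, int x, int dx) {
  const uint32* src = (const uint32*)(src_argb);
  uint32* dst = (uint32*)(dst_argb);
  int j;
  for (j = 0; j < dst_width; ++j) {
    dst[j] = src[x >> 16];  // integer part selects the source pixel
    x += dx;                // step in 16.16 fixed point
  }
}
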
+// TODO(Yang Zhang): Investigate less load instructions for
+// the x/dx stepping
+#define LOAD2_DATA32_LANE(dn1, dn2, n) \
+ "lsr %5, %3, #16 \n" \
+ "add %6, %1, %5, lsl #2 \n" \
+ "add %3, %3, %4 \n" \
+ MEMACCESS(6) \
+ "vld2.32 {"#dn1"["#n"], "#dn2"["#n"]}, [%6] \n"
+
+void ScaleARGBFilterCols_NEON(uint8* dst_argb, const uint8* src_argb,
+ int dst_width, int x, int dx) {
+ int dx_offset[4] = {0, 1, 2, 3};
+ int* tmp = dx_offset;
+ const uint8* src_tmp = src_argb;
+ asm volatile (
+ "vdup.32 q0, %3 \n" // x
+ "vdup.32 q1, %4 \n" // dx
+ "vld1.32 {q2}, [%5] \n" // 0 1 2 3
+ "vshl.i32 q9, q1, #2 \n" // 4 * dx
+ "vmul.s32 q1, q1, q2 \n"
+ "vmov.i8 q3, #0x7f \n" // 0x7F
+ "vmov.i16 q15, #0x7f \n" // 0x7F
+ // x , x + 1 * dx, x + 2 * dx, x + 3 * dx
+ "vadd.s32 q8, q1, q0 \n"
+ "1: \n"
+ // d0, d1: a
+ // d2, d3: b
+ LOAD2_DATA32_LANE(d0, d2, 0)
+ LOAD2_DATA32_LANE(d0, d2, 1)
+ LOAD2_DATA32_LANE(d1, d3, 0)
+ LOAD2_DATA32_LANE(d1, d3, 1)
+ "vshrn.i32 d22, q8, #9 \n"
+ "vand.16 d22, d22, d30 \n"
+ "vdup.8 d24, d22[0] \n"
+ "vdup.8 d25, d22[2] \n"
+ "vdup.8 d26, d22[4] \n"
+ "vdup.8 d27, d22[6] \n"
+ "vext.8 d4, d24, d25, #4 \n"
+ "vext.8 d5, d26, d27, #4 \n" // f
+ "veor.8 q10, q2, q3 \n" // 0x7f ^ f
+ "vmull.u8 q11, d0, d20 \n"
+ "vmull.u8 q12, d1, d21 \n"
+ "vmull.u8 q13, d2, d4 \n"
+ "vmull.u8 q14, d3, d5 \n"
+ "vadd.i16 q11, q11, q13 \n"
+ "vadd.i16 q12, q12, q14 \n"
+ "vshrn.i16 d0, q11, #7 \n"
+ "vshrn.i16 d1, q12, #7 \n"
+
+ MEMACCESS(0)
+ "vst1.32 {d0, d1}, [%0]! \n" // store pixels
+ "vadd.s32 q8, q8, q9 \n"
+ "subs %2, %2, #4 \n" // 4 processed per loop
+ "bgt 1b \n"
+ : "+r"(dst_argb), // %0
+ "+r"(src_argb), // %1
+ "+r"(dst_width), // %2
+ "+r"(x), // %3
+ "+r"(dx), // %4
+ "+r"(tmp), // %5
+ "+r"(src_tmp) // %6
+ :
+ : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9",
+ "q10", "q11", "q12", "q13", "q14", "q15"
+ );
+}
+
+#undef LOAD2_DATA32_LANE
+
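// [Editor's sketch, not part of this patch] Scalar model of the blend in
// ScaleARGBFilterCols_NEON above: the fraction is cut to 7 bits
// ((x >> 9) & 0x7f) and each channel is mixed as
// (a * (0x7f ^ f) + b * f) >> 7, matching the veor-with-0x7f / vmull /
// vshrn #7 sequence. Names are illustrative.
static void ScaleARGBFilterCols_Sketch(uint8* dst_argb, const uint8* src_argb,
                                       int dst_width, int x, int dx) {
  int j, c;
  for (j = 0; j < dst_width; ++j) {
    const uint8* a = src_argb + 4 * (x >> 16);  // pixel at floor(x)
    const uint8* b = a + 4;                     // next pixel
    int f = (x >> 9) & 0x7f;                    // 7-bit fraction
    for (c = 0; c < 4; ++c) {
      dst_argb[4 * j + c] = (uint8)((a[c] * (0x7f ^ f) + b[c] * f) >> 7);
    }
    x += dx;
  }
}
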
+#endif // defined(__ARM_NEON__) && !defined(__aarch64__)
#ifdef __cplusplus
} // extern "C"
diff --git a/TMessagesProj/jni/libyuv/source/scale_neon64.cc b/TMessagesProj/jni/libyuv/source/scale_neon64.cc
index 44df55c6c..1d5519357 100644
--- a/TMessagesProj/jni/libyuv/source/scale_neon64.cc
+++ b/TMessagesProj/jni/libyuv/source/scale_neon64.cc
@@ -8,16 +8,18 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include "libyuv/scale.h"
#include "libyuv/row.h"
+#include "libyuv/scale_row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
-// This module is for GCC Neon.
+// This module is for GCC Neon armv8 64 bit.
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
-#ifdef HAS_SCALEROWDOWN2_NEON
+
// Read 32x1, throw away the even pixels, and write 16x1.
void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
@@ -25,11 +27,11 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
"1: \n"
// load even pixels into v0, odd into v1
MEMACCESS(0)
- "ld2 {v0.16b, v1.16b}, [%0], #32 \n"
- "subs %2, %2, #16 \n" // 16 processed per loop
+ "ld2 {v0.16b,v1.16b}, [%0], #32 \n"
+ "subs %w2, %w2, #16 \n" // 16 processed per loop
MEMACCESS(1)
"st1 {v1.16b}, [%1], #16 \n" // store odd pixels
- "bgt 1b \n"
+ "b.gt 1b \n"
: "+r"(src_ptr), // %0
"+r"(dst), // %1
"+r"(dst_width) // %2
@@ -37,9 +39,30 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
: "v0", "v1" // Clobber List
);
}
-#endif //HAS_SCALEROWDOWN2_NEON
-#ifdef HAS_SCALEROWDOWN2_NEON
+// Read 32x1 average down and write 16x1.
+void ScaleRowDown2Linear_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst, int dst_width) {
+ asm volatile (
+ "1: \n"
+ MEMACCESS(0)
+ "ld1 {v0.16b,v1.16b}, [%0], #32 \n" // load pixels and post inc
+ "subs %w2, %w2, #16 \n" // 16 processed per loop
+ "uaddlp v0.8h, v0.16b \n" // add adjacent
+ "uaddlp v1.8h, v1.16b \n"
+ "rshrn v0.8b, v0.8h, #1 \n" // downshift, round and pack
+ "rshrn2 v0.16b, v1.8h, #1 \n"
+ MEMACCESS(1)
+ "st1 {v0.16b}, [%1], #16 \n"
+ "b.gt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst), // %1
+ "+r"(dst_width) // %2
+ :
+ : "v0", "v1" // Clobber List
+ );
+}
+
// Read 32x2 average down and write 16x1.
void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
@@ -48,10 +71,10 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
"add %1, %1, %0 \n"
"1: \n"
MEMACCESS(0)
- "ld1 {v0.16b, v1.16b}, [%0], #32 \n" // load row 1 and post inc
+ "ld1 {v0.16b,v1.16b}, [%0], #32 \n" // load row 1 and post inc
MEMACCESS(1)
"ld1 {v2.16b, v3.16b}, [%1], #32 \n" // load row 2 and post inc
- "subs %3, %3, #16 \n" // 16 processed per loop
+ "subs %w3, %w3, #16 \n" // 16 processed per loop
"uaddlp v0.8h, v0.16b \n" // row 1 add adjacent
"uaddlp v1.8h, v1.16b \n"
"uadalp v0.8h, v2.16b \n" // row 2 add adjacent + row1
@@ -60,7 +83,7 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
"rshrn2 v0.16b, v1.8h, #2 \n"
MEMACCESS(2)
"st1 {v0.16b}, [%2], #16 \n"
- "bgt 1b \n"
+ "b.gt 1b \n"
: "+r"(src_ptr), // %0
"+r"(src_stride), // %1
"+r"(dst), // %2
@@ -69,19 +92,17 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
: "v0", "v1", "v2", "v3" // Clobber List
);
}
-#endif //HAS_SCALEROWDOWN2_NEON
-#ifdef HAS_SCALEROWDOWN4_NEON
void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
asm volatile (
"1: \n"
MEMACCESS(0)
- "ld4 {v0.8b-3.8b}, [%0], #32 \n" // src line 0
- "subs %2, %2, #8 \n" // 8 processed per loop
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // src line 0
+ "subs %w2, %w2, #8 \n" // 8 processed per loop
MEMACCESS(1)
"st1 {v2.8b}, [%1], #8 \n"
- "bgt 1b \n"
+ "b.gt 1b \n"
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(dst_width) // %2
@@ -89,9 +110,7 @@ void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
: "v0", "v1", "v2", "v3", "memory", "cc"
);
}
-#endif //HAS_SCALEROWDOWN4_NEON
-#ifdef HAS_SCALEROWDOWN4_NEON
void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
const uint8* src_ptr1 = src_ptr + src_stride;
@@ -102,12 +121,12 @@ asm volatile (
MEMACCESS(0)
"ld1 {v0.16b}, [%0], #16 \n" // load up 16x4
MEMACCESS(3)
- "ld1 {v1.16b}, [%3], #16 \n"
+ "ld1 {v1.16b}, [%2], #16 \n"
MEMACCESS(4)
- "ld1 {v2.16b}, [%4], #16 \n"
+ "ld1 {v2.16b}, [%3], #16 \n"
MEMACCESS(5)
- "ld1 {v3.16b}, [%5], #16 \n"
- "subs %2, %2, #4 \n"
+ "ld1 {v3.16b}, [%4], #16 \n"
+ "subs %w5, %w5, #4 \n"
"uaddlp v0.8h, v0.16b \n"
"uadalp v0.8h, v1.16b \n"
"uadalp v0.8h, v2.16b \n"
@@ -116,20 +135,18 @@ asm volatile (
"rshrn v0.8b, v0.8h, #4 \n" // divide by 16 w/rounding
MEMACCESS(1)
"st1 {v0.s}[0], [%1], #4 \n"
- "bgt 1b \n"
+ "b.gt 1b \n"
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
- "+r"(dst_width), // %2
- "+r"(src_ptr1), // %3
- "+r"(src_ptr2), // %4
- "+r"(src_ptr3) // %5
+ "+r"(src_ptr1), // %2
+ "+r"(src_ptr2), // %3
+ "+r"(src_ptr3), // %4
+ "+r"(dst_width) // %5
:
: "v0", "v1", "v2", "v3", "memory", "cc"
);
}
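
// [Editor's sketch, not part of this patch] Scalar form of
// ScaleRowDown4Box_NEON above: each output byte is the rounded average of
// a 4x4 block (uaddlp/uadalp build the sums, rshrn #4 divides by 16 with
// rounding).
static void ScaleRowDown4Box_Sketch(const uint8* src_ptr,
                                    ptrdiff_t src_stride,
                                    uint8* dst_ptr, int dst_width) {
  int i, x, y;
  for (i = 0; i < dst_width; ++i) {
    int sum = 0;
    for (y = 0; y < 4; ++y) {
      for (x = 0; x < 4; ++x) {
        sum += src_ptr[4 * i + x + y * src_stride];
      }
    }
    dst_ptr[i] = (uint8)((sum + 8) >> 4);  // divide by 16 with rounding
  }
}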
-#endif //HAS_SCALEROWDOWN4_NEON
-#ifdef HAS_SCALEROWDOWN34_NEON
// Down scale from 4 to 3 pixels. Use the neon multilane read/write
// to load every 4th pixel into 4 different registers.
// Point samples 32 pixels to 24 pixels.
@@ -139,12 +156,12 @@ void ScaleRowDown34_NEON(const uint8* src_ptr,
asm volatile (
"1: \n"
MEMACCESS(0)
- "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // src line 0
- "subs %2, %2, #24 \n"
- "mov v2.8b, v3.8b \n" // order v0, v1, v2
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // src line 0
+ "subs %w2, %w2, #24 \n"
+ "orr v2.16b, v3.16b, v3.16b \n" // order v0, v1, v2
MEMACCESS(1)
- "st3 {v0.8b-v2.8b}, [%1], #24 \n"
- "bgt 1b \n"
+ "st3 {v0.8b,v1.8b,v2.8b}, [%1], #24 \n"
+ "b.gt 1b \n"
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(dst_width) // %2
@@ -152,9 +169,7 @@ void ScaleRowDown34_NEON(const uint8* src_ptr,
: "v0", "v1", "v2", "v3", "memory", "cc"
);
}
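
// [Editor's sketch, not part of this patch] The ld4/st3 pair above
// point-samples each group of 4 source pixels down to 3 by keeping pixels
// 0, 1 and 3 (v2 is overwritten with v3 before the store):
static void ScaleRowDown34_Sketch(const uint8* src_ptr, uint8* dst_ptr,
                                  int dst_width) {
  int i;
  for (i = 0; i < dst_width / 3; ++i) {
    dst_ptr[3 * i + 0] = src_ptr[4 * i + 0];
    dst_ptr[3 * i + 1] = src_ptr[4 * i + 1];
    dst_ptr[3 * i + 2] = src_ptr[4 * i + 3];
  }
}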
-#endif //HAS_SCALEROWDOWN34_NEON
-#ifdef HAS_SCALEROWDOWN34_NEON
void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
@@ -163,10 +178,10 @@ void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
"add %3, %3, %0 \n"
"1: \n"
MEMACCESS(0)
- "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // src line 0
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // src line 0
MEMACCESS(3)
- "ld4 {v4.8b-v7.8b}, [%3], #32 \n" // src line 1
- "subs %2, %2, #24 \n"
+ "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%3], #32 \n" // src line 1
+ "subs %w2, %w2, #24 \n"
// filter src line 0 with src line 1
// expand chars to shorts to allow for room
@@ -202,9 +217,9 @@ void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
"uqrshrn v2.8b, v16.8h, #2 \n"
MEMACCESS(1)
- "st3 {v0.8b-v2.8b}, [%1], #24 \n"
+ "st3 {v0.8b,v1.8b,v2.8b}, [%1], #24 \n"
- "bgt 1b \n"
+ "b.gt 1b \n"
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(dst_width), // %2
@@ -214,9 +229,7 @@ void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
"v20", "memory", "cc"
);
}
-#endif //ScaleRowDown34_0_Box_NEON
-#ifdef HAS_SCALEROWDOWN34_NEON
void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
@@ -225,10 +238,10 @@ void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
"add %3, %3, %0 \n"
"1: \n"
MEMACCESS(0)
- "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // src line 0
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // src line 0
MEMACCESS(3)
- "ld4 {v4.8b-v7.8b}, [%3], #32 \n" // src line 1
- "subs %2, %2, #24 \n"
+ "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%3], #32 \n" // src line 1
+ "subs %w2, %w2, #24 \n"
// average src line 0 with src line 1
"urhadd v0.8b, v0.8b, v4.8b \n"
"urhadd v1.8b, v1.8b, v5.8b \n"
@@ -249,8 +262,8 @@ void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
"uqrshrn v2.8b, v4.8h, #2 \n"
MEMACCESS(1)
- "st3 {v0.8b-v2.8b}, [%1], #24 \n"
- "bgt 1b \n"
+ "st3 {v0.8b,v1.8b,v2.8b}, [%1], #24 \n"
+ "b.gt 1b \n"
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(dst_width), // %2
@@ -259,9 +272,7 @@ void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "memory", "cc"
);
}
-#endif //HAS_SCALEROWDOWN34_NEON
-#ifdef HAS_SCALEROWDOWN38_NEON
static uvec8 kShuf38 =
{ 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0 };
static uvec8 kShuf38_2 =
@@ -282,14 +293,14 @@ void ScaleRowDown38_NEON(const uint8* src_ptr,
"ld1 {v3.16b}, [%3] \n"
"1: \n"
MEMACCESS(0)
- "ld1 {v0.16b, v1.16b}, [%0], #32 \n"
- "subs %2, %2, #12 \n"
- "tbl v2.16b, {v0.16b, v1.16b}, v3.16b \n"
+ "ld1 {v0.16b,v1.16b}, [%0], #32 \n"
+ "subs %w2, %w2, #12 \n"
+ "tbl v2.16b, {v0.16b,v1.16b}, v3.16b \n"
MEMACCESS(1)
"st1 {v2.8b}, [%1], #8 \n"
MEMACCESS(1)
"st1 {v2.s}[2], [%1], #4 \n"
- "bgt 1b \n"
+ "b.gt 1b \n"
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(dst_width) // %2
@@ -298,14 +309,12 @@ void ScaleRowDown38_NEON(const uint8* src_ptr,
);
}
-#endif //HAS_SCALEROWDOWN38_NEON
-
-#ifdef HAS_SCALEROWDOWN38_NEON
// 32x3 -> 12x1
void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
const uint8* src_ptr1 = src_ptr + src_stride * 2;
+ ptrdiff_t tmp_src_stride = src_stride;
asm volatile (
MEMACCESS(5)
@@ -314,7 +323,7 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
"ld1 {v30.16b}, [%6] \n"
MEMACCESS(7)
"ld1 {v31.8h}, [%7] \n"
- "add %3, %3, %0 \n"
+ "add %2, %2, %0 \n"
"1: \n"
// 00 40 01 41 02 42 03 43
@@ -322,12 +331,12 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
// 20 60 21 61 22 62 23 63
// 30 70 31 71 32 72 33 73
MEMACCESS(0)
- "ld4 {v0.8b-v3.8b}, [%0], #32 \n"
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n"
MEMACCESS(3)
- "ld4 {v4.8b-v7.8b}, [%3], #32 \n"
+ "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%2], #32 \n"
MEMACCESS(4)
- "ld4 {v16.8b-v19.8b}, [%4], #32 \n"
- "subs %2, %2, #12 \n"
+ "ld4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%3], #32 \n"
+ "subs %w4, %w4, #12 \n"
// Shuffle the input data around to align the data
// so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7
@@ -414,12 +423,12 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
"st1 {v3.8b}, [%1], #8 \n"
MEMACCESS(1)
"st1 {v3.s}[2], [%1], #4 \n"
- "bgt 1b \n"
+ "b.gt 1b \n"
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
- "+r"(dst_width), // %2
- "+r"(src_stride), // %3
- "+r"(src_ptr1) // %4
+ "+r"(tmp_src_stride), // %2
+ "+r"(src_ptr1), // %3
+ "+r"(dst_width) // %4
: "r"(&kMult38_Div6), // %5
"r"(&kShuf38_2), // %6
"r"(&kMult38_Div9) // %7
@@ -428,19 +437,19 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
"v30", "v31", "memory", "cc"
);
}
-#endif //HAS_SCALEROWDOWN38_NEON
-#ifdef HAS_SCALEROWDOWN38_NEON
// 32x2 -> 12x1
void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
+ // TODO(fbarchard): use src_stride directly for clang 3.5+.
+ ptrdiff_t tmp_src_stride = src_stride;
asm volatile (
MEMACCESS(4)
"ld1 {v30.8h}, [%4] \n"
MEMACCESS(5)
"ld1 {v31.16b}, [%5] \n"
- "add %3, %3, %0 \n"
+ "add %2, %2, %0 \n"
"1: \n"
// 00 40 01 41 02 42 03 43
@@ -448,10 +457,10 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
// 20 60 21 61 22 62 23 63
// 30 70 31 71 32 72 33 73
MEMACCESS(0)
- "ld4 {v0.8b-v3.8b}, [%0], #32 \n"
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n"
MEMACCESS(3)
- "ld4 {v4.8b-v7.8b}, [%3], #32 \n"
- "subs %2, %2, #12 \n"
+ "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%2], #32 \n"
+ "subs %w3, %w3, #12 \n"
// Shuffle the input data around to align the data
// so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7
@@ -524,18 +533,124 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
"st1 {v3.8b}, [%1], #8 \n"
MEMACCESS(1)
"st1 {v3.s}[2], [%1], #4 \n"
- "bgt 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width), // %2
- "+r"(src_stride) // %3
- : "r"(&kMult38_Div6), // %4
- "r"(&kShuf38_2) // %5
+ "b.gt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(tmp_src_stride), // %2
+ "+r"(dst_width) // %3
+ : "r"(&kMult38_Div6), // %4
+ "r"(&kShuf38_2) // %5
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
"v18", "v19", "v30", "v31", "memory", "cc"
);
}
-#endif //HAS_SCALEROWDOWN38_NEON
+
+void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint16* dst_ptr, int src_width, int src_height) {
+ const uint8* src_tmp = NULL;
+ asm volatile (
+ "1: \n"
+ "mov %0, %1 \n"
+ "mov w12, %w5 \n"
+ "eor v2.16b, v2.16b, v2.16b \n"
+ "eor v3.16b, v3.16b, v3.16b \n"
+ "2: \n"
+ // load 16 pixels into q0
+ MEMACCESS(0)
+ "ld1 {v0.16b}, [%0], %3 \n"
+ "uaddw2 v3.8h, v3.8h, v0.16b \n"
+ "uaddw v2.8h, v2.8h, v0.8b \n"
+ "subs w12, w12, #1 \n"
+ "b.gt 2b \n"
+ MEMACCESS(2)
+ "st1 {v2.8h, v3.8h}, [%2], #32 \n" // store pixels
+ "add %1, %1, #16 \n"
+ "subs %w4, %w4, #16 \n" // 16 processed per loop
+ "b.gt 1b \n"
+ : "+r"(src_tmp), // %0
+ "+r"(src_ptr), // %1
+ "+r"(dst_ptr), // %2
+ "+r"(src_stride), // %3
+ "+r"(src_width), // %4
+ "+r"(src_height) // %5
+ :
+ : "memory", "cc", "w12", "v0", "v1", "v2", "v3" // Clobber List
+ );
+}
+
+// TODO(Yang Zhang): Investigate less load instructions for
+// the x/dx stepping
+#define LOAD2_DATA8_LANE(n) \
+ "lsr %5, %3, #16 \n" \
+ "add %6, %1, %5 \n" \
+ "add %3, %3, %4 \n" \
+ MEMACCESS(6) \
+ "ld2 {v4.b, v5.b}["#n"], [%6] \n"
+
+void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
+ int dst_width, int x, int dx) {
+ int dx_offset[4] = {0, 1, 2, 3};
+ int* tmp = dx_offset;
+ const uint8* src_tmp = src_ptr;
+ int64 dst_width64 = (int64) dst_width; // Work around ios 64 bit warning.
+ int64 x64 = (int64) x;
+ int64 dx64 = (int64) dx;
+ asm volatile (
+ "dup v0.4s, %w3 \n" // x
+ "dup v1.4s, %w4 \n" // dx
+ "ld1 {v2.4s}, [%5] \n" // 0 1 2 3
+ "shl v3.4s, v1.4s, #2 \n" // 4 * dx
+ "mul v1.4s, v1.4s, v2.4s \n"
+ // x , x + 1 * dx, x + 2 * dx, x + 3 * dx
+ "add v1.4s, v1.4s, v0.4s \n"
+ // x + 4 * dx, x + 5 * dx, x + 6 * dx, x + 7 * dx
+ "add v2.4s, v1.4s, v3.4s \n"
+ "shl v0.4s, v3.4s, #1 \n" // 8 * dx
+ "1: \n"
+ LOAD2_DATA8_LANE(0)
+ LOAD2_DATA8_LANE(1)
+ LOAD2_DATA8_LANE(2)
+ LOAD2_DATA8_LANE(3)
+ LOAD2_DATA8_LANE(4)
+ LOAD2_DATA8_LANE(5)
+ LOAD2_DATA8_LANE(6)
+ LOAD2_DATA8_LANE(7)
+ "mov v6.16b, v1.16b \n"
+ "mov v7.16b, v2.16b \n"
+ "uzp1 v6.8h, v6.8h, v7.8h \n"
+ "ushll v4.8h, v4.8b, #0 \n"
+ "ushll v5.8h, v5.8b, #0 \n"
+ "ssubl v16.4s, v5.4h, v4.4h \n"
+ "ssubl2 v17.4s, v5.8h, v4.8h \n"
+ "ushll v7.4s, v6.4h, #0 \n"
+ "ushll2 v6.4s, v6.8h, #0 \n"
+ "mul v16.4s, v16.4s, v7.4s \n"
+ "mul v17.4s, v17.4s, v6.4s \n"
+ "shrn v6.4h, v16.4s, #16 \n"
+ "shrn2 v6.8h, v17.4s, #16 \n"
+ "add v4.8h, v4.8h, v6.8h \n"
+ "xtn v4.8b, v4.8h \n"
+
+ MEMACCESS(0)
+ "st1 {v4.8b}, [%0], #8 \n" // store pixels
+ "add v1.4s, v1.4s, v0.4s \n"
+ "add v2.4s, v2.4s, v0.4s \n"
+ "subs %w2, %w2, #8 \n" // 8 processed per loop
+ "b.gt 1b \n"
+ : "+r"(dst_ptr), // %0
+ "+r"(src_ptr), // %1
+ "+r"(dst_width64), // %2
+ "+r"(x64), // %3
+ "+r"(dx64), // %4
+ "+r"(tmp), // %5
+ "+r"(src_tmp) // %6
+ :
+ : "memory", "cc", "v0", "v1", "v2", "v3",
+ "v4", "v5", "v6", "v7", "v16", "v17"
+ );
+}
+
+#undef LOAD2_DATA8_LANE
// 16x2 -> 16x1
void ScaleFilterRows_NEON(uint8* dst_ptr,
@@ -543,15 +658,15 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
int dst_width, int source_y_fraction) {
int y_fraction = 256 - source_y_fraction;
asm volatile (
- "cmp %4, #0 \n"
- "beq 100f \n"
+ "cmp %w4, #0 \n"
+ "b.eq 100f \n"
"add %2, %2, %1 \n"
- "cmp %4, #64 \n"
- "beq 75f \n"
- "cmp %4, #128 \n"
- "beq 50f \n"
- "cmp %4, #192 \n"
- "beq 25f \n"
+ "cmp %w4, #64 \n"
+ "b.eq 75f \n"
+ "cmp %w4, #128 \n"
+ "b.eq 50f \n"
+ "cmp %w4, #192 \n"
+ "b.eq 25f \n"
"dup v5.8b, %w4 \n"
"dup v4.8b, %w5 \n"
@@ -561,7 +676,7 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
"ld1 {v0.16b}, [%1], #16 \n"
MEMACCESS(2)
"ld1 {v1.16b}, [%2], #16 \n"
- "subs %3, %3, #16 \n"
+ "subs %w3, %w3, #16 \n"
"umull v6.8h, v0.8b, v4.8b \n"
"umull2 v7.8h, v0.16b, v4.16b \n"
"umlal v6.8h, v1.8b, v5.8b \n"
@@ -570,7 +685,7 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
"rshrn2 v0.16b, v7.8h, #8 \n"
MEMACCESS(0)
"st1 {v0.16b}, [%0], #16 \n"
- "bgt 1b \n"
+ "b.gt 1b \n"
"b 99f \n"
// Blend 25 / 75.
@@ -579,12 +694,12 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
"ld1 {v0.16b}, [%1], #16 \n"
MEMACCESS(2)
"ld1 {v1.16b}, [%2], #16 \n"
- "subs %3, %3, #16 \n"
+ "subs %w3, %w3, #16 \n"
"urhadd v0.16b, v0.16b, v1.16b \n"
"urhadd v0.16b, v0.16b, v1.16b \n"
MEMACCESS(0)
"st1 {v0.16b}, [%0], #16 \n"
- "bgt 25b \n"
+ "b.gt 25b \n"
"b 99f \n"
// Blend 50 / 50.
@@ -593,11 +708,11 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
"ld1 {v0.16b}, [%1], #16 \n"
MEMACCESS(2)
"ld1 {v1.16b}, [%2], #16 \n"
- "subs %3, %3, #16 \n"
+ "subs %w3, %w3, #16 \n"
"urhadd v0.16b, v0.16b, v1.16b \n"
MEMACCESS(0)
"st1 {v0.16b}, [%0], #16 \n"
- "bgt 50b \n"
+ "b.gt 50b \n"
"b 99f \n"
// Blend 75 / 25.
@@ -606,22 +721,22 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
"ld1 {v1.16b}, [%1], #16 \n"
MEMACCESS(2)
"ld1 {v0.16b}, [%2], #16 \n"
- "subs %3, %3, #16 \n"
+ "subs %w3, %w3, #16 \n"
"urhadd v0.16b, v0.16b, v1.16b \n"
"urhadd v0.16b, v0.16b, v1.16b \n"
MEMACCESS(0)
"st1 {v0.16b}, [%0], #16 \n"
- "bgt 75b \n"
+ "b.gt 75b \n"
"b 99f \n"
// Blend 100 / 0 - Copy row unchanged.
"100: \n"
MEMACCESS(1)
"ld1 {v0.16b}, [%1], #16 \n"
- "subs %3, %3, #16 \n"
+ "subs %w3, %w3, #16 \n"
MEMACCESS(0)
"st1 {v0.16b}, [%0], #16 \n"
- "bgt 100b \n"
+ "b.gt 100b \n"
"99: \n"
MEMACCESS(0)
@@ -637,7 +752,6 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
);
}
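
// [Editor's sketch, not part of this patch] The general path of
// ScaleFilterRows_NEON above computes, per byte,
//   dst = (row0 * (256 - f) + row1 * f + 128) >> 8
// with f = source_y_fraction; the 0/64/128/192 branches are faster
// rounding-average special cases of the same blend.
static void ScaleFilterRows_Sketch(uint8* dst_ptr, const uint8* src_ptr,
                                   ptrdiff_t src_stride, int dst_width,
                                   int source_y_fraction) {
  int f = source_y_fraction;  // 0..256; 0 copies row 0 unchanged
  int x;
  for (x = 0; x < dst_width; ++x) {
    int a = src_ptr[x];
    int b = src_ptr[src_stride + x];
    dst_ptr[x] = (uint8)((a * (256 - f) + b * f + 128) >> 8);
  }
}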
-#ifdef HAS_SCALEARGBROWDOWN2_NEON
void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
asm volatile (
@@ -647,12 +761,12 @@ void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
"ld2 {v0.4s, v1.4s}, [%0], #32 \n"
MEMACCESS (0)
"ld2 {v2.4s, v3.4s}, [%0], #32 \n"
- "subs %2, %2, #8 \n" // 8 processed per loop
+ "subs %w2, %w2, #8 \n" // 8 processed per loop
MEMACCESS (1)
"st1 {v1.16b}, [%1], #16 \n" // store odd pixels
MEMACCESS (1)
"st1 {v3.16b}, [%1], #16 \n"
- "bgt 1b \n"
+ "b.gt 1b \n"
: "+r" (src_ptr), // %0
"+r" (dst), // %1
"+r" (dst_width) // %2
@@ -660,9 +774,34 @@ void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
: "memory", "cc", "v0", "v1", "v2", "v3" // Clobber List
);
}
-#endif //HAS_SCALEARGBROWDOWN2_NEON
-#ifdef HAS_SCALEARGBROWDOWN2_NEON
+void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ptrdiff_t src_stride,
+ uint8* dst_argb, int dst_width) {
+ asm volatile (
+ "1: \n"
+ MEMACCESS (0)
+ // load 8 ARGB pixels.
+ "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n"
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
+ "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts.
+ "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
+ "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts.
+ "uaddlp v3.8h, v3.16b \n" // A 16 bytes -> 8 shorts.
+ "rshrn v0.8b, v0.8h, #1 \n" // downshift, round and pack
+ "rshrn v1.8b, v1.8h, #1 \n"
+ "rshrn v2.8b, v2.8h, #1 \n"
+ "rshrn v3.8b, v3.8h, #1 \n"
+ MEMACCESS (1)
+ "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n"
+ "b.gt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(dst_width) // %2
+ :
+ : "memory", "cc", "v0", "v1", "v2", "v3" // Clobber List
+ );
+}
+
void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
asm volatile (
@@ -670,14 +809,14 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
"add %1, %1, %0 \n"
"1: \n"
MEMACCESS (0)
- "ld4 {v0.16b - v3.16b}, [%0], #64 \n" // load 8 ARGB pixels.
- "subs %3, %3, #8 \n" // 8 processed per loop.
+ "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 8 ARGB pixels.
+ "subs %w3, %w3, #8 \n" // 8 processed per loop.
"uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts.
"uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
"uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts.
"uaddlp v3.8h, v3.16b \n" // A 16 bytes -> 8 shorts.
MEMACCESS (1)
- "ld4 {v16.16b - v19.16b}, [%1], #64 \n" // load 8 more ARGB pixels.
+ "ld4 {v16.16b,v17.16b,v18.16b,v19.16b}, [%1], #64 \n" // load 8 more ARGB pixels.
"uadalp v0.8h, v16.16b \n" // B 16 bytes -> 8 shorts.
"uadalp v1.8h, v17.16b \n" // G 16 bytes -> 8 shorts.
"uadalp v2.8h, v18.16b \n" // R 16 bytes -> 8 shorts.
@@ -687,8 +826,8 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
"rshrn v2.8b, v2.8h, #2 \n"
"rshrn v3.8b, v3.8h, #2 \n"
MEMACCESS (2)
- "st4 {v0.8b - v3.8b}, [%2], #32 \n"
- "bgt 1b \n"
+ "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n"
+ "b.gt 1b \n"
: "+r" (src_ptr), // %0
"+r" (src_stride), // %1
"+r" (dst), // %2
@@ -697,9 +836,7 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
: "memory", "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19"
);
}
-#endif //HAS_SCALEARGBROWDOWN2_NEON
-#ifdef HAS_SCALEARGBROWDOWNEVEN_NEON
// Reads 4 pixels at a time.
// Alignment requirement: src_argb 4 byte aligned.
void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
@@ -714,23 +851,21 @@ void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
"ld1 {v0.s}[2], [%0], %3 \n"
MEMACCESS(0)
"ld1 {v0.s}[3], [%0], %3 \n"
- "subs %2, %2, #4 \n" // 4 pixels per loop.
+ "subs %w2, %w2, #4 \n" // 4 pixels per loop.
MEMACCESS(1)
"st1 {v0.16b}, [%1], #16 \n"
- "bgt 1b \n"
+ "b.gt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
"+r"(dst_width) // %2
- : "r"(src_stepx * 4) // %3
+ : "r"((int64)(src_stepx * 4)) // %3
: "memory", "cc", "v0"
);
}
-#endif //HAS_SCALEARGBROWDOWNEVEN_NEON
-#ifdef HAS_SCALEARGBROWDOWNEVEN_NEON
// Reads 4 pixels at a time.
// Alignment requirement: src_argb 4 byte aligned.
-// TODO, might be worth another optimization pass in future.
+// TODO(Yang Zhang): Might be worth another optimization pass in future.
// It could be upgraded to 8 pixels at a time to start with.
void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx,
@@ -739,49 +874,167 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
"add %1, %1, %0 \n"
"1: \n"
MEMACCESS(0)
- "ld1 {v0.8b}, [%0], %4 \n" // Read 4 2x2 blocks -> 2x1
+ "ld1 {v0.8b}, [%0], %4 \n" // Read 4 2x2 blocks -> 2x1
MEMACCESS(1)
- "ld1 {v1.8b}, [%1], %4 \n"
+ "ld1 {v1.8b}, [%1], %4 \n"
MEMACCESS(0)
- "ld1 {v2.8b}, [%0], %4 \n"
+ "ld1 {v2.8b}, [%0], %4 \n"
MEMACCESS(1)
- "ld1 {v3.8b}, [%1], %4 \n"
+ "ld1 {v3.8b}, [%1], %4 \n"
MEMACCESS(0)
- "ld1 {v4.8b}, [%0], %4 \n"
+ "ld1 {v4.8b}, [%0], %4 \n"
MEMACCESS(1)
- "ld1 {v5.8b}, [%1], %4 \n"
+ "ld1 {v5.8b}, [%1], %4 \n"
MEMACCESS(0)
- "ld1 {v6.8b}, [%0], %4 \n"
+ "ld1 {v6.8b}, [%0], %4 \n"
MEMACCESS(1)
- "ld1 {v7.8b}, [%1], %4 \n"
- "uaddl v0.8h, v0.8b, v1.8b \n"
- "uaddl v2.8h, v2.8b, v3.8b \n"
- "uaddl v4.8h, v4.8b, v5.8b \n"
- "uaddl v6.8h, v6.8b, v7.8b \n"
- "mov v16.d[1], v0.d[1] \n" // ab_cd -> ac_bd
- "mov v0.d[1], v2.d[0] \n"
- "mov v2.d[0], v16.d[1] \n"
- "mov v16.d[1], v4.d[1] \n" // ef_gh -> eg_fh
- "mov v4.d[1], v6.d[0] \n"
- "mov v6.d[0], v16.d[1] \n"
- "add v0.8h, v0.8h, v2.8h \n" // (a+b)_(c+d)
- "add v4.8h, v4.8h, v6.8h \n" // (e+f)_(g+h)
- "rshrn v0.8b, v0.8h, #2 \n" // first 2 pixels.
- "rshrn2 v0.16b, v4.8h, #2 \n" // next 2 pixels.
- "subs %3, %3, #4 \n" // 4 pixels per loop.
+ "ld1 {v7.8b}, [%1], %4 \n"
+ "uaddl v0.8h, v0.8b, v1.8b \n"
+ "uaddl v2.8h, v2.8b, v3.8b \n"
+ "uaddl v4.8h, v4.8b, v5.8b \n"
+ "uaddl v6.8h, v6.8b, v7.8b \n"
+ "mov v16.d[1], v0.d[1] \n" // ab_cd -> ac_bd
+ "mov v0.d[1], v2.d[0] \n"
+ "mov v2.d[0], v16.d[1] \n"
+ "mov v16.d[1], v4.d[1] \n" // ef_gh -> eg_fh
+ "mov v4.d[1], v6.d[0] \n"
+ "mov v6.d[0], v16.d[1] \n"
+ "add v0.8h, v0.8h, v2.8h \n" // (a+b)_(c+d)
+ "add v4.8h, v4.8h, v6.8h \n" // (e+f)_(g+h)
+ "rshrn v0.8b, v0.8h, #2 \n" // first 2 pixels.
+ "rshrn2 v0.16b, v4.8h, #2 \n" // next 2 pixels.
+ "subs %w3, %w3, #4 \n" // 4 pixels per loop.
MEMACCESS(2)
"st1 {v0.16b}, [%2], #16 \n"
- "bgt 1b \n"
+ "b.gt 1b \n"
: "+r"(src_argb), // %0
"+r"(src_stride), // %1
"+r"(dst_argb), // %2
"+r"(dst_width) // %3
- : "r"(src_stepx * 4) // %4
+ : "r"((int64)(src_stepx * 4)) // %4
: "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"
);
}
-#endif // HAS_SCALEARGBROWDOWNEVEN_NEON
-#endif // __aarch64__
+
+// TODO(Yang Zhang): Investigate less load instructions for
+// the x/dx stepping
+#define LOAD1_DATA32_LANE(vn, n) \
+ "lsr %5, %3, #16 \n" \
+ "add %6, %1, %5, lsl #2 \n" \
+ "add %3, %3, %4 \n" \
+ MEMACCESS(6) \
+ "ld1 {"#vn".s}["#n"], [%6] \n"
+
+void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb,
+ int dst_width, int x, int dx) {
+ const uint8* src_tmp = src_argb;
+ int64 dst_width64 = (int64) dst_width; // Work around ios 64 bit warning.
+ int64 x64 = (int64) x;
+ int64 dx64 = (int64) dx;
+ int64 tmp64 = 0;
+ asm volatile (
+ "1: \n"
+ LOAD1_DATA32_LANE(v0, 0)
+ LOAD1_DATA32_LANE(v0, 1)
+ LOAD1_DATA32_LANE(v0, 2)
+ LOAD1_DATA32_LANE(v0, 3)
+ LOAD1_DATA32_LANE(v1, 0)
+ LOAD1_DATA32_LANE(v1, 1)
+ LOAD1_DATA32_LANE(v1, 2)
+ LOAD1_DATA32_LANE(v1, 3)
+
+ MEMACCESS(0)
+ "st1 {v0.4s, v1.4s}, [%0], #32 \n" // store pixels
+ "subs %w2, %w2, #8 \n" // 8 processed per loop
+ "b.gt 1b \n"
+ : "+r"(dst_argb), // %0
+ "+r"(src_argb), // %1
+ "+r"(dst_width64), // %2
+ "+r"(x64), // %3
+ "+r"(dx64), // %4
+ "+r"(tmp64), // %5
+ "+r"(src_tmp) // %6
+ :
+ : "memory", "cc", "v0", "v1"
+ );
+}
+
+#undef LOAD1_DATA32_LANE
+
+// TODO(Yang Zhang): Investigate less load instructions for
+// the x/dx stepping
+#define LOAD2_DATA32_LANE(vn1, vn2, n) \
+ "lsr %5, %3, #16 \n" \
+ "add %6, %1, %5, lsl #2 \n" \
+ "add %3, %3, %4 \n" \
+ MEMACCESS(6) \
+ "ld2 {"#vn1".s, "#vn2".s}["#n"], [%6] \n"
+
+void ScaleARGBFilterCols_NEON(uint8* dst_argb, const uint8* src_argb,
+ int dst_width, int x, int dx) {
+ int dx_offset[4] = {0, 1, 2, 3};
+ int* tmp = dx_offset;
+ const uint8* src_tmp = src_argb;
+ int64 dst_width64 = (int64) dst_width; // Work around ios 64 bit warning.
+ int64 x64 = (int64) x;
+ int64 dx64 = (int64) dx;
+ asm volatile (
+ "dup v0.4s, %w3 \n" // x
+ "dup v1.4s, %w4 \n" // dx
+ "ld1 {v2.4s}, [%5] \n" // 0 1 2 3
+ "shl v6.4s, v1.4s, #2 \n" // 4 * dx
+ "mul v1.4s, v1.4s, v2.4s \n"
+ "movi v3.16b, #0x7f \n" // 0x7F
+ "movi v4.8h, #0x7f \n" // 0x7F
+ // x , x + 1 * dx, x + 2 * dx, x + 3 * dx
+ "add v5.4s, v1.4s, v0.4s \n"
+ "1: \n"
+ // d0, d1: a
+ // d2, d3: b
+ LOAD2_DATA32_LANE(v0, v1, 0)
+ LOAD2_DATA32_LANE(v0, v1, 1)
+ LOAD2_DATA32_LANE(v0, v1, 2)
+ LOAD2_DATA32_LANE(v0, v1, 3)
+ "shrn v2.4h, v5.4s, #9 \n"
+ "and v2.8b, v2.8b, v4.8b \n"
+ "dup v16.8b, v2.b[0] \n"
+ "dup v17.8b, v2.b[2] \n"
+ "dup v18.8b, v2.b[4] \n"
+ "dup v19.8b, v2.b[6] \n"
+ "ext v2.8b, v16.8b, v17.8b, #4 \n"
+ "ext v17.8b, v18.8b, v19.8b, #4 \n"
+ "ins v2.d[1], v17.d[0] \n" // f
+ "eor v7.16b, v2.16b, v3.16b \n" // 0x7f ^ f
+ "umull v16.8h, v0.8b, v7.8b \n"
+ "umull2 v17.8h, v0.16b, v7.16b \n"
+ "umull v18.8h, v1.8b, v2.8b \n"
+ "umull2 v19.8h, v1.16b, v2.16b \n"
+ "add v16.8h, v16.8h, v18.8h \n"
+ "add v17.8h, v17.8h, v19.8h \n"
+ "shrn v0.8b, v16.8h, #7 \n"
+ "shrn2 v0.16b, v17.8h, #7 \n"
+
+ MEMACCESS(0)
+ "st1 {v0.4s}, [%0], #16 \n" // store pixels
+ "add v5.4s, v5.4s, v6.4s \n"
+ "subs %w2, %w2, #4 \n" // 4 processed per loop
+ "b.gt 1b \n"
+ : "+r"(dst_argb), // %0
+ "+r"(src_argb), // %1
+ "+r"(dst_width64), // %2
+ "+r"(x64), // %3
+ "+r"(dx64), // %4
+ "+r"(tmp), // %5
+ "+r"(src_tmp) // %6
+ :
+ : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5",
+ "v6", "v7", "v16", "v17", "v18", "v19"
+ );
+}
+
+#undef LOAD2_DATA32_LANE
+
+#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#ifdef __cplusplus
} // extern "C"
diff --git a/TMessagesProj/jni/libyuv/source/scale_win.cc b/TMessagesProj/jni/libyuv/source/scale_win.cc
index 840b9738d..5ab4fa0cc 100644
--- a/TMessagesProj/jni/libyuv/source/scale_win.cc
+++ b/TMessagesProj/jni/libyuv/source/scale_win.cc
@@ -9,14 +9,15 @@
*/
#include "libyuv/row.h"
+#include "libyuv/scale_row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
-// This module is for Visual C x86.
-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
+// This module is for 32-bit Visual C x86 and clang-cl.
+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
// Offsets for source bytes 0 to 9
static uvec8 kShuf0 =
@@ -93,9 +94,111 @@ static uvec16 kScaleAb2 =
{ 65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, 65536 / 3, 65536 / 2, 0, 0 };
// Reads 32 pixels, throws half away and writes 16 pixels.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+__declspec(naked)
+void ScaleRowDown2_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width) {
+ __asm {
+ mov eax, [esp + 4] // src_ptr
+ // src_stride ignored
+ mov edx, [esp + 12] // dst_ptr
+ mov ecx, [esp + 16] // dst_width
+
+ wloop:
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ lea eax, [eax + 32]
+ psrlw xmm0, 8 // isolate odd pixels.
+ psrlw xmm1, 8
+ packuswb xmm0, xmm1
+ movdqu [edx], xmm0
+ lea edx, [edx + 16]
+ sub ecx, 16
+ jg wloop
+
+ ret
+ }
+}
+
+// Blends 32x1 rectangle to 16x1.
+__declspec(naked)
+void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width) {
+ __asm {
+ mov eax, [esp + 4] // src_ptr
+ // src_stride
+ mov edx, [esp + 12] // dst_ptr
+ mov ecx, [esp + 16] // dst_width
+
+ pcmpeqb xmm4, xmm4 // constant 0x0101
+ psrlw xmm4, 15
+ packuswb xmm4, xmm4
+ pxor xmm5, xmm5 // constant 0
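+ // pmaddubsw against the 0x01 bytes sums adjacent pixel pairs into words;
+ // pavgw with zero then rounds the average: (sum + 1) >> 1.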
+
+ wloop:
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ lea eax, [eax + 32]
+ pmaddubsw xmm0, xmm4 // horizontal add
+ pmaddubsw xmm1, xmm4
+ pavgw xmm0, xmm5 // (x + 1) / 2
+ pavgw xmm1, xmm5
+ packuswb xmm0, xmm1
+ movdqu [edx], xmm0
+ lea edx, [edx + 16]
+ sub ecx, 16
+ jg wloop
+
+ ret
+ }
+}
+
+// Blends 32x2 rectangle to 16x1.
+__declspec(naked)
+void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width) {
+ __asm {
+ push esi
+ mov eax, [esp + 4 + 4] // src_ptr
+ mov esi, [esp + 4 + 8] // src_stride
+ mov edx, [esp + 4 + 12] // dst_ptr
+ mov ecx, [esp + 4 + 16] // dst_width
+
+ pcmpeqb xmm4, xmm4 // constant 0x0101
+ psrlw xmm4, 15
+ packuswb xmm4, xmm4
+ pxor xmm5, xmm5 // constant 0
+
+ wloop:
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ movdqu xmm2, [eax + esi]
+ movdqu xmm3, [eax + esi + 16]
+ lea eax, [eax + 32]
+ pmaddubsw xmm0, xmm4 // horizontal add
+ pmaddubsw xmm1, xmm4
+ pmaddubsw xmm2, xmm4
+ pmaddubsw xmm3, xmm4
+ paddw xmm0, xmm2 // vertical add
+ paddw xmm1, xmm3
+ psrlw xmm0, 1
+ psrlw xmm1, 1
+ pavgw xmm0, xmm5 // (x + 1) / 2
+ pavgw xmm1, xmm5
+ packuswb xmm0, xmm1
+ movdqu [edx], xmm0
+ lea edx, [edx + 16]
+ sub ecx, 16
+ jg wloop
+
+ pop esi
+ ret
+ }
+}
+
+#ifdef HAS_SCALEROWDOWN2_AVX2
+// Reads 64 pixels, throws half away and writes 32 pixels.
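+// AVX2 pack instructions operate per 128-bit lane, so vpackuswb interleaves
+// the two lanes; vpermq 0xd8 (qword order 0,2,1,3) restores linear order.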
+__declspec(naked)
+void ScaleRowDown2_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
__asm {
mov eax, [esp + 4] // src_ptr
@@ -103,222 +206,110 @@ void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
mov edx, [esp + 12] // dst_ptr
mov ecx, [esp + 16] // dst_width
- align 4
wloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- lea eax, [eax + 32]
- psrlw xmm0, 8 // isolate odd pixels.
- psrlw xmm1, 8
- packuswb xmm0, xmm1
- sub ecx, 16
- movdqa [edx], xmm0
- lea edx, [edx + 16]
- jg wloop
+ vmovdqu ymm0, [eax]
+ vmovdqu ymm1, [eax + 32]
+ lea eax, [eax + 64]
+ vpsrlw ymm0, ymm0, 8 // isolate odd pixels.
+ vpsrlw ymm1, ymm1, 8
+ vpackuswb ymm0, ymm0, ymm1
+ vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb
+ vmovdqu [edx], ymm0
+ lea edx, [edx + 32]
+ sub ecx, 32
+ jg wloop
+ vzeroupper
ret
}
}
-// Blends 32x1 rectangle to 16x1.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+// Blends 64x1 rectangle to 32x1.
+__declspec(naked)
+void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
__asm {
- mov eax, [esp + 4] // src_ptr
- // src_stride
- mov edx, [esp + 12] // dst_ptr
- mov ecx, [esp + 16] // dst_width
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
+ mov eax, [esp + 4] // src_ptr
+ // src_stride
+ mov edx, [esp + 12] // dst_ptr
+ mov ecx, [esp + 16] // dst_width
+
+ vpcmpeqb ymm4, ymm4, ymm4 // '1' constant, 8b
+ vpsrlw ymm4, ymm4, 15
+ vpackuswb ymm4, ymm4, ymm4
+ vpxor ymm5, ymm5, ymm5 // constant 0
- align 4
wloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- lea eax, [eax + 32]
-
- movdqa xmm2, xmm0 // average columns (32 to 16 pixels)
- psrlw xmm0, 8
- movdqa xmm3, xmm1
- psrlw xmm1, 8
- pand xmm2, xmm5
- pand xmm3, xmm5
- pavgw xmm0, xmm2
- pavgw xmm1, xmm3
- packuswb xmm0, xmm1
-
- sub ecx, 16
- movdqa [edx], xmm0
- lea edx, [edx + 16]
- jg wloop
+ vmovdqu ymm0, [eax]
+ vmovdqu ymm1, [eax + 32]
+ lea eax, [eax + 64]
+ vpmaddubsw ymm0, ymm0, ymm4 // horizontal add
+ vpmaddubsw ymm1, ymm1, ymm4
+ vpavgw ymm0, ymm0, ymm5 // (x + 1) / 2
+ vpavgw ymm1, ymm1, ymm5
+ vpackuswb ymm0, ymm0, ymm1
+ vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb
+ vmovdqu [edx], ymm0
+ lea edx, [edx + 32]
+ sub ecx, 32
+ jg wloop
+ vzeroupper
ret
}
}
-// Blends 32x2 rectangle to 16x1.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+// For rounding, average = (sum + 2) / 4
+// becomes average((sum >> 1), 0)
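+// e.g. sum = 7: (7 + 2) / 4 = 2, and pavgw(7 >> 1, 0) = (3 + 0 + 1) >> 1 = 2.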
+// Blends 64x2 rectangle to 32x1.
+__declspec(naked)
+void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
__asm {
- push esi
- mov eax, [esp + 4 + 4] // src_ptr
- mov esi, [esp + 4 + 8] // src_stride
- mov edx, [esp + 4 + 12] // dst_ptr
- mov ecx, [esp + 4 + 16] // dst_width
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
+ push esi
+ mov eax, [esp + 4 + 4] // src_ptr
+ mov esi, [esp + 4 + 8] // src_stride
+ mov edx, [esp + 4 + 12] // dst_ptr
+ mov ecx, [esp + 4 + 16] // dst_width
+
+ vpcmpeqb ymm4, ymm4, ymm4 // '1' constant, 8b
+ vpsrlw ymm4, ymm4, 15
+ vpackuswb ymm4, ymm4, ymm4
+ vpxor ymm5, ymm5, ymm5 // constant 0
- align 4
wloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + esi]
- movdqa xmm3, [eax + esi + 16]
- lea eax, [eax + 32]
- pavgb xmm0, xmm2 // average rows
- pavgb xmm1, xmm3
+ vmovdqu ymm0, [eax]
+ vmovdqu ymm1, [eax + 32]
+ vmovdqu ymm2, [eax + esi]
+ vmovdqu ymm3, [eax + esi + 32]
+ lea eax, [eax + 64]
+ vpmaddubsw ymm0, ymm0, ymm4 // horizontal add
+ vpmaddubsw ymm1, ymm1, ymm4
+ vpmaddubsw ymm2, ymm2, ymm4
+ vpmaddubsw ymm3, ymm3, ymm4
+ vpaddw ymm0, ymm0, ymm2 // vertical add
+ vpaddw ymm1, ymm1, ymm3
+ vpsrlw ymm0, ymm0, 1
+ vpsrlw ymm1, ymm1, 1
+ vpavgw ymm0, ymm0, ymm5 // (x + 1) / 2
+ vpavgw ymm1, ymm1, ymm5
+ vpackuswb ymm0, ymm0, ymm1
+ vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb
+ vmovdqu [edx], ymm0
+ lea edx, [edx + 32]
+ sub ecx, 32
+ jg wloop
- movdqa xmm2, xmm0 // average columns (32 to 16 pixels)
- psrlw xmm0, 8
- movdqa xmm3, xmm1
- psrlw xmm1, 8
- pand xmm2, xmm5
- pand xmm3, xmm5
- pavgw xmm0, xmm2
- pavgw xmm1, xmm3
- packuswb xmm0, xmm1
-
- sub ecx, 16
- movdqa [edx], xmm0
- lea edx, [edx + 16]
- jg wloop
-
- pop esi
- ret
- }
-}
-
-// Reads 32 pixels, throws half away and writes 16 pixels.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- __asm {
- mov eax, [esp + 4] // src_ptr
- // src_stride ignored
- mov edx, [esp + 12] // dst_ptr
- mov ecx, [esp + 16] // dst_width
-
- align 4
- wloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- lea eax, [eax + 32]
- psrlw xmm0, 8 // isolate odd pixels.
- psrlw xmm1, 8
- packuswb xmm0, xmm1
- sub ecx, 16
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- jg wloop
-
- ret
- }
-}
-
-// Blends 32x1 rectangle to 16x1.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown2Linear_Unaligned_SSE2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- __asm {
- mov eax, [esp + 4] // src_ptr
- // src_stride
- mov edx, [esp + 12] // dst_ptr
- mov ecx, [esp + 16] // dst_width
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
-
- align 4
- wloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- lea eax, [eax + 32]
-
- movdqa xmm2, xmm0 // average columns (32 to 16 pixels)
- psrlw xmm0, 8
- movdqa xmm3, xmm1
- psrlw xmm1, 8
- pand xmm2, xmm5
- pand xmm3, xmm5
- pavgw xmm0, xmm2
- pavgw xmm1, xmm3
- packuswb xmm0, xmm1
-
- sub ecx, 16
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- jg wloop
-
- ret
- }
-}
-
-// Blends 32x2 rectangle to 16x1.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown2Box_Unaligned_SSE2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // src_ptr
- mov esi, [esp + 4 + 8] // src_stride
- mov edx, [esp + 4 + 12] // dst_ptr
- mov ecx, [esp + 4 + 16] // dst_width
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
-
- align 4
- wloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + esi]
- movdqu xmm3, [eax + esi + 16]
- lea eax, [eax + 32]
- pavgb xmm0, xmm2 // average rows
- pavgb xmm1, xmm3
-
- movdqa xmm2, xmm0 // average columns (32 to 16 pixels)
- psrlw xmm0, 8
- movdqa xmm3, xmm1
- psrlw xmm1, 8
- pand xmm2, xmm5
- pand xmm3, xmm5
- pavgw xmm0, xmm2
- pavgw xmm1, xmm3
- packuswb xmm0, xmm1
-
- sub ecx, 16
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- jg wloop
-
- pop esi
+ pop esi
+ vzeroupper
ret
}
}
+#endif // HAS_SCALEROWDOWN2_AVX2
// Point samples 32 pixels to 8 pixels.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+__declspec(naked)
+void ScaleRowDown4_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
__asm {
mov eax, [esp + 4] // src_ptr
@@ -329,19 +320,18 @@ void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
psrld xmm5, 24
pslld xmm5, 16
- align 4
wloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
lea eax, [eax + 32]
pand xmm0, xmm5
pand xmm1, xmm5
packuswb xmm0, xmm1
psrlw xmm0, 8
packuswb xmm0, xmm0
- sub ecx, 8
movq qword ptr [edx], xmm0
lea edx, [edx + 8]
+ sub ecx, 8
jg wloop
ret
@@ -349,9 +339,8 @@ void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
}
// Blends 32x4 rectangle to 8x1.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+__declspec(naked)
+void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
__asm {
push esi
@@ -361,46 +350,43 @@ void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
mov edx, [esp + 8 + 12] // dst_ptr
mov ecx, [esp + 8 + 16] // dst_width
lea edi, [esi + esi * 2] // src_stride * 3
- pcmpeqb xmm7, xmm7 // generate mask 0x00ff00ff
- psrlw xmm7, 8
+ pcmpeqb xmm4, xmm4 // constant 0x0101
+ psrlw xmm4, 15
+ movdqa xmm5, xmm4
+ packuswb xmm4, xmm4
+ psllw xmm5, 3 // constant 0x0008
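+ // pmaddubsw pairs columns and the paddw chain folds in all four rows
+ // (8 pixels per word); phaddw then pairs words, so (+8) >> 4 averages
+ // the full 4x4 block of 16 pixels.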
- align 4
wloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + esi]
- movdqa xmm3, [eax + esi + 16]
- pavgb xmm0, xmm2 // average rows
- pavgb xmm1, xmm3
- movdqa xmm2, [eax + esi * 2]
- movdqa xmm3, [eax + esi * 2 + 16]
- movdqa xmm4, [eax + edi]
- movdqa xmm5, [eax + edi + 16]
+ movdqu xmm0, [eax] // average rows
+ movdqu xmm1, [eax + 16]
+ movdqu xmm2, [eax + esi]
+ movdqu xmm3, [eax + esi + 16]
+ pmaddubsw xmm0, xmm4 // horizontal add
+ pmaddubsw xmm1, xmm4
+ pmaddubsw xmm2, xmm4
+ pmaddubsw xmm3, xmm4
+ paddw xmm0, xmm2 // vertical add rows 0, 1
+ paddw xmm1, xmm3
+ movdqu xmm2, [eax + esi * 2]
+ movdqu xmm3, [eax + esi * 2 + 16]
+ pmaddubsw xmm2, xmm4
+ pmaddubsw xmm3, xmm4
+ paddw xmm0, xmm2 // add row 2
+ paddw xmm1, xmm3
+ movdqu xmm2, [eax + edi]
+ movdqu xmm3, [eax + edi + 16]
lea eax, [eax + 32]
- pavgb xmm2, xmm4
- pavgb xmm3, xmm5
- pavgb xmm0, xmm2
- pavgb xmm1, xmm3
-
- movdqa xmm2, xmm0 // average columns (32 to 16 pixels)
- psrlw xmm0, 8
- movdqa xmm3, xmm1
- psrlw xmm1, 8
- pand xmm2, xmm7
- pand xmm3, xmm7
- pavgw xmm0, xmm2
- pavgw xmm1, xmm3
- packuswb xmm0, xmm1
-
- movdqa xmm2, xmm0 // average columns (16 to 8 pixels)
- psrlw xmm0, 8
- pand xmm2, xmm7
- pavgw xmm0, xmm2
+ pmaddubsw xmm2, xmm4
+ pmaddubsw xmm3, xmm4
+ paddw xmm0, xmm2 // add row 3
+ paddw xmm1, xmm3
+ phaddw xmm0, xmm1
+ paddw xmm0, xmm5 // + 8 for round
+ psrlw xmm0, 4 // /16 for average of 4 * 4
packuswb xmm0, xmm0
-
- sub ecx, 8
movq qword ptr [edx], xmm0
lea edx, [edx + 8]
+ sub ecx, 8
jg wloop
pop edi
@@ -409,13 +395,106 @@ void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
}
}
+#ifdef HAS_SCALEROWDOWN4_AVX2
+// Point samples 64 pixels to 16 pixels.
+__declspec(naked)
+void ScaleRowDown4_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width) {
+ __asm {
+ mov eax, [esp + 4] // src_ptr
+ // src_stride ignored
+ mov edx, [esp + 12] // dst_ptr
+ mov ecx, [esp + 16] // dst_width
+ vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff0000
+ vpsrld ymm5, ymm5, 24
+ vpslld ymm5, ymm5, 16
+
+ wloop:
+ vmovdqu ymm0, [eax]
+ vmovdqu ymm1, [eax + 32]
+ lea eax, [eax + 64]
+ vpand ymm0, ymm0, ymm5
+ vpand ymm1, ymm1, ymm5
+ vpackuswb ymm0, ymm0, ymm1
+ vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb
+ vpsrlw ymm0, ymm0, 8
+ vpackuswb ymm0, ymm0, ymm0
+ vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb
+ vmovdqu [edx], xmm0
+ lea edx, [edx + 16]
+ sub ecx, 16
+ jg wloop
+
+ vzeroupper
+ ret
+ }
+}
+
+// Blends 64x4 rectangle to 16x1.
+__declspec(naked)
+void ScaleRowDown4Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width) {
+ __asm {
+ push esi
+ push edi
+ mov eax, [esp + 8 + 4] // src_ptr
+ mov esi, [esp + 8 + 8] // src_stride
+ mov edx, [esp + 8 + 12] // dst_ptr
+ mov ecx, [esp + 8 + 16] // dst_width
+ lea edi, [esi + esi * 2] // src_stride * 3
+ vpcmpeqb ymm4, ymm4, ymm4 // constant 0x0101
+ vpsrlw ymm4, ymm4, 15
+ vpsllw ymm5, ymm4, 3 // constant 0x0008
+ vpackuswb ymm4, ymm4, ymm4
+
+ wloop:
+ vmovdqu ymm0, [eax] // average rows
+ vmovdqu ymm1, [eax + 32]
+ vmovdqu ymm2, [eax + esi]
+ vmovdqu ymm3, [eax + esi + 32]
+ vpmaddubsw ymm0, ymm0, ymm4 // horizontal add
+ vpmaddubsw ymm1, ymm1, ymm4
+ vpmaddubsw ymm2, ymm2, ymm4
+ vpmaddubsw ymm3, ymm3, ymm4
+ vpaddw ymm0, ymm0, ymm2 // vertical add rows 0, 1
+ vpaddw ymm1, ymm1, ymm3
+ vmovdqu ymm2, [eax + esi * 2]
+ vmovdqu ymm3, [eax + esi * 2 + 32]
+ vpmaddubsw ymm2, ymm2, ymm4
+ vpmaddubsw ymm3, ymm3, ymm4
+ vpaddw ymm0, ymm0, ymm2 // add row 2
+ vpaddw ymm1, ymm1, ymm3
+ vmovdqu ymm2, [eax + edi]
+ vmovdqu ymm3, [eax + edi + 32]
+ lea eax, [eax + 64]
+ vpmaddubsw ymm2, ymm2, ymm4
+ vpmaddubsw ymm3, ymm3, ymm4
+ vpaddw ymm0, ymm0, ymm2 // add row 3
+ vpaddw ymm1, ymm1, ymm3
+ vphaddw ymm0, ymm0, ymm1 // mutates
+ vpermq ymm0, ymm0, 0xd8 // unmutate vphaddw
+ vpaddw ymm0, ymm0, ymm5 // + 8 for round
+ vpsrlw ymm0, ymm0, 4 // /16 for average of 4 * 4
+ vpackuswb ymm0, ymm0, ymm0
+ vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb
+ vmovdqu [edx], xmm0
+ lea edx, [edx + 16]
+ sub ecx, 16
+ jg wloop
+
+ pop edi
+ pop esi
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_SCALEROWDOWN4_AVX2
+
// Point samples 32 pixels to 24 pixels.
// Produces three 8 byte values. For each 8 bytes, 16 bytes are read.
// Then shuffled to do the scaling.
-// Note that movdqa+palign may be better than movdqu.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
__asm {
@@ -423,14 +502,13 @@ void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
// src_stride ignored
mov edx, [esp + 12] // dst_ptr
mov ecx, [esp + 16] // dst_width
- movdqa xmm3, kShuf0
- movdqa xmm4, kShuf1
- movdqa xmm5, kShuf2
+ movdqa xmm3, xmmword ptr kShuf0
+ movdqa xmm4, xmmword ptr kShuf1
+ movdqa xmm5, xmmword ptr kShuf2
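+ // xmmword ptr gives the constant an explicit operand size, which clang-cl
+ // requires for MS-style inline asm.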
- align 4
wloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
lea eax, [eax + 32]
movdqa xmm2, xmm1
palignr xmm1, xmm0, 8
@@ -463,8 +541,7 @@ void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
// xmm7 kRound34
// Note that movdqa+palign may be better than movdqu.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
@@ -474,17 +551,16 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
mov esi, [esp + 4 + 8] // src_stride
mov edx, [esp + 4 + 12] // dst_ptr
mov ecx, [esp + 4 + 16] // dst_width
- movdqa xmm2, kShuf01
- movdqa xmm3, kShuf11
- movdqa xmm4, kShuf21
- movdqa xmm5, kMadd01
- movdqa xmm6, kMadd11
- movdqa xmm7, kRound34
+ movdqa xmm2, xmmword ptr kShuf01
+ movdqa xmm3, xmmword ptr kShuf11
+ movdqa xmm4, xmmword ptr kShuf21
+ movdqa xmm5, xmmword ptr kMadd01
+ movdqa xmm6, xmmword ptr kMadd11
+ movdqa xmm7, xmmword ptr kRound34
- align 4
wloop:
- movdqa xmm0, [eax] // pixels 0..7
- movdqa xmm1, [eax + esi]
+ movdqu xmm0, [eax] // pixels 0..7
+ movdqu xmm1, [eax + esi]
pavgb xmm0, xmm1
pshufb xmm0, xmm2
pmaddubsw xmm0, xmm5
@@ -501,19 +577,19 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
psrlw xmm0, 2
packuswb xmm0, xmm0
movq qword ptr [edx + 8], xmm0
- movdqa xmm0, [eax + 16] // pixels 16..23
- movdqa xmm1, [eax + esi + 16]
+ movdqu xmm0, [eax + 16] // pixels 16..23
+ movdqu xmm1, [eax + esi + 16]
lea eax, [eax + 32]
pavgb xmm0, xmm1
pshufb xmm0, xmm4
- movdqa xmm1, kMadd21
+ movdqa xmm1, xmmword ptr kMadd21
pmaddubsw xmm0, xmm1
paddsw xmm0, xmm7
psrlw xmm0, 2
packuswb xmm0, xmm0
- sub ecx, 24
movq qword ptr [edx + 16], xmm0
lea edx, [edx + 24]
+ sub ecx, 24
jg wloop
pop esi
@@ -522,8 +598,7 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
}
// Note that movdqa+palign may be better than movdqu.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
@@ -533,17 +608,16 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
mov esi, [esp + 4 + 8] // src_stride
mov edx, [esp + 4 + 12] // dst_ptr
mov ecx, [esp + 4 + 16] // dst_width
- movdqa xmm2, kShuf01
- movdqa xmm3, kShuf11
- movdqa xmm4, kShuf21
- movdqa xmm5, kMadd01
- movdqa xmm6, kMadd11
- movdqa xmm7, kRound34
+ movdqa xmm2, xmmword ptr kShuf01
+ movdqa xmm3, xmmword ptr kShuf11
+ movdqa xmm4, xmmword ptr kShuf21
+ movdqa xmm5, xmmword ptr kMadd01
+ movdqa xmm6, xmmword ptr kMadd11
+ movdqa xmm7, xmmword ptr kRound34
- align 4
wloop:
- movdqa xmm0, [eax] // pixels 0..7
- movdqa xmm1, [eax + esi]
+ movdqu xmm0, [eax] // pixels 0..7
+ movdqu xmm1, [eax + esi]
pavgb xmm1, xmm0
pavgb xmm0, xmm1
pshufb xmm0, xmm2
@@ -562,20 +636,20 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
psrlw xmm0, 2
packuswb xmm0, xmm0
movq qword ptr [edx + 8], xmm0
- movdqa xmm0, [eax + 16] // pixels 16..23
- movdqa xmm1, [eax + esi + 16]
+ movdqu xmm0, [eax + 16] // pixels 16..23
+ movdqu xmm1, [eax + esi + 16]
lea eax, [eax + 32]
pavgb xmm1, xmm0
pavgb xmm0, xmm1
pshufb xmm0, xmm4
- movdqa xmm1, kMadd21
+ movdqa xmm1, xmmword ptr kMadd21
pmaddubsw xmm0, xmm1
paddsw xmm0, xmm7
psrlw xmm0, 2
packuswb xmm0, xmm0
- sub ecx, 24
movq qword ptr [edx + 16], xmm0
lea edx, [edx+24]
+ sub ecx, 24
jg wloop
pop esi
@@ -586,7 +660,7 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
// 3/8 point sampler
// Scale 32 pixels to 12
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
__asm {
@@ -594,23 +668,22 @@ void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
// src_stride ignored
mov edx, [esp + 12] // dst_ptr
mov ecx, [esp + 16] // dst_width
- movdqa xmm4, kShuf38a
- movdqa xmm5, kShuf38b
+ movdqa xmm4, xmmword ptr kShuf38a
+ movdqa xmm5, xmmword ptr kShuf38b
- align 4
xloop:
- movdqa xmm0, [eax] // 16 pixels -> 0,1,2,3,4,5
- movdqa xmm1, [eax + 16] // 16 pixels -> 6,7,8,9,10,11
+ movdqu xmm0, [eax] // 16 pixels -> 0,1,2,3,4,5
+ movdqu xmm1, [eax + 16] // 16 pixels -> 6,7,8,9,10,11
lea eax, [eax + 32]
pshufb xmm0, xmm4
pshufb xmm1, xmm5
paddusb xmm0, xmm1
- sub ecx, 12
movq qword ptr [edx], xmm0 // write 12 pixels
movhlps xmm1, xmm0
movd [edx + 8], xmm1
lea edx, [edx + 12]
+ sub ecx, 12
jg xloop
ret
@@ -618,7 +691,7 @@ void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
}
// Scale 16x3 pixels to 6x1 with interpolation
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
@@ -628,15 +701,14 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
mov esi, [esp + 4 + 8] // src_stride
mov edx, [esp + 4 + 12] // dst_ptr
mov ecx, [esp + 4 + 16] // dst_width
- movdqa xmm2, kShufAc
- movdqa xmm3, kShufAc3
- movdqa xmm4, kScaleAc33
+ movdqa xmm2, xmmword ptr kShufAc
+ movdqa xmm3, xmmword ptr kShufAc3
+ movdqa xmm4, xmmword ptr kScaleAc33
pxor xmm5, xmm5
- align 4
xloop:
- movdqa xmm0, [eax] // sum up 3 rows into xmm0/1
- movdqa xmm6, [eax + esi]
+ movdqu xmm0, [eax] // sum up 3 rows into xmm0/1
+ movdqu xmm6, [eax + esi]
movhlps xmm1, xmm0
movhlps xmm7, xmm6
punpcklbw xmm0, xmm5
@@ -645,7 +717,7 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
punpcklbw xmm7, xmm5
paddusw xmm0, xmm6
paddusw xmm1, xmm7
- movdqa xmm6, [eax + esi * 2]
+ movdqu xmm6, [eax + esi * 2]
lea eax, [eax + 16]
movhlps xmm7, xmm6
punpcklbw xmm6, xmm5
@@ -671,11 +743,11 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
pmulhuw xmm6, xmm4 // divide by 9,9,6, 9,9,6
packuswb xmm6, xmm6
- sub ecx, 6
movd [edx], xmm6 // write 6 pixels
psrlq xmm6, 16
movd [edx + 2], xmm6
lea edx, [edx + 6]
+ sub ecx, 6
jg xloop
pop esi
@@ -684,7 +756,7 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
}
// Scale 16x2 pixels to 6x1 with interpolation
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
@@ -694,16 +766,16 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
mov esi, [esp + 4 + 8] // src_stride
mov edx, [esp + 4 + 12] // dst_ptr
mov ecx, [esp + 4 + 16] // dst_width
- movdqa xmm2, kShufAb0
- movdqa xmm3, kShufAb1
- movdqa xmm4, kShufAb2
- movdqa xmm5, kScaleAb2
+ movdqa xmm2, xmmword ptr kShufAb0
+ movdqa xmm3, xmmword ptr kShufAb1
+ movdqa xmm4, xmmword ptr kShufAb2
+ movdqa xmm5, xmmword ptr kScaleAb2
- align 4
xloop:
- movdqa xmm0, [eax] // average 2 rows into xmm0
- pavgb xmm0, [eax + esi]
+ movdqu xmm0, [eax] // average 2 rows into xmm0
+ movdqu xmm1, [eax + esi]
lea eax, [eax + 16]
+ pavgb xmm0, xmm1
movdqa xmm1, xmm0 // 16 pixels -> 0,1,2,3,4,5 of xmm1
pshufb xmm1, xmm2
@@ -716,11 +788,11 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
pmulhuw xmm1, xmm5 // divide by 3,3,2, 3,3,2
packuswb xmm1, xmm1
- sub ecx, 6
movd [edx], xmm1 // write 6 pixels
psrlq xmm1, 16
movd [edx + 2], xmm1
lea edx, [edx + 6]
+ sub ecx, 6
jg xloop
pop esi
@@ -728,79 +800,68 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
}
}
-// Reads 16xN bytes and produces 16 shorts at a time.
-// TODO(fbarchard): Make this handle 4xN bytes for any width ARGB.
-__declspec(naked) __declspec(align(16))
-void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int src_width,
- int src_height) {
+// Reads 16 bytes and accumulates to 16 shorts at a time.
+__declspec(naked)
+void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
__asm {
- push esi
- push edi
- push ebx
- push ebp
- mov esi, [esp + 16 + 4] // src_ptr
- mov edx, [esp + 16 + 8] // src_stride
- mov edi, [esp + 16 + 12] // dst_ptr
- mov ecx, [esp + 16 + 16] // dst_width
- mov ebx, [esp + 16 + 20] // height
- pxor xmm4, xmm4
- dec ebx
+ mov eax, [esp + 4] // src_ptr
+ mov edx, [esp + 8] // dst_ptr
+ mov ecx, [esp + 12] // src_width
+ pxor xmm5, xmm5
- align 4
+ // sum rows
xloop:
- // first row
- movdqa xmm0, [esi]
- lea eax, [esi + edx]
- movdqa xmm1, xmm0
- punpcklbw xmm0, xmm4
- punpckhbw xmm1, xmm4
- lea esi, [esi + 16]
- mov ebp, ebx
- test ebp, ebp
- je ydone
-
- // sum remaining rows
- align 4
- yloop:
- movdqa xmm2, [eax] // read 16 pixels
- lea eax, [eax + edx] // advance to next row
- movdqa xmm3, xmm2
- punpcklbw xmm2, xmm4
- punpckhbw xmm3, xmm4
+ movdqu xmm3, [eax] // read 16 bytes
+ lea eax, [eax + 16]
+ movdqu xmm0, [edx] // read 16 words from destination
+ movdqu xmm1, [edx + 16]
+ movdqa xmm2, xmm3
+ punpcklbw xmm2, xmm5
+ punpckhbw xmm3, xmm5
paddusw xmm0, xmm2 // sum 16 words
paddusw xmm1, xmm3
- sub ebp, 1
- jg yloop
-
- align 4
- ydone:
- movdqa [edi], xmm0
- movdqa [edi + 16], xmm1
- lea edi, [edi + 32]
-
+ movdqu [edx], xmm0 // write 16 words to destination
+ movdqu [edx + 16], xmm1
+ lea edx, [edx + 32]
sub ecx, 16
jg xloop
-
- pop ebp
- pop ebx
- pop edi
- pop esi
ret
}
}
-// Bilinear column filtering. SSSE3 version.
-// TODO(fbarchard): Port to Neon
-// TODO(fbarchard): Switch the following:
-// xor ebx, ebx
-// mov bx, word ptr [esi + eax] // 2 source x0 pixels
-// To
-// movzx ebx, word ptr [esi + eax] // 2 source x0 pixels
-// when drmemory bug fixed.
-// https://code.google.com/p/drmemory/issues/detail?id=1396
+#ifdef HAS_SCALEADDROW_AVX2
+// Reads 32 bytes and accumulates to 32 shorts at a time.
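+// vpunpck operates per 128-bit lane, so the bytes are pre-permuted (vpermq
+// 0xd8) so the zero-extended words come out in source order.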
+__declspec(naked)
+void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
+ __asm {
+ mov eax, [esp + 4] // src_ptr
+ mov edx, [esp + 8] // dst_ptr
+ mov ecx, [esp + 12] // src_width
+ vpxor ymm5, ymm5, ymm5
-__declspec(naked) __declspec(align(16))
+ // sum rows
+ xloop:
+ vmovdqu ymm3, [eax] // read 32 bytes
+ lea eax, [eax + 32]
+ vpermq ymm3, ymm3, 0xd8 // unmutate for vpunpck
+ vpunpcklbw ymm2, ymm3, ymm5
+ vpunpckhbw ymm3, ymm3, ymm5
+ vpaddusw ymm0, ymm2, [edx] // sum 16 words
+ vpaddusw ymm1, ymm3, [edx + 32]
+ vmovdqu [edx], ymm0 // write 32 words to destination
+ vmovdqu [edx + 32], ymm1
+ lea edx, [edx + 64]
+ sub ecx, 32
+ jg xloop
+
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_SCALEADDROW_AVX2
+
+// Bilinear column filtering. SSSE3 version.
+__declspec(naked)
void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx) {
__asm {
@@ -828,7 +889,6 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
pextrw edx, xmm2, 3 // get x1 integer. preroll
// 2 Pixel loop.
- align 4
xloop2:
movdqa xmm1, xmm2 // x0, x1 fractions.
paddd xmm2, xmm3 // x += dx
@@ -851,7 +911,6 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
sub ecx, 2 // 2 pixels
jge xloop2
- align 4
xloop29:
add ecx, 2 - 1
@@ -869,7 +928,6 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
movd ebx, xmm0
mov [edi], bl
- align 4
xloop99:
pop edi
@@ -880,8 +938,7 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
}
// Reads 16 pixels, duplicates them and writes 32 pixels.
-// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx) {
__asm {
@@ -889,17 +946,16 @@ void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
mov eax, [esp + 8] // src_ptr
mov ecx, [esp + 12] // dst_width
- align 4
wloop:
- movdqa xmm0, [eax]
+ movdqu xmm0, [eax]
lea eax, [eax + 16]
movdqa xmm1, xmm0
punpcklbw xmm0, xmm0
punpckhbw xmm1, xmm1
- sub ecx, 32
- movdqa [edx], xmm0
- movdqa [edx + 16], xmm1
+ movdqu [edx], xmm0
+ movdqu [edx + 16], xmm1
lea edx, [edx + 32]
+ sub ecx, 32
jg wloop
ret
@@ -907,8 +963,7 @@ void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
}
// Reads 8 pixels, throws half away and writes 4 even pixels (0, 2, 4, 6)
-// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
ptrdiff_t src_stride,
uint8* dst_argb, int dst_width) {
@@ -918,15 +973,14 @@ void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
mov edx, [esp + 12] // dst_argb
mov ecx, [esp + 16] // dst_width
- align 4
wloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
lea eax, [eax + 32]
shufps xmm0, xmm1, 0xdd
- sub ecx, 4
- movdqa [edx], xmm0
+ movdqu [edx], xmm0
lea edx, [edx + 16]
+ sub ecx, 4
jg wloop
ret
@@ -934,8 +988,7 @@ void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
}
// Blends 8x1 rectangle to 4x1.
-// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
ptrdiff_t src_stride,
uint8* dst_argb, int dst_width) {
@@ -945,18 +998,17 @@ void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
mov edx, [esp + 12] // dst_argb
mov ecx, [esp + 16] // dst_width
- align 4
wloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
lea eax, [eax + 32]
movdqa xmm2, xmm0
shufps xmm0, xmm1, 0x88 // even pixels
shufps xmm2, xmm1, 0xdd // odd pixels
pavgb xmm0, xmm2
- sub ecx, 4
- movdqa [edx], xmm0
+ movdqu [edx], xmm0
lea edx, [edx + 16]
+ sub ecx, 4
jg wloop
ret
@@ -964,8 +1016,7 @@ void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
}
// Blends 8x2 rectangle to 4x1.
-// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
ptrdiff_t src_stride,
uint8* dst_argb, int dst_width) {
@@ -976,12 +1027,11 @@ void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
mov edx, [esp + 4 + 12] // dst_argb
mov ecx, [esp + 4 + 16] // dst_width
- align 4
wloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + esi]
- movdqa xmm3, [eax + esi + 16]
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ movdqu xmm2, [eax + esi]
+ movdqu xmm3, [eax + esi + 16]
lea eax, [eax + 32]
pavgb xmm0, xmm2 // average rows
pavgb xmm1, xmm3
@@ -989,9 +1039,9 @@ void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
shufps xmm0, xmm1, 0x88 // even pixels
shufps xmm2, xmm1, 0xdd // odd pixels
pavgb xmm0, xmm2
- sub ecx, 4
- movdqa [edx], xmm0
+ movdqu [edx], xmm0
lea edx, [edx + 16]
+ sub ecx, 4
jg wloop
pop esi
@@ -1000,8 +1050,7 @@ void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
}
// Reads 4 pixels at a time.
-// Alignment requirement: dst_argb 16 byte aligned.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width) {
@@ -1016,7 +1065,6 @@ void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
lea ebx, [ebx * 4]
lea edi, [ebx + ebx * 2]
- align 4
wloop:
movd xmm0, [eax]
movd xmm1, [eax + ebx]
@@ -1026,9 +1074,9 @@ void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
lea eax, [eax + ebx * 4]
punpckldq xmm2, xmm3
punpcklqdq xmm0, xmm2
- sub ecx, 4
- movdqa [edx], xmm0
+ movdqu [edx], xmm0
lea edx, [edx + 16]
+ sub ecx, 4
jg wloop
pop edi
@@ -1038,8 +1086,7 @@ void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
}
// Blends four 2x2 to 4x1.
-// Alignment requirement: dst_argb 16 byte aligned.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
ptrdiff_t src_stride,
int src_stepx,
@@ -1057,7 +1104,6 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
lea ebx, [ebx * 4]
lea edi, [ebx + ebx * 2]
- align 4
wloop:
movq xmm0, qword ptr [eax] // row0 4 pairs
movhps xmm0, qword ptr [eax + ebx]
@@ -1075,9 +1121,9 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
shufps xmm0, xmm1, 0x88 // even pixels
shufps xmm2, xmm1, 0xdd // odd pixels
pavgb xmm0, xmm2
- sub ecx, 4
- movdqa [edx], xmm0
+ movdqu [edx], xmm0
lea edx, [edx + 16]
+ sub ecx, 4
jg wloop
pop edi
@@ -1088,7 +1134,7 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
}
// Column scaling unfiltered. SSE2 version.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) {
__asm {
@@ -1118,7 +1164,6 @@ void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
jl xloop49
// 4 Pixel loop.
- align 4
xloop4:
movd xmm0, [esi + eax * 4] // 1 source x0 pixels
movd xmm1, [esi + edx * 4] // 1 source x1 pixels
@@ -1133,12 +1178,11 @@ void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
pextrw edx, xmm2, 3 // get x1 integer. next iteration.
punpckldq xmm1, xmm4 // x2 x3
punpcklqdq xmm0, xmm1 // x0 x1 x2 x3
- sub ecx, 4 // 4 pixels
movdqu [edi], xmm0
lea edi, [edi + 16]
+ sub ecx, 4 // 4 pixels
jge xloop4
- align 4
xloop49:
test ecx, 2
je xloop29
@@ -1159,7 +1203,6 @@ void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
// 1 Pixels.
movd xmm0, [esi + eax * 4] // 1 source x2 pixels
movd dword ptr [edi], xmm0
- align 4
xloop99:
pop esi
@@ -1182,7 +1225,7 @@ static uvec8 kShuffleFractions = {
0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 4u,
};
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) {
__asm {
@@ -1193,8 +1236,8 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
mov ecx, [esp + 8 + 12] // dst_width
movd xmm2, [esp + 8 + 16] // x
movd xmm3, [esp + 8 + 20] // dx
- movdqa xmm4, kShuffleColARGB
- movdqa xmm5, kShuffleFractions
+ movdqa xmm4, xmmword ptr kShuffleColARGB
+ movdqa xmm5, xmmword ptr kShuffleFractions
pcmpeqb xmm6, xmm6 // generate 0x007f for inverting fraction.
psrlw xmm6, 9
pextrw eax, xmm2, 1 // get x0 integer. preroll
@@ -1209,7 +1252,6 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
pextrw edx, xmm2, 3 // get x1 integer. preroll
// 2 Pixel loop.
- align 4
xloop2:
movdqa xmm1, xmm2 // x0, x1 fractions.
paddd xmm2, xmm3 // x += dx
@@ -1229,7 +1271,6 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
sub ecx, 2 // 2 pixels
jge xloop2
- align 4
xloop29:
add ecx, 2 - 1
@@ -1246,7 +1287,6 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
packuswb xmm0, xmm0 // argb 8 bits, 1 pixel.
movd [edi], xmm0
- align 4
xloop99:
pop edi
@@ -1256,8 +1296,7 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
}
// Reads 4 pixels, duplicates them and writes 8 pixels.
-// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) {
__asm {
@@ -1265,17 +1304,16 @@ void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
mov eax, [esp + 8] // src_argb
mov ecx, [esp + 12] // dst_width
- align 4
wloop:
- movdqa xmm0, [eax]
+ movdqu xmm0, [eax]
lea eax, [eax + 16]
movdqa xmm1, xmm0
punpckldq xmm0, xmm0
punpckhdq xmm1, xmm1
- sub ecx, 8
- movdqa [edx], xmm0
- movdqa [edx + 16], xmm1
+ movdqu [edx], xmm0
+ movdqu [edx + 16], xmm1
lea edx, [edx + 32]
+ sub ecx, 8
jg wloop
ret
@@ -1283,7 +1321,7 @@ void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
}
// Divide num by div and return as 16.16 fixed point result.
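+// Equivalent C: (int)(((int64)(num) << 16) / div).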
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
int FixedDiv_X86(int num, int div) {
__asm {
mov eax, [esp + 4] // num
@@ -1296,7 +1334,7 @@ int FixedDiv_X86(int num, int div) {
}
// Divide num by div and return as 16.16 fixed point result.
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
int FixedDiv1_X86(int num, int div) {
__asm {
mov eax, [esp + 4] // num
@@ -1311,8 +1349,7 @@ int FixedDiv1_X86(int num, int div) {
ret
}
}
-
-#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
+#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
#ifdef __cplusplus
} // extern "C"
diff --git a/TMessagesProj/jni/libyuv/source/video_common.cc b/TMessagesProj/jni/libyuv/source/video_common.cc
index efbedf46e..379a0669a 100644
--- a/TMessagesProj/jni/libyuv/source/video_common.cc
+++ b/TMessagesProj/jni/libyuv/source/video_common.cc
@@ -33,7 +33,7 @@ static const struct FourCCAliasEntry kFourCCAliases[] = {
{FOURCC_2VUY, FOURCC_UYVY}, // kCMPixelFormat_422YpCbCr8
{FOURCC_JPEG, FOURCC_MJPG}, // Note: JPEG has DHT while MJPG does not.
{FOURCC_DMB1, FOURCC_MJPG},
- {FOURCC_BA81, FOURCC_BGGR},
+ {FOURCC_BA81, FOURCC_BGGR}, // deprecated.
{FOURCC_RGB3, FOURCC_RAW },
{FOURCC_BGR3, FOURCC_24BG},
{FOURCC_CM32, FOURCC_BGRA}, // kCMPixelFormat_32ARGB
diff --git a/TMessagesProj/jni/libyuv/source/x86inc.asm b/TMessagesProj/jni/libyuv/source/x86inc.asm
deleted file mode 100644
index cb5c32df3..000000000
--- a/TMessagesProj/jni/libyuv/source/x86inc.asm
+++ /dev/null
@@ -1,1136 +0,0 @@
-;*****************************************************************************
-;* x86inc.asm: x264asm abstraction layer
-;*****************************************************************************
-;* Copyright (C) 2005-2012 x264 project
-;*
-;* Authors: Loren Merritt
-;* Anton Mitrofanov
-;* Jason Garrett-Glaser
-;* Henrik Gramner
-;*
-;* Permission to use, copy, modify, and/or distribute this software for any
-;* purpose with or without fee is hereby granted, provided that the above
-;* copyright notice and this permission notice appear in all copies.
-;*
-;* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-;* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-;* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-;* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-;* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-;* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-;* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-;*****************************************************************************
-
-; This is a header file for the x264ASM assembly language, which uses
-; NASM/YASM syntax combined with a large number of macros to provide easy
-; abstraction between different calling conventions (x86_32, win64, linux64).
-; It also has various other useful features to simplify writing the kind of
-; DSP functions that are most often used in x264.
-
-; Unlike the rest of x264, this file is available under an ISC license, as it
-; has significant usefulness outside of x264 and we want it to be available
-; to the largest audience possible. Of course, if you modify it for your own
-; purposes to add a new feature, we strongly encourage contributing a patch
-; as this feature might be useful for others as well. Send patches or ideas
-; to x264-devel@videolan.org .
-
-; Local changes for libyuv:
-; remove %define program_name and references in labels
-; rename cpus to uppercase
-
-%define WIN64 0
-%define UNIX64 0
-%if ARCH_X86_64
- %ifidn __OUTPUT_FORMAT__,win32
- %define WIN64 1
- %elifidn __OUTPUT_FORMAT__,win64
- %define WIN64 1
- %else
- %define UNIX64 1
- %endif
-%endif
-
-%ifdef PREFIX
- %define mangle(x) _ %+ x
-%else
- %define mangle(x) x
-%endif
-
-; Name of the .rodata section.
-; Kludge: Something on OS X fails to align .rodata even given an align attribute,
-; so use a different read-only section.
-%macro SECTION_RODATA 0-1 16
- %ifidn __OUTPUT_FORMAT__,macho64
- SECTION .text align=%1
- %elifidn __OUTPUT_FORMAT__,macho
- SECTION .text align=%1
- fakegot:
- %elifidn __OUTPUT_FORMAT__,aout
- section .text
- %else
- SECTION .rodata align=%1
- %endif
-%endmacro
-
-; aout does not support align=
-%macro SECTION_TEXT 0-1 16
- %ifidn __OUTPUT_FORMAT__,aout
- SECTION .text
- %else
- SECTION .text align=%1
- %endif
-%endmacro
-
-%if WIN64
- %define PIC
-%elif ARCH_X86_64 == 0
-; x86_32 doesn't require PIC.
-; Some distros prefer shared objects to be PIC, but nothing breaks if
-; the code contains a few textrels, so we'll skip that complexity.
- %undef PIC
-%endif
-%ifdef PIC
- default rel
-%endif
-
-; Always use long nops (reduces 0x90 spam in disassembly on x86_32)
-CPU amdnop
-
-; Macros to eliminate most code duplication between x86_32 and x86_64:
-; Currently this works only for leaf functions which load all their arguments
-; into registers at the start, and make no other use of the stack. Luckily that
-; covers most of x264's asm.
-
-; PROLOGUE:
-; %1 = number of arguments. loads them from stack if needed.
-; %2 = number of registers used. pushes callee-saved regs if needed.
-; %3 = number of xmm registers used. pushes callee-saved xmm regs if needed.
-; %4 = list of names to define to registers
-; PROLOGUE can also be invoked by adding the same options to cglobal
-
-; e.g.
-; cglobal foo, 2,3,0, dst, src, tmp
-; declares a function (foo), taking two args (dst and src) and one local variable (tmp)
-
-; TODO Some functions can use some args directly from the stack. If they're the
-; last args then you can just not declare them, but if they're in the middle
-; we need more flexible macro.
-
-; RET:
-; Pops anything that was pushed by PROLOGUE, and returns.
-
-; REP_RET:
-; Same, but if it doesn't pop anything it becomes a 2-byte ret, for athlons
-; which are slow when a normal ret follows a branch.
-
-; registers:
-; rN and rNq are the native-size register holding function argument N
-; rNd, rNw, rNb are dword, word, and byte size
-; rNh is the high 8 bits of the word size
-; rNm is the original location of arg N (a register or on the stack), dword
-; rNmp is native size
-
-%macro DECLARE_REG 2-3
- %define r%1q %2
- %define r%1d %2d
- %define r%1w %2w
- %define r%1b %2b
- %define r%1h %2h
- %if %0 == 2
- %define r%1m %2d
- %define r%1mp %2
- %elif ARCH_X86_64 ; memory
- %define r%1m [rsp + stack_offset + %3]
- %define r%1mp qword r %+ %1m
- %else
- %define r%1m [esp + stack_offset + %3]
- %define r%1mp dword r %+ %1m
- %endif
- %define r%1 %2
-%endmacro
-
-%macro DECLARE_REG_SIZE 3
- %define r%1q r%1
- %define e%1q r%1
- %define r%1d e%1
- %define e%1d e%1
- %define r%1w %1
- %define e%1w %1
- %define r%1h %3
- %define e%1h %3
- %define r%1b %2
- %define e%1b %2
-%if ARCH_X86_64 == 0
- %define r%1 e%1
-%endif
-%endmacro
-
-DECLARE_REG_SIZE ax, al, ah
-DECLARE_REG_SIZE bx, bl, bh
-DECLARE_REG_SIZE cx, cl, ch
-DECLARE_REG_SIZE dx, dl, dh
-DECLARE_REG_SIZE si, sil, null
-DECLARE_REG_SIZE di, dil, null
-DECLARE_REG_SIZE bp, bpl, null
-
-; t# defines for when per-arch register allocation is more complex than just function arguments
-
-%macro DECLARE_REG_TMP 1-*
- %assign %%i 0
- %rep %0
- CAT_XDEFINE t, %%i, r%1
- %assign %%i %%i+1
- %rotate 1
- %endrep
-%endmacro
-
-%macro DECLARE_REG_TMP_SIZE 0-*
- %rep %0
- %define t%1q t%1 %+ q
- %define t%1d t%1 %+ d
- %define t%1w t%1 %+ w
- %define t%1h t%1 %+ h
- %define t%1b t%1 %+ b
- %rotate 1
- %endrep
-%endmacro
-
-DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
-
-%if ARCH_X86_64
- %define gprsize 8
-%else
- %define gprsize 4
-%endif
-
-%macro PUSH 1
- push %1
- %assign stack_offset stack_offset+gprsize
-%endmacro
-
-%macro POP 1
- pop %1
- %assign stack_offset stack_offset-gprsize
-%endmacro
-
-%macro PUSH_IF_USED 1-*
- %rep %0
- %if %1 < regs_used
- PUSH r%1
- %endif
- %rotate 1
- %endrep
-%endmacro
-
-%macro POP_IF_USED 1-*
- %rep %0
- %if %1 < regs_used
- pop r%1
- %endif
- %rotate 1
- %endrep
-%endmacro
-
-%macro LOAD_IF_USED 1-*
- %rep %0
- %if %1 < num_args
- mov r%1, r %+ %1 %+ mp
- %endif
- %rotate 1
- %endrep
-%endmacro
-
-%macro SUB 2
- sub %1, %2
- %ifidn %1, rsp
- %assign stack_offset stack_offset+(%2)
- %endif
-%endmacro
-
-%macro ADD 2
- add %1, %2
- %ifidn %1, rsp
- %assign stack_offset stack_offset-(%2)
- %endif
-%endmacro
-
-%macro movifnidn 2
- %ifnidn %1, %2
- mov %1, %2
- %endif
-%endmacro
-
-%macro movsxdifnidn 2
- %ifnidn %1, %2
- movsxd %1, %2
- %endif
-%endmacro
-
-%macro ASSERT 1
- %if (%1) == 0
- %error assert failed
- %endif
-%endmacro
-
-%macro DEFINE_ARGS 0-*
- %ifdef n_arg_names
- %assign %%i 0
- %rep n_arg_names
- CAT_UNDEF arg_name %+ %%i, q
- CAT_UNDEF arg_name %+ %%i, d
- CAT_UNDEF arg_name %+ %%i, w
- CAT_UNDEF arg_name %+ %%i, h
- CAT_UNDEF arg_name %+ %%i, b
- CAT_UNDEF arg_name %+ %%i, m
- CAT_UNDEF arg_name %+ %%i, mp
- CAT_UNDEF arg_name, %%i
- %assign %%i %%i+1
- %endrep
- %endif
-
- %xdefine %%stack_offset stack_offset
- %undef stack_offset ; so that the current value of stack_offset doesn't get baked in by xdefine
- %assign %%i 0
- %rep %0
- %xdefine %1q r %+ %%i %+ q
- %xdefine %1d r %+ %%i %+ d
- %xdefine %1w r %+ %%i %+ w
- %xdefine %1h r %+ %%i %+ h
- %xdefine %1b r %+ %%i %+ b
- %xdefine %1m r %+ %%i %+ m
- %xdefine %1mp r %+ %%i %+ mp
- CAT_XDEFINE arg_name, %%i, %1
- %assign %%i %%i+1
- %rotate 1
- %endrep
- %xdefine stack_offset %%stack_offset
- %assign n_arg_names %0
-%endmacro
-
-%if WIN64 ; Windows x64 ;=================================================
-
-DECLARE_REG 0, rcx
-DECLARE_REG 1, rdx
-DECLARE_REG 2, R8
-DECLARE_REG 3, R9
-DECLARE_REG 4, R10, 40
-DECLARE_REG 5, R11, 48
-DECLARE_REG 6, rax, 56
-DECLARE_REG 7, rdi, 64
-DECLARE_REG 8, rsi, 72
-DECLARE_REG 9, rbx, 80
-DECLARE_REG 10, rbp, 88
-DECLARE_REG 11, R12, 96
-DECLARE_REG 12, R13, 104
-DECLARE_REG 13, R14, 112
-DECLARE_REG 14, R15, 120
-
-%macro PROLOGUE 2-4+ 0 ; #args, #regs, #xmm_regs, arg_names...
- %assign num_args %1
- %assign regs_used %2
- ASSERT regs_used >= num_args
- ASSERT regs_used <= 15
- PUSH_IF_USED 7, 8, 9, 10, 11, 12, 13, 14
- %if mmsize == 8
- %assign xmm_regs_used 0
- %else
- WIN64_SPILL_XMM %3
- %endif
- LOAD_IF_USED 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14
- DEFINE_ARGS %4
-%endmacro
-
-%macro WIN64_SPILL_XMM 1
- %assign xmm_regs_used %1
- ASSERT xmm_regs_used <= 16
- %if xmm_regs_used > 6
- SUB rsp, (xmm_regs_used-6)*16+16
- %assign %%i xmm_regs_used
- %rep (xmm_regs_used-6)
- %assign %%i %%i-1
- movdqa [rsp + (%%i-6)*16+(~stack_offset&8)], xmm %+ %%i
- %endrep
- %endif
-%endmacro
-
-%macro WIN64_RESTORE_XMM_INTERNAL 1
- %if xmm_regs_used > 6
- %assign %%i xmm_regs_used
- %rep (xmm_regs_used-6)
- %assign %%i %%i-1
- movdqa xmm %+ %%i, [%1 + (%%i-6)*16+(~stack_offset&8)]
- %endrep
- add %1, (xmm_regs_used-6)*16+16
- %endif
-%endmacro
-
-%macro WIN64_RESTORE_XMM 1
- WIN64_RESTORE_XMM_INTERNAL %1
- %assign stack_offset stack_offset-(xmm_regs_used-6)*16+16
- %assign xmm_regs_used 0
-%endmacro
-
-%define has_epilogue regs_used > 7 || xmm_regs_used > 6 || mmsize == 32
-
-%macro RET 0
- WIN64_RESTORE_XMM_INTERNAL rsp
- POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7
-%if mmsize == 32
- vzeroupper
-%endif
- ret
-%endmacro
-
-%elif ARCH_X86_64 ; *nix x64 ;=============================================
-
-DECLARE_REG 0, rdi
-DECLARE_REG 1, rsi
-DECLARE_REG 2, rdx
-DECLARE_REG 3, rcx
-DECLARE_REG 4, R8
-DECLARE_REG 5, R9
-DECLARE_REG 6, rax, 8
-DECLARE_REG 7, R10, 16
-DECLARE_REG 8, R11, 24
-DECLARE_REG 9, rbx, 32
-DECLARE_REG 10, rbp, 40
-DECLARE_REG 11, R12, 48
-DECLARE_REG 12, R13, 56
-DECLARE_REG 13, R14, 64
-DECLARE_REG 14, R15, 72
-
-%macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names...
- %assign num_args %1
- %assign regs_used %2
- ASSERT regs_used >= num_args
- ASSERT regs_used <= 15
- PUSH_IF_USED 9, 10, 11, 12, 13, 14
- LOAD_IF_USED 6, 7, 8, 9, 10, 11, 12, 13, 14
- DEFINE_ARGS %4
-%endmacro
-
-%define has_epilogue regs_used > 9 || mmsize == 32
-
-%macro RET 0
- POP_IF_USED 14, 13, 12, 11, 10, 9
-%if mmsize == 32
- vzeroupper
-%endif
- ret
-%endmacro
-
-%else ; X86_32 ;==============================================================
-
-DECLARE_REG 0, eax, 4
-DECLARE_REG 1, ecx, 8
-DECLARE_REG 2, edx, 12
-DECLARE_REG 3, ebx, 16
-DECLARE_REG 4, esi, 20
-DECLARE_REG 5, edi, 24
-DECLARE_REG 6, ebp, 28
-%define rsp esp
-
-%macro DECLARE_ARG 1-*
- %rep %0
- %define r%1m [esp + stack_offset + 4*%1 + 4]
- %define r%1mp dword r%1m
- %rotate 1
- %endrep
-%endmacro
-
-DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
-
-%macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names...
- %assign num_args %1
- %assign regs_used %2
- %if regs_used > 7
- %assign regs_used 7
- %endif
- ASSERT regs_used >= num_args
- PUSH_IF_USED 3, 4, 5, 6
- LOAD_IF_USED 0, 1, 2, 3, 4, 5, 6
- DEFINE_ARGS %4
-%endmacro
-
-%define has_epilogue regs_used > 3 || mmsize == 32
-
-%macro RET 0
- POP_IF_USED 6, 5, 4, 3
-%if mmsize == 32
- vzeroupper
-%endif
- ret
-%endmacro
-
-%endif ;======================================================================
-
-%if WIN64 == 0
-%macro WIN64_SPILL_XMM 1
-%endmacro
-%macro WIN64_RESTORE_XMM 1
-%endmacro
-%endif
-
-%macro REP_RET 0
- %if has_epilogue
- RET
- %else
- rep ret
- %endif
-%endmacro
-
-%macro TAIL_CALL 2 ; callee, is_nonadjacent
- %if has_epilogue
- call %1
- RET
- %elif %2
- jmp %1
- %endif
-%endmacro
-
-;=============================================================================
-; arch-independent part
-;=============================================================================
-
-%assign function_align 16
-
-; Begin a function.
-; Applies any symbol mangling needed for C linkage, and sets up a define such that
-; subsequent uses of the function name automatically refer to the mangled version.
-; Appends cpuflags to the function name if cpuflags has been specified.
-%macro cglobal 1-2+ ; name, [PROLOGUE args]
-%if %0 == 1
- cglobal_internal %1 %+ SUFFIX
-%else
- cglobal_internal %1 %+ SUFFIX, %2
-%endif
-%endmacro
-%macro cglobal_internal 1-2+
- %ifndef cglobaled_%1
- %xdefine %1 mangle(%1)
- %xdefine %1.skip_prologue %1 %+ .skip_prologue
- CAT_XDEFINE cglobaled_, %1, 1
- %endif
- %xdefine current_function %1
- %ifidn __OUTPUT_FORMAT__,elf
- global %1:function hidden
- %else
- global %1
- %endif
- align function_align
- %1:
- RESET_MM_PERMUTATION ; not really needed, but makes disassembly somewhat nicer
- %assign stack_offset 0
- %if %0 > 1
- PROLOGUE %2
- %endif
-%endmacro
-
-%macro cextern 1
- %xdefine %1 mangle(%1)
- CAT_XDEFINE cglobaled_, %1, 1
- extern %1
-%endmacro
-
-; like cextern, but without the prefix
-%macro cextern_naked 1
- %xdefine %1 mangle(%1)
- CAT_XDEFINE cglobaled_, %1, 1
- extern %1
-%endmacro
-
-%macro const 2+
- %xdefine %1 mangle(%1)
- global %1
- %1: %2
-%endmacro
-
-; This is needed for ELF, otherwise the GNU linker assumes the stack is
-; executable by default.
-%ifidn __OUTPUT_FORMAT__,elf
-SECTION .note.GNU-stack noalloc noexec nowrite progbits
-%endif
-%ifidn __OUTPUT_FORMAT__,elf32
-section .note.GNU-stack noalloc noexec nowrite progbits
-%endif
-%ifidn __OUTPUT_FORMAT__,elf64
-section .note.GNU-stack noalloc noexec nowrite progbits
-%endif
-
-; cpuflags
-
-%assign cpuflags_MMX (1<<0)
-%assign cpuflags_MMX2 (1<<1) | cpuflags_MMX
-%assign cpuflags_3dnow (1<<2) | cpuflags_MMX
-%assign cpuflags_3dnow2 (1<<3) | cpuflags_3dnow
-%assign cpuflags_SSE (1<<4) | cpuflags_MMX2
-%assign cpuflags_SSE2 (1<<5) | cpuflags_SSE
-%assign cpuflags_SSE2slow (1<<6) | cpuflags_SSE2
-%assign cpuflags_SSE3 (1<<7) | cpuflags_SSE2
-%assign cpuflags_SSSE3 (1<<8) | cpuflags_SSE3
-%assign cpuflags_SSE4 (1<<9) | cpuflags_SSSE3
-%assign cpuflags_SSE42 (1<<10)| cpuflags_SSE4
-%assign cpuflags_AVX (1<<11)| cpuflags_SSE42
-%assign cpuflags_xop (1<<12)| cpuflags_AVX
-%assign cpuflags_fma4 (1<<13)| cpuflags_AVX
-%assign cpuflags_AVX2 (1<<14)| cpuflags_AVX
-%assign cpuflags_fma3 (1<<15)| cpuflags_AVX
-
-%assign cpuflags_cache32 (1<<16)
-%assign cpuflags_cache64 (1<<17)
-%assign cpuflags_slowctz (1<<18)
-%assign cpuflags_lzcnt (1<<19)
-%assign cpuflags_misalign (1<<20)
-%assign cpuflags_aligned (1<<21) ; not a cpu feature, but a function variant
-%assign cpuflags_atom (1<<22)
-%assign cpuflags_bmi1 (1<<23)
-%assign cpuflags_bmi2 (1<<24)|cpuflags_bmi1
-%assign cpuflags_tbm (1<<25)|cpuflags_bmi1
-
-%define cpuflag(x) ((cpuflags & (cpuflags_ %+ x)) == (cpuflags_ %+ x))
-%define notcpuflag(x) ((cpuflags & (cpuflags_ %+ x)) != (cpuflags_ %+ x))
-
-; Takes up to 2 cpuflags from the above list.
-; All subsequent functions (up to the next INIT_CPUFLAGS) is built for the specified cpu.
-; You shouldn't need to invoke this macro directly, it's a subroutine for INIT_MMX &co.
-%macro INIT_CPUFLAGS 0-2
- %if %0 >= 1
- %xdefine cpuname %1
- %assign cpuflags cpuflags_%1
- %if %0 >= 2
- %xdefine cpuname %1_%2
- %assign cpuflags cpuflags | cpuflags_%2
- %endif
- %xdefine SUFFIX _ %+ cpuname
- %if cpuflag(AVX)
- %assign AVX_enabled 1
- %endif
- %if mmsize == 16 && notcpuflag(SSE2)
- %define mova movaps
- %define movu movups
- %define movnta movntps
- %endif
- %if cpuflag(aligned)
- %define movu mova
- %elifidn %1, SSE3
- %define movu lddqu
- %endif
- %else
- %xdefine SUFFIX
- %undef cpuname
- %undef cpuflags
- %endif
-%endmacro
-
-; merge MMX and SSE*
-
-%macro CAT_XDEFINE 3
- %xdefine %1%2 %3
-%endmacro
-
-%macro CAT_UNDEF 2
- %undef %1%2
-%endmacro
-
-%macro INIT_MMX 0-1+
- %assign AVX_enabled 0
- %define RESET_MM_PERMUTATION INIT_MMX %1
- %define mmsize 8
- %define num_mmregs 8
- %define mova movq
- %define movu movq
- %define movh movd
- %define movnta movntq
- %assign %%i 0
- %rep 8
- CAT_XDEFINE m, %%i, mm %+ %%i
- CAT_XDEFINE nmm, %%i, %%i
- %assign %%i %%i+1
- %endrep
- %rep 8
- CAT_UNDEF m, %%i
- CAT_UNDEF nmm, %%i
- %assign %%i %%i+1
- %endrep
- INIT_CPUFLAGS %1
-%endmacro
-
-%macro INIT_XMM 0-1+
- %assign AVX_enabled 0
- %define RESET_MM_PERMUTATION INIT_XMM %1
- %define mmsize 16
- %define num_mmregs 8
- %if ARCH_X86_64
- %define num_mmregs 16
- %endif
- %define mova movdqa
- %define movu movdqu
- %define movh movq
- %define movnta movntdq
- %assign %%i 0
- %rep num_mmregs
- CAT_XDEFINE m, %%i, xmm %+ %%i
- CAT_XDEFINE nxmm, %%i, %%i
- %assign %%i %%i+1
- %endrep
- INIT_CPUFLAGS %1
-%endmacro
-
-%macro INIT_YMM 0-1+
- %assign AVX_enabled 1
- %define RESET_MM_PERMUTATION INIT_YMM %1
- %define mmsize 32
- %define num_mmregs 8
- %if ARCH_X86_64
- %define num_mmregs 16
- %endif
- %define mova vmovaps
- %define movu vmovups
- %undef movh
- %define movnta vmovntps
- %assign %%i 0
- %rep num_mmregs
- CAT_XDEFINE m, %%i, ymm %+ %%i
- CAT_XDEFINE nymm, %%i, %%i
- %assign %%i %%i+1
- %endrep
- INIT_CPUFLAGS %1
-%endmacro
-
-INIT_XMM
-
-; I often want to use macros that permute their arguments. e.g. there's no
-; efficient way to implement butterfly or transpose or dct without swapping some
-; arguments.
-;
-; I would like to not have to manually keep track of the permutations:
-; If I insert a permutation in the middle of a function, it should automatically
-; change everything that follows. For more complex macros I may also have multiple
-; implementations, e.g. the SSE2 and SSSE3 versions may have different permutations.
-;
-; Hence these macros. Insert a PERMUTE or some SWAPs at the end of a macro that
-; permutes its arguments. It's equivalent to exchanging the contents of the
-; registers, except that this way you exchange the register names instead, so it
-; doesn't cost any cycles.
-
-%macro PERMUTE 2-* ; takes a list of pairs to swap
-%rep %0/2
- %xdefine tmp%2 m%2
- %xdefine ntmp%2 nm%2
- %rotate 2
-%endrep
-%rep %0/2
- %xdefine m%1 tmp%2
- %xdefine nm%1 ntmp%2
- %undef tmp%2
- %undef ntmp%2
- %rotate 2
-%endrep
-%endmacro
-
-%macro SWAP 2-* ; swaps a single chain (sometimes more concise than pairs)
-%rep %0-1
-%ifdef m%1
- %xdefine tmp m%1
- %xdefine m%1 m%2
- %xdefine m%2 tmp
- CAT_XDEFINE n, m%1, %1
- CAT_XDEFINE n, m%2, %2
-%else
- ; If we were called as "SWAP m0,m1" rather than "SWAP 0,1", infer the original numbers here.
- ; Be careful using this mode in nested macros though, as in some cases there may be
- ; other copies of m# that have already been dereferenced and don't get updated correctly.
- %xdefine %%n1 n %+ %1
- %xdefine %%n2 n %+ %2
- %xdefine tmp m %+ %%n1
- CAT_XDEFINE m, %%n1, m %+ %%n2
- CAT_XDEFINE m, %%n2, tmp
- CAT_XDEFINE n, m %+ %%n1, %%n1
- CAT_XDEFINE n, m %+ %%n2, %%n2
-%endif
- %undef tmp
- %rotate 1
-%endrep
-%endmacro
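[Editor's note] A sketch of the register-renaming idea described in the comment above: SWAP only rebinds the m# aliases at assembly time and emits no instructions.

    ; after INIT_XMM, m0 is xmm0 and m1 is xmm1
    SWAP 0, 1          ; no code emitted; m0 now names xmm1, m1 names xmm0
    mova  m2, m0       ; assembles as: movdqa xmm2, xmm1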
-
-; If SAVE_MM_PERMUTATION is placed at the end of a function, then any later
-; calls to that function will automatically load the permutation, so values can
-; be returned in mmregs.
-%macro SAVE_MM_PERMUTATION 0-1
- %if %0
- %xdefine %%f %1_m
- %else
- %xdefine %%f current_function %+ _m
- %endif
- %assign %%i 0
- %rep num_mmregs
- CAT_XDEFINE %%f, %%i, m %+ %%i
- %assign %%i %%i+1
- %endrep
-%endmacro
-
-%macro LOAD_MM_PERMUTATION 1 ; name to load from
- %ifdef %1_m0
- %assign %%i 0
- %rep num_mmregs
- CAT_XDEFINE m, %%i, %1_m %+ %%i
- CAT_XDEFINE n, m %+ %%i, %%i
- %assign %%i %%i+1
- %endrep
- %endif
-%endmacro
-
-; Append cpuflags to the callee's name iff the appended name is known and the plain name isn't
-%macro call 1
- call_internal %1, %1 %+ SUFFIX
-%endmacro
-%macro call_internal 2
- %xdefine %%i %1
- %ifndef cglobaled_%1
- %ifdef cglobaled_%2
- %xdefine %%i %2
- %endif
- %endif
- call %%i
- LOAD_MM_PERMUTATION %%i
-%endmacro
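[Editor's note] A hedged sketch of how SAVE_MM_PERMUTATION, LOAD_MM_PERMUTATION and the overridden "call" cooperate, using a hypothetical function name:

    cglobal my_helper              ; hypothetical callee returning values in mmregs
        ...
        SWAP 0, 2                  ; result ends up under the m0 alias
        SAVE_MM_PERMUTATION        ; records my_helper's final m# -> register mapping
        ret

    ; in a caller:
    call my_helper                 ; resolves the plain or SUFFIXed name, then
                                   ; LOAD_MM_PERMUTATION re-applies the mapping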
-
-; Substitutions that reduce instruction size but are functionally equivalent
-%macro add 2
- %ifnum %2
- %if %2==128
- sub %1, -128
- %else
- add %1, %2
- %endif
- %else
- add %1, %2
- %endif
-%endmacro
-
-%macro sub 2
- %ifnum %2
- %if %2==128
- add %1, -128
- %else
- sub %1, %2
- %endif
- %else
- sub %1, %2
- %endif
-%endmacro
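[Editor's note] The payoff is in the x86 encoding: 8-bit immediates are sign-extended, so +128 never fits in imm8 but -128 does. For example:

    add eax, 128       ; rewritten to "sub eax, -128": 83 E8 80 (3 bytes)
                       ; instead of add eax, imm32:    05 80 00 00 00 (5 bytes)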
-
-;=============================================================================
-; AVX abstraction layer
-;=============================================================================
-
-%assign i 0
-%rep 16
- %if i < 8
- CAT_XDEFINE sizeofmm, i, 8
- %endif
- CAT_XDEFINE sizeofxmm, i, 16
- CAT_XDEFINE sizeofymm, i, 32
-%assign i i+1
-%endrep
-%undef i
-
-%macro CHECK_AVX_INSTR_EMU 3-*
- %xdefine %%opcode %1
- %xdefine %%dst %2
- %rep %0-2
- %ifidn %%dst, %3
- %error non-AVX emulation of ``%%opcode'' is not supported
- %endif
- %rotate 1
- %endrep
-%endmacro
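[Editor's note] For instance, a three-operand form whose destination aliases the second source cannot be emulated with a mov plus a two-operand op, which is exactly what this guard catches:

    ; non-AVX emulation of "paddw m0, m1, m0" would expand to
    ;   movdqa m0, m1  ; clobbers the second source before it is read
    ;   paddw  m0, m0  ; now computes the wrong value
    ; so RUN_AVX_INSTR's CHECK_AVX_INSTR_EMU call raises %error instead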
-
-;%1 == instruction
-;%2 == 1 if float, 0 if int
-;%3 == 1 if 4-operand (xmm, xmm, xmm, imm), 0 if 2- or 3-operand (xmm, xmm, xmm)
-;%4 == number of operands given
-;%5+: operands
-%macro RUN_AVX_INSTR 6-7+
- %ifid %6
- %define %%sizeofreg sizeof%6
- %elifid %5
- %define %%sizeofreg sizeof%5
- %else
- %define %%sizeofreg mmsize
- %endif
- %if %%sizeofreg==32
- %if %4>=3
- v%1 %5, %6, %7
- %else
- v%1 %5, %6
- %endif
- %else
- %if %%sizeofreg==8
- %define %%regmov movq
- %elif %2
- %define %%regmov movaps
- %else
- %define %%regmov movdqa
- %endif
-
- %if %4>=3+%3
- %ifnidn %5, %6
- %if AVX_enabled && %%sizeofreg==16
- v%1 %5, %6, %7
- %else
- CHECK_AVX_INSTR_EMU {%1 %5, %6, %7}, %5, %7
- %%regmov %5, %6
- %1 %5, %7
- %endif
- %else
- %1 %5, %7
- %endif
- %elif %4>=3
- %1 %5, %6, %7
- %else
- %1 %5, %6
- %endif
- %endif
-%endmacro
-
-; 3arg AVX ops with a memory arg can only have it in src2,
-; whereas SSE emulation of 3arg prefers to have it in src1 (i.e. the mov).
-; So, if the op is symmetric and the wrong one is memory, swap them.
-%macro RUN_AVX_INSTR1 8
- %assign %%swap 0
- %if AVX_enabled
- %ifnid %6
- %assign %%swap 1
- %endif
- %elifnidn %5, %6
- %ifnid %7
- %assign %%swap 1
- %endif
- %endif
- %if %%swap && %3 == 0 && %8 == 1
- RUN_AVX_INSTR %1, %2, %3, %4, %5, %7, %6
- %else
- RUN_AVX_INSTR %1, %2, %3, %4, %5, %6, %7
- %endif
-%endmacro
-
-;%1 == instruction
-;%2 == 1 if float, 0 if int
-;%3 == 1 if 4-operand (xmm, xmm, xmm, imm), 0 if 2- or 3-operand (xmm, xmm, xmm)
-;%4 == 1 if symmetric (i.e. doesn't matter which src arg is which), 0 if not
-%macro AVX_INSTR 4
- %macro %1 2-9 fnord, fnord, fnord, %1, %2, %3, %4
- %ifidn %3, fnord
- RUN_AVX_INSTR %6, %7, %8, 2, %1, %2
- %elifidn %4, fnord
- RUN_AVX_INSTR1 %6, %7, %8, 3, %1, %2, %3, %9
- %elifidn %5, fnord
- RUN_AVX_INSTR %6, %7, %8, 4, %1, %2, %3, %4
- %else
- RUN_AVX_INSTR %6, %7, %8, 5, %1, %2, %3, %4, %5
- %endif
- %endmacro
-%endmacro
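[Editor's note] Each AVX_INSTR line below thus turns the bare mnemonic into a macro that accepts a three-operand form everywhere. A sketch of the two expansions for an integer op:

    paddw m0, m1, m2
    ; AVX build (AVX_enabled=1):  vpaddw xmm0, xmm1, xmm2
    ; SSE2 build:                 movdqa xmm0, xmm1
    ;                             paddw  xmm0, xmm2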
-
-AVX_INSTR addpd, 1, 0, 1
-AVX_INSTR addps, 1, 0, 1
-AVX_INSTR addsd, 1, 0, 1
-AVX_INSTR addss, 1, 0, 1
-AVX_INSTR addsubpd, 1, 0, 0
-AVX_INSTR addsubps, 1, 0, 0
-AVX_INSTR andpd, 1, 0, 1
-AVX_INSTR andps, 1, 0, 1
-AVX_INSTR andnpd, 1, 0, 0
-AVX_INSTR andnps, 1, 0, 0
-AVX_INSTR blendpd, 1, 0, 0
-AVX_INSTR blendps, 1, 0, 0
-AVX_INSTR blendvpd, 1, 0, 0
-AVX_INSTR blendvps, 1, 0, 0
-AVX_INSTR cmppd, 1, 0, 0
-AVX_INSTR cmpps, 1, 0, 0
-AVX_INSTR cmpsd, 1, 0, 0
-AVX_INSTR cmpss, 1, 0, 0
-AVX_INSTR cvtdq2ps, 1, 0, 0
-AVX_INSTR cvtps2dq, 1, 0, 0
-AVX_INSTR divpd, 1, 0, 0
-AVX_INSTR divps, 1, 0, 0
-AVX_INSTR divsd, 1, 0, 0
-AVX_INSTR divss, 1, 0, 0
-AVX_INSTR dppd, 1, 1, 0
-AVX_INSTR dpps, 1, 1, 0
-AVX_INSTR haddpd, 1, 0, 0
-AVX_INSTR haddps, 1, 0, 0
-AVX_INSTR hsubpd, 1, 0, 0
-AVX_INSTR hsubps, 1, 0, 0
-AVX_INSTR maxpd, 1, 0, 1
-AVX_INSTR maxps, 1, 0, 1
-AVX_INSTR maxsd, 1, 0, 1
-AVX_INSTR maxss, 1, 0, 1
-AVX_INSTR minpd, 1, 0, 1
-AVX_INSTR minps, 1, 0, 1
-AVX_INSTR minsd, 1, 0, 1
-AVX_INSTR minss, 1, 0, 1
-AVX_INSTR movhlps, 1, 0, 0
-AVX_INSTR movlhps, 1, 0, 0
-AVX_INSTR movsd, 1, 0, 0
-AVX_INSTR movss, 1, 0, 0
-AVX_INSTR mpsadbw, 0, 1, 0
-AVX_INSTR mulpd, 1, 0, 1
-AVX_INSTR mulps, 1, 0, 1
-AVX_INSTR mulsd, 1, 0, 1
-AVX_INSTR mulss, 1, 0, 1
-AVX_INSTR orpd, 1, 0, 1
-AVX_INSTR orps, 1, 0, 1
-AVX_INSTR pabsb, 0, 0, 0
-AVX_INSTR pabsw, 0, 0, 0
-AVX_INSTR pabsd, 0, 0, 0
-AVX_INSTR packsswb, 0, 0, 0
-AVX_INSTR packssdw, 0, 0, 0
-AVX_INSTR packuswb, 0, 0, 0
-AVX_INSTR packusdw, 0, 0, 0
-AVX_INSTR paddb, 0, 0, 1
-AVX_INSTR paddw, 0, 0, 1
-AVX_INSTR paddd, 0, 0, 1
-AVX_INSTR paddq, 0, 0, 1
-AVX_INSTR paddsb, 0, 0, 1
-AVX_INSTR paddsw, 0, 0, 1
-AVX_INSTR paddusb, 0, 0, 1
-AVX_INSTR paddusw, 0, 0, 1
-AVX_INSTR palignr, 0, 1, 0
-AVX_INSTR pand, 0, 0, 1
-AVX_INSTR pandn, 0, 0, 0
-AVX_INSTR pavgb, 0, 0, 1
-AVX_INSTR pavgw, 0, 0, 1
-AVX_INSTR pblendvb, 0, 0, 0
-AVX_INSTR pblendw, 0, 1, 0
-AVX_INSTR pcmpestri, 0, 0, 0
-AVX_INSTR pcmpestrm, 0, 0, 0
-AVX_INSTR pcmpistri, 0, 0, 0
-AVX_INSTR pcmpistrm, 0, 0, 0
-AVX_INSTR pcmpeqb, 0, 0, 1
-AVX_INSTR pcmpeqw, 0, 0, 1
-AVX_INSTR pcmpeqd, 0, 0, 1
-AVX_INSTR pcmpeqq, 0, 0, 1
-AVX_INSTR pcmpgtb, 0, 0, 0
-AVX_INSTR pcmpgtw, 0, 0, 0
-AVX_INSTR pcmpgtd, 0, 0, 0
-AVX_INSTR pcmpgtq, 0, 0, 0
-AVX_INSTR phaddw, 0, 0, 0
-AVX_INSTR phaddd, 0, 0, 0
-AVX_INSTR phaddsw, 0, 0, 0
-AVX_INSTR phsubw, 0, 0, 0
-AVX_INSTR phsubd, 0, 0, 0
-AVX_INSTR phsubsw, 0, 0, 0
-AVX_INSTR pmaddwd, 0, 0, 1
-AVX_INSTR pmaddubsw, 0, 0, 0
-AVX_INSTR pmaxsb, 0, 0, 1
-AVX_INSTR pmaxsw, 0, 0, 1
-AVX_INSTR pmaxsd, 0, 0, 1
-AVX_INSTR pmaxub, 0, 0, 1
-AVX_INSTR pmaxuw, 0, 0, 1
-AVX_INSTR pmaxud, 0, 0, 1
-AVX_INSTR pminsb, 0, 0, 1
-AVX_INSTR pminsw, 0, 0, 1
-AVX_INSTR pminsd, 0, 0, 1
-AVX_INSTR pminub, 0, 0, 1
-AVX_INSTR pminuw, 0, 0, 1
-AVX_INSTR pminud, 0, 0, 1
-AVX_INSTR pmovmskb, 0, 0, 0
-AVX_INSTR pmulhuw, 0, 0, 1
-AVX_INSTR pmulhrsw, 0, 0, 1
-AVX_INSTR pmulhw, 0, 0, 1
-AVX_INSTR pmullw, 0, 0, 1
-AVX_INSTR pmulld, 0, 0, 1
-AVX_INSTR pmuludq, 0, 0, 1
-AVX_INSTR pmuldq, 0, 0, 1
-AVX_INSTR por, 0, 0, 1
-AVX_INSTR psadbw, 0, 0, 1
-AVX_INSTR pshufb, 0, 0, 0
-AVX_INSTR pshufd, 0, 1, 0
-AVX_INSTR pshufhw, 0, 1, 0
-AVX_INSTR pshuflw, 0, 1, 0
-AVX_INSTR psignb, 0, 0, 0
-AVX_INSTR psignw, 0, 0, 0
-AVX_INSTR psignd, 0, 0, 0
-AVX_INSTR psllw, 0, 0, 0
-AVX_INSTR pslld, 0, 0, 0
-AVX_INSTR psllq, 0, 0, 0
-AVX_INSTR pslldq, 0, 0, 0
-AVX_INSTR psraw, 0, 0, 0
-AVX_INSTR psrad, 0, 0, 0
-AVX_INSTR psrlw, 0, 0, 0
-AVX_INSTR psrld, 0, 0, 0
-AVX_INSTR psrlq, 0, 0, 0
-AVX_INSTR psrldq, 0, 0, 0
-AVX_INSTR psubb, 0, 0, 0
-AVX_INSTR psubw, 0, 0, 0
-AVX_INSTR psubd, 0, 0, 0
-AVX_INSTR psubq, 0, 0, 0
-AVX_INSTR psubsb, 0, 0, 0
-AVX_INSTR psubsw, 0, 0, 0
-AVX_INSTR psubusb, 0, 0, 0
-AVX_INSTR psubusw, 0, 0, 0
-AVX_INSTR ptest, 0, 0, 0
-AVX_INSTR punpckhbw, 0, 0, 0
-AVX_INSTR punpckhwd, 0, 0, 0
-AVX_INSTR punpckhdq, 0, 0, 0
-AVX_INSTR punpckhqdq, 0, 0, 0
-AVX_INSTR punpcklbw, 0, 0, 0
-AVX_INSTR punpcklwd, 0, 0, 0
-AVX_INSTR punpckldq, 0, 0, 0
-AVX_INSTR punpcklqdq, 0, 0, 0
-AVX_INSTR pxor, 0, 0, 1
-AVX_INSTR shufps, 1, 1, 0
-AVX_INSTR subpd, 1, 0, 0
-AVX_INSTR subps, 1, 0, 0
-AVX_INSTR subsd, 1, 0, 0
-AVX_INSTR subss, 1, 0, 0
-AVX_INSTR unpckhpd, 1, 0, 0
-AVX_INSTR unpckhps, 1, 0, 0
-AVX_INSTR unpcklpd, 1, 0, 0
-AVX_INSTR unpcklps, 1, 0, 0
-AVX_INSTR xorpd, 1, 0, 1
-AVX_INSTR xorps, 1, 0, 1
-
-; 3DNow! instructions, for sharing code between AVX, SSE and 3DNow!
-AVX_INSTR pfadd, 1, 0, 1
-AVX_INSTR pfsub, 1, 0, 0
-AVX_INSTR pfmul, 1, 0, 1
-
-; base-4 constants for shuffles
-%assign i 0
-%rep 256
- %assign j ((i>>6)&3)*1000 + ((i>>4)&3)*100 + ((i>>2)&3)*10 + (i&3)
- %if j < 10
- CAT_XDEFINE q000, j, i
- %elif j < 100
- CAT_XDEFINE q00, j, i
- %elif j < 1000
- CAT_XDEFINE q0, j, i
- %else
- CAT_XDEFINE q, j, i
- %endif
-%assign i i+1
-%endrep
-%undef i
-%undef j
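[Editor's note] The loop above defines q0000 through q3333, letting shuffle immediates be written one base-4 digit per destination element (leftmost digit = highest element). For example:

    pshufd m0, m1, q3120   ; q3120 == 0xD8: result dwords are
                           ; src[0], src[2], src[1], src[3], low to high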
-
-%macro FMA_INSTR 3
- %macro %1 4-7 %1, %2, %3
- %if cpuflag(xop)
- v%5 %1, %2, %3, %4
- %else
- %6 %1, %2, %3
- %7 %1, %4
- %endif
- %endmacro
-%endmacro
-
-FMA_INSTR pmacsdd, pmulld, paddd
-FMA_INSTR pmacsww, pmullw, paddw
-FMA_INSTR pmadcswd, pmaddwd, paddd
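[Editor's note] A sketch of what the generated macro does with and without XOP, assuming cpuflags_xop is defined earlier in the file:

    pmacsdd m0, m1, m2, m3
    ; with XOP:    vpmacsdd xmm0, xmm1, xmm2, xmm3
    ; without XOP: pmulld m0, m1, m2   ; itself AVX_INSTR-emulated on SSE4.1
    ;              paddd  m0, m3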
-
-; tzcnt is equivalent to "rep bsf" and is backwards-compatible with bsf.
-; This lets us use tzcnt without bumping the yasm version requirement yet.
-%define tzcnt rep bsf
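[Editor's note] Both spellings produce the same bytes, so the substitution is purely textual:

    tzcnt eax, ecx     ; assembles as "rep bsf eax, ecx" (F3 0F BC);
                       ; pre-BMI1 CPUs ignore the REP prefix and run plain BSF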
diff --git a/TMessagesProj/jni/tgnet/ConnectionsManager.cpp b/TMessagesProj/jni/tgnet/ConnectionsManager.cpp
index eed99950d..1b8a85b74 100644
--- a/TMessagesProj/jni/tgnet/ConnectionsManager.cpp
+++ b/TMessagesProj/jni/tgnet/ConnectionsManager.cpp
@@ -715,18 +715,7 @@ void ConnectionsManager::onConnectionDataReceived(Connection *connection, Native
}
if (!doNotProcess) {
- TLObject *object = nullptr;
- if (connection->getConnectionType() == ConnectionTypePush) {
- uint32_t position = data->position();
- uint32_t constructor = data->readUint32(&error);
- if (constructor == TL_updatesTooLong::constructor) {
- object = new TL_updatesTooLong();
- }
- data->position(position);
- }
- if (object == nullptr) {
- object = TLdeserialize(nullptr, messageLength, data);
- }
+ TLObject *object = TLdeserialize(nullptr, messageLength, data);
if (object != nullptr) {
DEBUG_D("connection(%p, dc%u, type %d) received object %s", connection, datacenter->getDatacenterId(), connection->getConnectionType(), typeid(*object).name());
processServerResponse(object, messageId, messageSeqNo, messageServerSalt, connection, 0, 0);
@@ -959,6 +948,8 @@ void ConnectionsManager::processServerResponse(TLObject *message, int64_t messag
TLObject *object = TLdeserialize(request->rawRequest, unpacked_data->limit(), unpacked_data);
if (object != nullptr) {
response->result = std::unique_ptr<TLObject>(object);
+ } else {
+ response->result = std::unique_ptr<TLObject>(nullptr);
}
}
@@ -1215,19 +1206,29 @@ void ConnectionsManager::processServerResponse(TLObject *message, int64_t messag
}
}
data->reuse();
- } else if (connection->connectionType == ConnectionTypePush && typeInfo == typeid(TL_updatesTooLong)) {
- if (networkPaused) {
- lastPauseTime = getCurrentTimeMillis();
- nextSleepTimeout = 30000;
- DEBUG_D("received internal push: wakeup network in background");
- } else if (lastPauseTime != 0) {
- lastPauseTime = getCurrentTimeMillis();
- DEBUG_D("received internal push: reset sleep timeout");
+ } else if (typeInfo == typeid(TL_updatesTooLong)) {
+ if (connection->connectionType == ConnectionTypePush) {
+ if (networkPaused) {
+ lastPauseTime = getCurrentTimeMillis();
+ nextSleepTimeout = 30000;
+ DEBUG_D("received internal push: wakeup network in background");
+ } else if (lastPauseTime != 0) {
+ lastPauseTime = getCurrentTimeMillis();
+ DEBUG_D("received internal push: reset sleep timeout");
+ } else {
+ DEBUG_D("received internal push");
+ }
+ if (delegate != nullptr) {
+ delegate->onInternalPushReceived();
+ }
} else {
- DEBUG_D("received internal push");
- }
- if (delegate != nullptr) {
- delegate->onInternalPushReceived();
+ if (delegate != nullptr) {
+ NativeByteBuffer *data = BuffersStorage::getInstance().getFreeBuffer(message->getObjectSize());
+ message->serializeToStream(data);
+ data->position(0);
+ delegate->onUnparsedMessageReceived(0, data, connection->getConnectionType());
+ data->reuse();
+ }
}
}
}
@@ -2223,7 +2224,7 @@ void ConnectionsManager::updateDcSettings(uint32_t dcNum) {
return;
}
- if (error == nullptr) {
+ if (response != nullptr) {
TL_config *config = (TL_config *) response;
int32_t updateIn = config->expires - getCurrentTime();
if (updateIn <= 0) {
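[Editor's note] Taken together, the ConnectionsManager hunks above invert the old special case: TL_updatesTooLong is no longer pre-sniffed on push connections before deserialization; instead TLdeserialize recognizes it for every connection type (via the TLClassStore hunk below), push connections still consume it internally, and other connections re-serialize it and forward it to the delegate as an unparsed buffer. A condensed sketch of the resulting flow, not verbatim patch code:

    // dispatch after TLdeserialize succeeds
    if (typeInfo == typeid(TL_updatesTooLong)) {
        if (connection->connectionType == ConnectionTypePush) {
            // bookkeeping on lastPauseTime/nextSleepTimeout, then:
            if (delegate != nullptr) delegate->onInternalPushReceived();
        } else if (delegate != nullptr) {
            NativeByteBuffer *buf = BuffersStorage::getInstance().getFreeBuffer(message->getObjectSize());
            message->serializeToStream(buf);   // relies on the new serializeToStream below
            buf->position(0);
            delegate->onUnparsedMessageReceived(0, buf, connection->getConnectionType());
            buf->reuse();
        }
    }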
diff --git a/TMessagesProj/jni/tgnet/MTProtoScheme.cpp b/TMessagesProj/jni/tgnet/MTProtoScheme.cpp
index 968fd02a0..a88f591e4 100644
--- a/TMessagesProj/jni/tgnet/MTProtoScheme.cpp
+++ b/TMessagesProj/jni/tgnet/MTProtoScheme.cpp
@@ -66,6 +66,9 @@ TLObject *TLClassStore::TLdeserialize(NativeByteBuffer *stream, uint32_t bytes,
case TL_destroy_session_none::constructor:
object = new TL_destroy_session_none();
break;
+ case TL_updatesTooLong::constructor:
+ object = new TL_updatesTooLong();
+ break;
default:
return nullptr;
}
@@ -942,6 +945,7 @@ void TL_config::readParams(NativeByteBuffer *stream, bool &error) {
chat_big_size = stream->readInt32(&error);
push_chat_period_ms = stream->readInt32(&error);
push_chat_limit = stream->readInt32(&error);
+ saved_gifs_limit = stream->readInt32(&error);
magic = stream->readUint32(&error);
if (magic != 0x1cb5c415) {
error = true;
@@ -982,6 +986,7 @@ void TL_config::serializeToStream(NativeByteBuffer *stream) {
stream->writeInt32(chat_big_size);
stream->writeInt32(push_chat_period_ms);
stream->writeInt32(push_chat_limit);
+ stream->writeInt32(saved_gifs_limit);
stream->writeInt32(0x1cb5c415);
count = (uint32_t) disabled_features.size();
stream->writeInt32(count);
@@ -1053,7 +1058,7 @@ User *User::TLdeserialize(NativeByteBuffer *stream, uint32_t constructor, bool &
case 0x200250ba:
result = new TL_userEmpty();
break;
- case 0x22e49072:
+ case 0xd10d979a:
result = new TL_user();
break;
default:
@@ -1101,6 +1106,12 @@ void TL_user::readParams(NativeByteBuffer *stream, bool &error) {
if ((flags & 16384) != 0) {
bot_info_version = stream->readInt32(&error);
}
+ if ((flags & 262144) != 0) {
+ restriction_reason = stream->readString(&error);
+ }
+ if ((flags & 524288) != 0) {
+ bot_inline_placeholder = stream->readString(&error);
+ }
}
void TL_user::serializeToStream(NativeByteBuffer *stream) {
@@ -1131,6 +1142,12 @@ void TL_user::serializeToStream(NativeByteBuffer *stream) {
if ((flags & 16384) != 0) {
stream->writeInt32(bot_info_version);
}
+ if ((flags & 262144) != 0) {
+ stream->writeString(restriction_reason);
+ }
+ if ((flags & 524288) != 0) {
+ stream->writeString(bot_inline_placeholder);
+ }
}
TL_auth_authorization *TL_auth_authorization::TLdeserialize(NativeByteBuffer *stream, uint32_t constructor, bool &error) {
@@ -1396,3 +1413,7 @@ void TL_auth_sendCode::serializeToStream(NativeByteBuffer *stream) {
stream->writeString(api_hash);
stream->writeString(lang_code);
}
+
+void TL_updatesTooLong::serializeToStream(NativeByteBuffer *stream) {
+ stream->writeInt32(constructor);
+}
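[Editor's note] The new TL_user fields follow the MTProto flags convention used throughout this file: an optional field exists on the wire only when its bit is set in the flags word, and readParams and serializeToStream must test the same bits in the same order. Bit values from the hunks above:

    // 262144 == 1 << 18 gates restriction_reason,
    // 524288 == 1 << 19 gates bot_inline_placeholder
    if ((flags & (1 << 18)) != 0) restriction_reason     = stream->readString(&error);
    if ((flags & (1 << 19)) != 0) bot_inline_placeholder = stream->readString(&error);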
diff --git a/TMessagesProj/jni/tgnet/MTProtoScheme.h b/TMessagesProj/jni/tgnet/MTProtoScheme.h
index baf35ee08..e6e399bd3 100644
--- a/TMessagesProj/jni/tgnet/MTProtoScheme.h
+++ b/TMessagesProj/jni/tgnet/MTProtoScheme.h
@@ -657,7 +657,7 @@ public:
class TL_config : public TLObject {
public:
- static const uint32_t constructor = 0x6cb6e65e;
+ static const uint32_t constructor = 0x6bbc5f8;
int32_t date;
int32_t expires;
@@ -676,6 +676,7 @@ public:
int32_t chat_big_size;
int32_t push_chat_period_ms;
int32_t push_chat_limit;
+ int32_t saved_gifs_limit;
std::vector<std::unique_ptr<TL_disabledFeature>> disabled_features;
static TL_config *TLdeserialize(NativeByteBuffer *stream, uint32_t constructor, bool &error);
@@ -860,6 +861,8 @@ public:
std::unique_ptr<UserStatus> status;
int32_t flags;
int32_t bot_info_version;
+ std::string restriction_reason;
+ std::string bot_inline_placeholder;
static User *TLdeserialize(NativeByteBuffer *stream, uint32_t constructor, bool &error);
};
@@ -876,7 +879,7 @@ public:
class TL_user : public User {
public:
- static const uint32_t constructor = 0x22e49072;
+ static const uint32_t constructor = 0xd10d979a;
void readParams(NativeByteBuffer *stream, bool &error);
void serializeToStream(NativeByteBuffer *stream);
@@ -978,6 +981,8 @@ class TL_updatesTooLong : public TLObject {
public:
static const uint32_t constructor = 0xe317af7e;
+
+ void serializeToStream(NativeByteBuffer *stream);
};
#endif
diff --git a/TMessagesProj/jni/tgnet/Request.cpp b/TMessagesProj/jni/tgnet/Request.cpp
index 02f3b05ab..583e8ffe1 100644
--- a/TMessagesProj/jni/tgnet/Request.cpp
+++ b/TMessagesProj/jni/tgnet/Request.cpp
@@ -53,7 +53,7 @@ void Request::clear(bool time) {
}
void Request::onComplete(TLObject *result, TL_error *error) {
- if (onCompleteRequestCallback != nullptr) {
+ if (onCompleteRequestCallback != nullptr && (result != nullptr || error != nullptr)) {
onCompleteRequestCallback(result, error);
}
}
diff --git a/TMessagesProj/jni/utils.h b/TMessagesProj/jni/utils.h
index f17ae9025..34805a1d7 100644
--- a/TMessagesProj/jni/utils.h
+++ b/TMessagesProj/jni/utils.h
@@ -4,7 +4,7 @@
#include <stdio.h>
#include <android/log.h>
-#define LOG_TAG "tmessages"
+#define LOG_TAG "tmessages_native"
#ifndef LOG_DISABLED
#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__)
#define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, LOG_TAG, __VA_ARGS__)
diff --git a/TMessagesProj/proguard-rules.pro b/TMessagesProj/proguard-rules.pro
new file mode 100644
index 000000000..cc87272f6
--- /dev/null
+++ b/TMessagesProj/proguard-rules.pro
@@ -0,0 +1,8 @@
+-keep public class com.google.android.gms.* { public *; }
+-keepnames @com.google.android.gms.common.annotation.KeepName class *
+-keepclassmembernames class * {
+ @com.google.android.gms.common.annotation.KeepName *;
+}
+-dontwarn com.google.android.gms.**
+-dontwarn com.google.common.cache.**
+-dontwarn com.google.common.primitives.**
\ No newline at end of file
diff --git a/TMessagesProj/src/main/java/org/telegram/SQLite/SQLiteCursor.java b/TMessagesProj/src/main/java/org/telegram/SQLite/SQLiteCursor.java
index 01275c5a0..b667c7fca 100755
--- a/TMessagesProj/src/main/java/org/telegram/SQLite/SQLiteCursor.java
+++ b/TMessagesProj/src/main/java/org/telegram/SQLite/SQLiteCursor.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.SQLite;
diff --git a/TMessagesProj/src/main/java/org/telegram/SQLite/SQLiteDatabase.java b/TMessagesProj/src/main/java/org/telegram/SQLite/SQLiteDatabase.java
index eb8bd6ada..c68c62db2 100755
--- a/TMessagesProj/src/main/java/org/telegram/SQLite/SQLiteDatabase.java
+++ b/TMessagesProj/src/main/java/org/telegram/SQLite/SQLiteDatabase.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.SQLite;
diff --git a/TMessagesProj/src/main/java/org/telegram/SQLite/SQLiteException.java b/TMessagesProj/src/main/java/org/telegram/SQLite/SQLiteException.java
index 2540c0947..fa0680886 100755
--- a/TMessagesProj/src/main/java/org/telegram/SQLite/SQLiteException.java
+++ b/TMessagesProj/src/main/java/org/telegram/SQLite/SQLiteException.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.SQLite;
diff --git a/TMessagesProj/src/main/java/org/telegram/SQLite/SQLiteNoRowException.java b/TMessagesProj/src/main/java/org/telegram/SQLite/SQLiteNoRowException.java
index 944d21430..2db0736c2 100755
--- a/TMessagesProj/src/main/java/org/telegram/SQLite/SQLiteNoRowException.java
+++ b/TMessagesProj/src/main/java/org/telegram/SQLite/SQLiteNoRowException.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.SQLite;
diff --git a/TMessagesProj/src/main/java/org/telegram/SQLite/SQLitePreparedStatement.java b/TMessagesProj/src/main/java/org/telegram/SQLite/SQLitePreparedStatement.java
index cd933701c..6135e037f 100755
--- a/TMessagesProj/src/main/java/org/telegram/SQLite/SQLitePreparedStatement.java
+++ b/TMessagesProj/src/main/java/org/telegram/SQLite/SQLitePreparedStatement.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.SQLite;
@@ -135,6 +135,10 @@ public class SQLitePreparedStatement {
bindLong(sqliteStatementHandle, index, value);
}
+ public void bindNull(int index) throws SQLiteException {
+ bindNull(sqliteStatementHandle, index);
+ }
+
native void bindByteBuffer(int statementHandle, int index, ByteBuffer value, int length) throws SQLiteException;
native void bindString(int statementHandle, int index, String value) throws SQLiteException;
native void bindInt(int statementHandle, int index, int value) throws SQLiteException;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/AndroidUtilities.java b/TMessagesProj/src/main/java/org/telegram/messenger/AndroidUtilities.java
index ab507911d..370175048 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/AndroidUtilities.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/AndroidUtilities.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/AnimationCompat/AnimatorListenerAdapterProxy.java b/TMessagesProj/src/main/java/org/telegram/messenger/AnimationCompat/AnimatorListenerAdapterProxy.java
index a136fbe58..4a5c55a17 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/AnimationCompat/AnimatorListenerAdapterProxy.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/AnimationCompat/AnimatorListenerAdapterProxy.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger.AnimationCompat;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/AnimationCompat/AnimatorSetProxy.java b/TMessagesProj/src/main/java/org/telegram/messenger/AnimationCompat/AnimatorSetProxy.java
index ac3663877..e6cccb0a2 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/AnimationCompat/AnimatorSetProxy.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/AnimationCompat/AnimatorSetProxy.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger.AnimationCompat;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/AnimationCompat/ObjectAnimatorProxy.java b/TMessagesProj/src/main/java/org/telegram/messenger/AnimationCompat/ObjectAnimatorProxy.java
index 392c25506..8bf69db6a 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/AnimationCompat/ObjectAnimatorProxy.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/AnimationCompat/ObjectAnimatorProxy.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger.AnimationCompat;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/AnimationCompat/ViewProxy.java b/TMessagesProj/src/main/java/org/telegram/messenger/AnimationCompat/ViewProxy.java
index e313acb8f..50a2690fa 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/AnimationCompat/ViewProxy.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/AnimationCompat/ViewProxy.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger.AnimationCompat;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/AppStartReceiver.java b/TMessagesProj/src/main/java/org/telegram/messenger/AppStartReceiver.java
index 153f68966..4a3980854 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/AppStartReceiver.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/AppStartReceiver.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/ApplicationLoader.java b/TMessagesProj/src/main/java/org/telegram/messenger/ApplicationLoader.java
index 1e485197b..3babf6e3a 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/ApplicationLoader.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/ApplicationLoader.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
@@ -22,7 +22,6 @@ import android.content.pm.PackageInfo;
import android.content.res.Configuration;
import android.graphics.drawable.ColorDrawable;
import android.graphics.drawable.Drawable;
-import android.os.AsyncTask;
import android.os.Build;
import android.os.Handler;
import android.os.PowerManager;
@@ -30,7 +29,6 @@ import android.util.Base64;
import com.google.android.gms.common.ConnectionResult;
import com.google.android.gms.common.GooglePlayServicesUtil;
-import com.google.android.gms.gcm.GoogleCloudMessaging;
import org.telegram.tgnet.ConnectionsManager;
import org.telegram.tgnet.SerializedData;
@@ -39,17 +37,9 @@ import org.telegram.ui.Components.ForegroundDetector;
import java.io.File;
import java.io.RandomAccessFile;
-import java.util.concurrent.atomic.AtomicInteger;
public class ApplicationLoader extends Application {
- private GoogleCloudMessaging gcm;
- private AtomicInteger msgId = new AtomicInteger();
- private String regid;
- public static final String EXTRA_MESSAGE = "message";
- public static final String PROPERTY_REG_ID = "registration_id";
- private static final String PROPERTY_APP_VERSION = "appVersion";
- private static final int PLAY_SERVICES_RESOLUTION_REQUEST = 9000;
private static Drawable cachedWallpaper;
private static int selectedColor;
private static boolean isCustomTheme;
@@ -331,13 +321,10 @@ public class ApplicationLoader extends Application {
@Override
public void run() {
if (checkPlayServices()) {
- gcm = GoogleCloudMessaging.getInstance(ApplicationLoader.this);
- regid = getRegistrationId();
-
- if (regid.length() == 0) {
- registerInBackground();
- } else {
- sendRegistrationIdToBackend(false);
+ if (UserConfig.pushString == null || UserConfig.pushString.length() == 0) {
+ FileLog.d("tmessages", "GCM Registration not found.");
+ Intent intent = new Intent(applicationContext, GcmRegistrationIntentService.class);
+ startService(intent);
}
} else {
FileLog.d("tmessages", "No valid Google Play Services APK found.");
@@ -359,91 +346,4 @@ public class ApplicationLoader extends Application {
}
return true;*/
}
-
- private String getRegistrationId() {
- final SharedPreferences prefs = getGCMPreferences(applicationContext);
- String registrationId = prefs.getString(PROPERTY_REG_ID, "");
- if (registrationId.length() == 0) {
- FileLog.d("tmessages", "Registration not found.");
- return "";
- }
- int registeredVersion = prefs.getInt(PROPERTY_APP_VERSION, Integer.MIN_VALUE);
- if (registeredVersion != BuildVars.BUILD_VERSION) {
- FileLog.d("tmessages", "App version changed.");
- return "";
- }
- return registrationId;
- }
-
- private SharedPreferences getGCMPreferences(Context context) {
- return getSharedPreferences(ApplicationLoader.class.getSimpleName(), Context.MODE_PRIVATE);
- }
-
- private void registerInBackground() {
- AsyncTask<String, String, Boolean> task = new AsyncTask<String, String, Boolean>() {
- @Override
- protected Boolean doInBackground(String... objects) {
- if (gcm == null) {
- gcm = GoogleCloudMessaging.getInstance(applicationContext);
- }
- int count = 0;
- while (count < 1000) {
- try {
- count++;
- regid = gcm.register(BuildVars.GCM_SENDER_ID);
- sendRegistrationIdToBackend(true);
- storeRegistrationId(applicationContext, regid);
- return true;
- } catch (Exception e) {
- FileLog.e("tmessages", e);
- }
- try {
- if (count % 20 == 0) {
- Thread.sleep(60000 * 30);
- } else {
- Thread.sleep(5000);
- }
- } catch (InterruptedException e) {
- FileLog.e("tmessages", e);
- }
- }
- return false;
- }
- };
-
- if (android.os.Build.VERSION.SDK_INT >= 11) {
- task.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR, null, null, null);
- } else {
- task.execute(null, null, null);
- }
- }
-
- private void sendRegistrationIdToBackend(final boolean isNew) {
- Utilities.stageQueue.postRunnable(new Runnable() {
- @Override
- public void run() {
- UserConfig.pushString = regid;
- UserConfig.registeredForPush = !isNew;
- UserConfig.saveConfig(false);
- if (UserConfig.getClientUserId() != 0) {
- AndroidUtilities.runOnUIThread(new Runnable() {
- @Override
- public void run() {
- MessagesController.getInstance().registerForPush(regid);
- }
- });
- }
- }
- });
- }
-
- private void storeRegistrationId(Context context, String regId) {
- final SharedPreferences prefs = getGCMPreferences(context);
- int appVersion = BuildVars.BUILD_VERSION;
- FileLog.e("tmessages", "Saving regId on app version " + appVersion);
- SharedPreferences.Editor editor = prefs.edit();
- editor.putString(PROPERTY_REG_ID, regId);
- editor.putInt(PROPERTY_APP_VERSION, appVersion);
- editor.commit();
- }
}
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/AuthenticatorService.java b/TMessagesProj/src/main/java/org/telegram/messenger/AuthenticatorService.java
index 60984c1b5..82bd4f4bf 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/AuthenticatorService.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/AuthenticatorService.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/AutoMessageHeardReceiver.java b/TMessagesProj/src/main/java/org/telegram/messenger/AutoMessageHeardReceiver.java
index bc08936f1..dd3baa741 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/AutoMessageHeardReceiver.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/AutoMessageHeardReceiver.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/AutoMessageReplyReceiver.java b/TMessagesProj/src/main/java/org/telegram/messenger/AutoMessageReplyReceiver.java
index d15b7cd25..ba91e0f0a 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/AutoMessageReplyReceiver.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/AutoMessageReplyReceiver.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
@@ -31,7 +31,7 @@ public class AutoMessageReplyReceiver extends BroadcastReceiver {
if (dialog_id == 0 || max_id == 0) {
return;
}
- SendMessagesHelper.getInstance().sendMessage(text.toString(), dialog_id, null, null, true, false);
+ SendMessagesHelper.getInstance().sendMessage(text.toString(), dialog_id, null, null, true, false, null, null);
MessagesController.getInstance().markDialogAsRead(dialog_id, max_id, max_id, 0, true, false);
}
}
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/Bitmaps.java b/TMessagesProj/src/main/java/org/telegram/messenger/Bitmaps.java
index c9ee2c4ef..fe3724626 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/Bitmaps.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/Bitmaps.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/BuildVars.java b/TMessagesProj/src/main/java/org/telegram/messenger/BuildVars.java
index 303034bf9..771ff364c 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/BuildVars.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/BuildVars.java
@@ -10,7 +10,7 @@ package org.telegram.messenger;
public class BuildVars {
public static boolean DEBUG_VERSION = false;
- public static int BUILD_VERSION = 695;
+ public static int BUILD_VERSION = 719;
public static int APP_ID = 0; //obtain your own APP_ID at https://core.telegram.org/api/obtaining_api_id
public static String APP_HASH = ""; //obtain your own APP_HASH at https://core.telegram.org/api/obtaining_api_id
public static String HOCKEY_APP_HASH = "your-hockeyapp-api-key-here";
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/ChatObject.java b/TMessagesProj/src/main/java/org/telegram/messenger/ChatObject.java
index b38d244b4..b3b58216b 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/ChatObject.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/ChatObject.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/ClearCacheService.java b/TMessagesProj/src/main/java/org/telegram/messenger/ClearCacheService.java
index e0ee3635b..39d1a438e 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/ClearCacheService.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/ClearCacheService.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/ContactsController.java b/TMessagesProj/src/main/java/org/telegram/messenger/ContactsController.java
index 4b4a9833b..676f20358 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/ContactsController.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/ContactsController.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/ContactsSyncAdapterService.java b/TMessagesProj/src/main/java/org/telegram/messenger/ContactsSyncAdapterService.java
index 1dac54c94..6a08d0f87 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/ContactsSyncAdapterService.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/ContactsSyncAdapterService.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/DispatchQueue.java b/TMessagesProj/src/main/java/org/telegram/messenger/DispatchQueue.java
index b28d0a6e6..6dce277ec 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/DispatchQueue.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/DispatchQueue.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/DownloadObject.java b/TMessagesProj/src/main/java/org/telegram/messenger/DownloadObject.java
index 9bfb579ce..2a48e1c02 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/DownloadObject.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/DownloadObject.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/Emoji.java b/TMessagesProj/src/main/java/org/telegram/messenger/Emoji.java
index 72992fc2f..49c0dc44f 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/Emoji.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/Emoji.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/EmojiData.java b/TMessagesProj/src/main/java/org/telegram/messenger/EmojiData.java
index 4fdbde10e..b45f53b40 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/EmojiData.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/EmojiData.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/FileLoadOperation.java b/TMessagesProj/src/main/java/org/telegram/messenger/FileLoadOperation.java
index 295729f0b..294314cdb 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/FileLoadOperation.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/FileLoadOperation.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
@@ -22,9 +22,9 @@ import java.util.Scanner;
public class FileLoadOperation {
private static class RequestInfo {
- private int requestToken = 0;
- private int offset = 0;
- private TLRPC.TL_upload_file response = null;
+ private int requestToken;
+ private int offset;
+ private TLRPC.TL_upload_file response;
}
private final static int stateIdle = 0;
@@ -51,7 +51,7 @@ public class FileLoadOperation {
private int requestsCount;
private int renameRetryCount;
- private int nextDownloadOffset = 0;
+ private int nextDownloadOffset;
private ArrayList<RequestInfo> requestInfos;
private ArrayList<RequestInfo> delayedRequestInfos;
@@ -62,9 +62,9 @@ public class FileLoadOperation {
private String ext;
private RandomAccessFile fileOutputStream;
private RandomAccessFile fiv;
- private File storePath = null;
- private File tempPath = null;
- private boolean isForceRequest = false;
+ private File storePath;
+ private File tempPath;
+ private boolean isForceRequest;
public interface FileLoadOperationDelegate {
void didFinishLoadingFile(FileLoadOperation operation, File finalFile);
@@ -143,19 +143,23 @@ public class FileLoadOperation {
key = documentLocation.key;
} else if (documentLocation instanceof TLRPC.TL_document) {
location = new TLRPC.TL_inputDocumentFileLocation();
- datacenter_id = documentLocation.dc_id;
location.id = documentLocation.id;
location.access_hash = documentLocation.access_hash;
+ datacenter_id = documentLocation.dc_id;
}
- totalBytesCount = documentLocation.size;
- ext = FileLoader.getDocumentFileName(documentLocation);
- int idx;
- if (ext == null || (idx = ext.lastIndexOf(".")) == -1) {
- ext = "";
- } else {
- ext = ext.substring(idx);
- if (ext.length() <= 1) {
+ if (totalBytesCount <= 0) {
+ totalBytesCount = documentLocation.size;
+ }
+ if (ext == null) {
+ ext = FileLoader.getDocumentFileName(documentLocation);
+ int idx;
+ if (ext == null || (idx = ext.lastIndexOf(".")) == -1) {
ext = "";
+ } else {
+ ext = ext.substring(idx);
+ if (ext.length() <= 1) {
+ ext = "";
+ }
}
}
}
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/FileLoader.java b/TMessagesProj/src/main/java/org/telegram/messenger/FileLoader.java
index 8b7f8ef16..836cb9e96 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/FileLoader.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/FileLoader.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
@@ -592,16 +592,6 @@ public class FileLoader {
return new File("");
}
- public static File getExistPathToAttach(TLObject attach) {
- File path = getInstance().getDirectory(MEDIA_DIR_CACHE);
- String fileName = getAttachFileName(attach);
- File attachPath = new File(path, fileName);
- if (attachPath.exists()) {
- return attachPath;
- }
- return getPathToAttach(attach);
- }
-
public static File getPathToAttach(TLObject attach) {
return getPathToAttach(attach, null, false);
}
@@ -703,7 +693,8 @@ public class FileLoader {
if (document.file_name != null) {
return document.file_name;
}
- for (TLRPC.DocumentAttribute documentAttribute : document.attributes) {
+ for (int a = 0; a < document.attributes.size(); a++) {
+ TLRPC.DocumentAttribute documentAttribute = document.attributes.get(a);
if (documentAttribute instanceof TLRPC.TL_documentAttributeFilename) {
return documentAttribute.file_name;
}
@@ -722,12 +713,15 @@ public class FileLoader {
return video.dc_id + "_" + video.id + "." + (ext != null ? ext : "mp4");
} else if (attach instanceof TLRPC.Document) {
TLRPC.Document document = (TLRPC.Document) attach;
- String docExt = getDocumentFileName(document);
- int idx;
- if (docExt == null || (idx = docExt.lastIndexOf(".")) == -1) {
- docExt = "";
- } else {
- docExt = docExt.substring(idx);
+ String docExt = null;
+ if (docExt == null) {
+ docExt = getDocumentFileName(document);
+ int idx;
+ if (docExt == null || (idx = docExt.lastIndexOf(".")) == -1) {
+ docExt = "";
+ } else {
+ docExt = docExt.substring(idx);
+ }
}
if (docExt.length() > 1) {
return document.dc_id + "_" + document.id + docExt;
@@ -736,7 +730,7 @@ public class FileLoader {
}
} else if (attach instanceof TLRPC.PhotoSize) {
TLRPC.PhotoSize photo = (TLRPC.PhotoSize) attach;
- if (photo.location == null) {
+ if (photo.location == null || photo.location instanceof TLRPC.TL_fileLocationUnavailable) {
return "";
}
return photo.location.volume_id + "_" + photo.location.local_id + "." + (ext != null ? ext : "jpg");
@@ -744,6 +738,9 @@ public class FileLoader {
TLRPC.Audio audio = (TLRPC.Audio) attach;
return audio.dc_id + "_" + audio.id + "." + (ext != null ? ext : "ogg");
} else if (attach instanceof TLRPC.FileLocation) {
+ if (attach instanceof TLRPC.TL_fileLocationUnavailable) {
+ return "";
+ }
TLRPC.FileLocation location = (TLRPC.FileLocation) attach;
return location.volume_id + "_" + location.local_id + "." + (ext != null ? ext : "jpg");
}
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/FileLog.java b/TMessagesProj/src/main/java/org/telegram/messenger/FileLog.java
index b20f09ae1..580f3ac99 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/FileLog.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/FileLog.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/FileUploadOperation.java b/TMessagesProj/src/main/java/org/telegram/messenger/FileUploadOperation.java
index e60cc2892..0cdf5e64f 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/FileUploadOperation.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/FileUploadOperation.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/GcmBroadcastReceiver.java b/TMessagesProj/src/main/java/org/telegram/messenger/GcmBroadcastReceiver.java
deleted file mode 100644
index 7a18c9e1b..000000000
--- a/TMessagesProj/src/main/java/org/telegram/messenger/GcmBroadcastReceiver.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * This is the source code of Telegram for Android v. 3.x.x.
- * It is licensed under GNU GPL v. 2 or later.
- * You should have received a copy of the license in this archive (see LICENSE).
- *
- * Copyright Nikolai Kudashov, 2013-2015.
- */
-
-package org.telegram.messenger;
-
-import android.app.Activity;
-import android.content.BroadcastReceiver;
-import android.content.Context;
-import android.content.Intent;
-
-import org.json.JSONObject;
-import org.telegram.tgnet.ConnectionsManager;
-
-public class GcmBroadcastReceiver extends BroadcastReceiver {
-
- public static final int NOTIFICATION_ID = 1;
-
- @Override
- public void onReceive(final Context context, final Intent intent) {
- FileLog.d("tmessages", "GCM received intent: " + intent);
-
- if (intent.getAction().equals("com.google.android.c2dm.intent.RECEIVE")) {
- AndroidUtilities.runOnUIThread(new Runnable() {
- @Override
- public void run() {
- ApplicationLoader.postInitApplication();
-
- try {
- String key = intent.getStringExtra("loc_key");
- if ("DC_UPDATE".equals(key)) {
- String data = intent.getStringExtra("custom");
- JSONObject object = new JSONObject(data);
- int dc = object.getInt("dc");
- String addr = object.getString("addr");
- String[] parts = addr.split(":");
- if (parts.length != 2) {
- return;
- }
- String ip = parts[0];
- int port = Integer.parseInt(parts[1]);
- ConnectionsManager.getInstance().applyDatacenterAddress(dc, ip, port);
- }
- } catch (Exception e) {
- FileLog.e("tmessages", e);
- }
-
- ConnectionsManager.getInstance().resumeNetworkMaybe();
- }
- });
- } else if (intent.getAction().equals("com.google.android.c2dm.intent.REGISTRATION")) {
- String registration = intent.getStringExtra("registration_id");
- if (intent.getStringExtra("error") != null) {
- FileLog.e("tmessages", "Registration failed, should try again later.");
- } else if (intent.getStringExtra("unregistered") != null) {
- FileLog.e("tmessages", "unregistration done, new messages from the authorized sender will be rejected");
- } else if (registration != null) {
- FileLog.e("tmessages", "registration id = " + registration);
- }
- }
-
- setResultCode(Activity.RESULT_OK);
- }
-}
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/GcmInstanceIDListenerService.java b/TMessagesProj/src/main/java/org/telegram/messenger/GcmInstanceIDListenerService.java
new file mode 100644
index 000000000..71f9cff76
--- /dev/null
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/GcmInstanceIDListenerService.java
@@ -0,0 +1,28 @@
+/*
+ * This is the source code of Telegram for Android v. 3.x.x.
+ * It is licensed under GNU GPL v. 2 or later.
+ * You should have received a copy of the license in this archive (see LICENSE).
+ *
+ * Copyright Nikolai Kudashov, 2013-2016.
+ */
+
+package org.telegram.messenger;
+
+import android.content.Intent;
+
+import com.google.android.gms.iid.InstanceIDListenerService;
+
+public class GcmInstanceIDListenerService extends InstanceIDListenerService {
+
+ @Override
+ public void onTokenRefresh() {
+ AndroidUtilities.runOnUIThread(new Runnable() {
+ @Override
+ public void run() {
+ ApplicationLoader.postInitApplication();
+ Intent intent = new Intent(ApplicationLoader.applicationContext, GcmRegistrationIntentService.class);
+ startService(intent);
+ }
+ });
+ }
+}
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/GcmPushListenerService.java b/TMessagesProj/src/main/java/org/telegram/messenger/GcmPushListenerService.java
new file mode 100644
index 000000000..f683a7251
--- /dev/null
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/GcmPushListenerService.java
@@ -0,0 +1,53 @@
+/*
+ * This is the source code of Telegram for Android v. 3.x.x.
+ * It is licensed under GNU GPL v. 2 or later.
+ * You should have received a copy of the license in this archive (see LICENSE).
+ *
+ * Copyright Nikolai Kudashov, 2013-2016.
+ */
+
+package org.telegram.messenger;
+
+import android.os.Bundle;
+
+import com.google.android.gms.gcm.GcmListenerService;
+
+import org.json.JSONObject;
+import org.telegram.tgnet.ConnectionsManager;
+
+public class GcmPushListenerService extends GcmListenerService {
+
+ public static final int NOTIFICATION_ID = 1;
+
+ @Override
+ public void onMessageReceived(String from, final Bundle bundle) {
+ FileLog.d("tmessages", "GCM received bundle: " + bundle + " from: " + from);
+ AndroidUtilities.runOnUIThread(new Runnable() {
+ @Override
+ public void run() {
+ ApplicationLoader.postInitApplication();
+
+ try {
+ String key = bundle.getString("loc_key");
+ if ("DC_UPDATE".equals(key)) {
+ String data = bundle.getString("custom");
+ JSONObject object = new JSONObject(data);
+ int dc = object.getInt("dc");
+ String addr = object.getString("addr");
+ String[] parts = addr.split(":");
+ if (parts.length != 2) {
+ return;
+ }
+ String ip = parts[0];
+ int port = Integer.parseInt(parts[1]);
+ ConnectionsManager.getInstance().applyDatacenterAddress(dc, ip, port);
+ }
+ } catch (Exception e) {
+ FileLog.e("tmessages", e);
+ }
+
+ ConnectionsManager.getInstance().resumeNetworkMaybe();
+ }
+ });
+ }
+}
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/GcmRegistrationIntentService.java b/TMessagesProj/src/main/java/org/telegram/messenger/GcmRegistrationIntentService.java
new file mode 100644
index 000000000..c135cce48
--- /dev/null
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/GcmRegistrationIntentService.java
@@ -0,0 +1,68 @@
+/*
+ * This is the source code of Telegram for Android v. 3.x.x.
+ * It is licensed under GNU GPL v. 2 or later.
+ * You should have received a copy of the license in this archive (see LICENSE).
+ *
+ * Copyright Nikolai Kudashov, 2013-2016.
+ */
+
+package org.telegram.messenger;
+
+import android.app.IntentService;
+import android.content.Intent;
+
+import com.google.android.gms.gcm.GoogleCloudMessaging;
+import com.google.android.gms.iid.InstanceID;
+
+public class GcmRegistrationIntentService extends IntentService {
+
+ public GcmRegistrationIntentService() {
+ super("GcmRegistrationIntentService");
+ }
+
+ @Override
+ protected void onHandleIntent(Intent intent) {
+ try {
+ InstanceID instanceID = InstanceID.getInstance(this);
+ String token = instanceID.getToken(BuildVars.GCM_SENDER_ID, GoogleCloudMessaging.INSTANCE_ID_SCOPE, null);
+ FileLog.d("tmessages", "GCM Registration Token: " + token);
+ sendRegistrationToServer(token);
+ } catch (Exception e) {
+ FileLog.e("tmessages", e);
+ final int failCount = intent.getIntExtra("failCount", 0);
+ if (failCount < 60) {
+ AndroidUtilities.runOnUIThread(new Runnable() {
+ @Override
+ public void run() {
+ try {
+ Intent intent = new Intent(ApplicationLoader.applicationContext, GcmRegistrationIntentService.class);
+ intent.putExtra("failCount", failCount + 1);
+ startService(intent);
+ } catch (Exception e) {
+ FileLog.e("tmessages", e);
+ }
+ }
+ }, failCount < 20 ? 10000 : 60000 * 30);
+ }
+ }
+ }
+
+ private void sendRegistrationToServer(final String token) {
+ Utilities.stageQueue.postRunnable(new Runnable() {
+ @Override
+ public void run() {
+ UserConfig.pushString = token;
+ UserConfig.registeredForPush = false;
+ UserConfig.saveConfig(false);
+ if (UserConfig.getClientUserId() != 0) {
+ AndroidUtilities.runOnUIThread(new Runnable() {
+ @Override
+ public void run() {
+ MessagesController.getInstance().registerForPush(token);
+ }
+ });
+ }
+ }
+ });
+ }
+}
\ No newline at end of file
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/ImageLoader.java b/TMessagesProj/src/main/java/org/telegram/messenger/ImageLoader.java
index 453e37e51..285fdb68c 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/ImageLoader.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/ImageLoader.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
@@ -26,8 +26,10 @@ import android.os.Environment;
import android.os.ParcelFileDescriptor;
import android.provider.MediaStore;
+import org.telegram.tgnet.ConnectionsManager;
import org.telegram.tgnet.TLObject;
import org.telegram.tgnet.TLRPC;
+import org.telegram.ui.Components.AnimatedFileDrawable;
import java.io.ByteArrayOutputStream;
import java.io.File;
@@ -38,6 +40,8 @@ import java.io.InputStream;
import java.io.RandomAccessFile;
import java.lang.reflect.Method;
import java.net.HttpURLConnection;
+import java.net.SocketException;
+import java.net.SocketTimeoutException;
import java.net.URL;
import java.net.URLConnection;
import java.net.UnknownHostException;
@@ -46,6 +50,8 @@ import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
public class ImageLoader {
@@ -266,6 +272,17 @@ public class ImageLoader {
fileOutputStream = new RandomAccessFile(cacheImage.tempFilePath, "rws");
}
} catch (Throwable e) {
+ if (e instanceof SocketTimeoutException) {
+ if (ConnectionsManager.isNetworkOnline()) {
+ canRetry = false;
+ }
+ } else if (e instanceof UnknownHostException) {
+ canRetry = false;
+ } else if (e instanceof SocketException) {
+ if (e.getMessage() != null && e.getMessage().contains("ECONNRESET")) {
+ canRetry = false;
+ }
+ }
FileLog.e("tmessages", e);
}
}
@@ -281,10 +298,26 @@ public class ImageLoader {
} catch (Exception e) {
FileLog.e("tmessages", e);
}
+ if (imageSize == 0 && httpConnection != null) {
+ try {
+ Map<String, List<String>> headerFields = httpConnection.getHeaderFields();
+ if (headerFields != null) {
+ List<String> values = headerFields.get("content-Length");
+ if (values != null && !values.isEmpty()) {
+ String length = (String) values.get(0);
+ if (length != null) {
+ imageSize = Utilities.parseInt(length);
+ }
+ }
+ }
+ } catch (Exception e) {
+ FileLog.e("tmessages", e);
+ }
+ }
if (httpConnectionStream != null) {
try {
- byte[] data = new byte[1024 * 2];
+ byte[] data = new byte[1024 * 8];
int totalLoaded = 0;
while (true) {
if (isCancelled()) {
@@ -530,224 +563,80 @@ public class ImageLoader {
}
}
- Long mediaId = null;
- boolean mediaIsVideo = false;
- Bitmap image = null;
- File cacheFileFinal = cacheImage.finalFilePath;
- boolean canDeleteFile = true;
- boolean useNativeWebpLoaded = false;
-
- if (Build.VERSION.SDK_INT < 19) {
- RandomAccessFile randomAccessFile = null;
- try {
- randomAccessFile = new RandomAccessFile(cacheFileFinal, "r");
- byte[] bytes;
- if (cacheImage.thumb) {
- bytes = headerThumb;
- } else {
- bytes = header;
- }
- randomAccessFile.readFully(bytes, 0, bytes.length);
- String str = new String(bytes).toLowerCase();
- str = str.toLowerCase();
- if (str.startsWith("riff") && str.endsWith("webp")) {
- useNativeWebpLoaded = true;
- }
- randomAccessFile.close();
- } catch (Exception e) {
- FileLog.e("tmessages", e);
- } finally {
- if (randomAccessFile != null) {
- try {
- randomAccessFile.close();
- } catch (Exception e) {
- FileLog.e("tmessages", e);
- }
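+ // Animated files (GIF/MP4) skip bitmap decoding entirely; they are wrapped in an AnimatedFileDrawable instead.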
+ if (cacheImage.animatedFile) {
+ synchronized (sync) {
+ if (isCancelled) {
+ return;
}
}
- }
-
- if (cacheImage.thumb) {
- int blurType = 0;
- if (cacheImage.filter != null) {
- if (cacheImage.filter.contains("b2")) {
- blurType = 3;
- } else if (cacheImage.filter.contains("b1")) {
- blurType = 2;
- } else if (cacheImage.filter.contains("b")) {
- blurType = 1;
- }
- }
-
- try {
- lastCacheOutTime = System.currentTimeMillis();
- synchronized (sync) {
- if (isCancelled) {
- return;
- }
- }
-
- BitmapFactory.Options opts = new BitmapFactory.Options();
- opts.inSampleSize = 1;
-
- if (Build.VERSION.SDK_INT >= 14 && Build.VERSION.SDK_INT < 21) {
- opts.inPurgeable = true;
- }
-
- if (useNativeWebpLoaded) {
- RandomAccessFile file = new RandomAccessFile(cacheFileFinal, "r");
- ByteBuffer buffer = file.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, cacheFileFinal.length());
-
- BitmapFactory.Options bmOptions = new BitmapFactory.Options();
- bmOptions.inJustDecodeBounds = true;
- Utilities.loadWebpImage(null, buffer, buffer.limit(), bmOptions, true);
- image = Bitmaps.createBitmap(bmOptions.outWidth, bmOptions.outHeight, Bitmap.Config.ARGB_8888);
-
- Utilities.loadWebpImage(image, buffer, buffer.limit(), null, !opts.inPurgeable);
- file.close();
- } else {
- if (opts.inPurgeable) {
- RandomAccessFile f = new RandomAccessFile(cacheFileFinal, "r");
- int len = (int) f.length();
- byte[] data = bytesThumb != null && bytesThumb.length >= len ? bytesThumb : null;
- if (data == null) {
- bytesThumb = data = new byte[len];
- }
- f.readFully(data, 0, len);
- image = BitmapFactory.decodeByteArray(data, 0, len, opts);
- } else {
- FileInputStream is = new FileInputStream(cacheFileFinal);
- image = BitmapFactory.decodeStream(is, null, opts);
- is.close();
- }
- }
-
- if (image == null) {
- if (cacheFileFinal.length() == 0 || cacheImage.filter == null) {
- cacheFileFinal.delete();
- }
- } else {
- if (blurType == 1) {
- Utilities.blurBitmap(image, 3, opts.inPurgeable ? 0 : 1);
- } else if (blurType == 2) {
- Utilities.blurBitmap(image, 1, opts.inPurgeable ? 0 : 1);
- } else if (blurType == 3) {
- Utilities.blurBitmap(image, 7, opts.inPurgeable ? 0 : 1);
- Utilities.blurBitmap(image, 7, opts.inPurgeable ? 0 : 1);
- Utilities.blurBitmap(image, 7, opts.inPurgeable ? 0 : 1);
- } else if (blurType == 0 && opts.inPurgeable) {
- Utilities.pinBitmap(image);
- }
- if (runtimeHack != null) {
- runtimeHack.trackFree(image.getRowBytes() * image.getHeight());
- }
- }
- } catch (Throwable e) {
- FileLog.e("tmessages", e);
- }
+ AnimatedFileDrawable fileDrawable = new AnimatedFileDrawable(cacheImage.finalFilePath, cacheImage.filter != null && cacheImage.filter.equals("d"));
+ Thread.interrupted();
+ onPostExecute(fileDrawable);
} else {
- try {
- if (cacheImage.httpUrl != null) {
- if (cacheImage.httpUrl.startsWith("thumb://")) {
- int idx = cacheImage.httpUrl.indexOf(":", 8);
- if (idx >= 0) {
- mediaId = Long.parseLong(cacheImage.httpUrl.substring(8, idx));
- mediaIsVideo = false;
- }
- canDeleteFile = false;
- } else if (cacheImage.httpUrl.startsWith("vthumb://")) {
- int idx = cacheImage.httpUrl.indexOf(":", 9);
- if (idx >= 0) {
- mediaId = Long.parseLong(cacheImage.httpUrl.substring(9, idx));
- mediaIsVideo = true;
- }
- canDeleteFile = false;
- } else if (!cacheImage.httpUrl.startsWith("http")) {
- canDeleteFile = false;
- }
- }
+ Long mediaId = null;
+ boolean mediaIsVideo = false;
+ Bitmap image = null;
+ File cacheFileFinal = cacheImage.finalFilePath;
+ boolean canDeleteFile = true;
+ boolean useNativeWebpLoaded = false;
- int delay = 20;
- if (runtimeHack != null) {
- delay = 60;
- }
- if (mediaId != null) {
- delay = 0;
- }
- if (delay != 0 && lastCacheOutTime != 0 && lastCacheOutTime > System.currentTimeMillis() - delay && Build.VERSION.SDK_INT < 21) {
- Thread.sleep(delay);
- }
- lastCacheOutTime = System.currentTimeMillis();
- synchronized (sync) {
- if (isCancelled) {
- return;
- }
- }
-
- BitmapFactory.Options opts = new BitmapFactory.Options();
- opts.inSampleSize = 1;
-
- float w_filter = 0;
- float h_filter = 0;
- boolean blur = false;
- if (cacheImage.filter != null) {
- String args[] = cacheImage.filter.split("_");
- if (args.length >= 2) {
- w_filter = Float.parseFloat(args[0]) * AndroidUtilities.density;
- h_filter = Float.parseFloat(args[1]) * AndroidUtilities.density;
- }
- if (cacheImage.filter.contains("b")) {
- blur = true;
- }
- if (w_filter != 0 && h_filter != 0) {
- opts.inJustDecodeBounds = true;
-
- if (mediaId != null) {
- if (mediaIsVideo) {
- MediaStore.Video.Thumbnails.getThumbnail(ApplicationLoader.applicationContext.getContentResolver(), mediaId, MediaStore.Video.Thumbnails.MINI_KIND, opts);
- } else {
- MediaStore.Images.Thumbnails.getThumbnail(ApplicationLoader.applicationContext.getContentResolver(), mediaId, MediaStore.Images.Thumbnails.MINI_KIND, opts);
- }
- } else {
- FileInputStream is = new FileInputStream(cacheFileFinal);
- image = BitmapFactory.decodeStream(is, null, opts);
- is.close();
- }
-
- float photoW = opts.outWidth;
- float photoH = opts.outHeight;
- float scaleFactor = Math.max(photoW / w_filter, photoH / h_filter);
- if (scaleFactor < 1) {
- scaleFactor = 1;
- }
- opts.inJustDecodeBounds = false;
- opts.inSampleSize = (int) scaleFactor;
- }
- }
- synchronized (sync) {
- if (isCancelled) {
- return;
- }
- }
-
- if (cacheImage.filter == null || blur || cacheImage.httpUrl != null) {
- opts.inPreferredConfig = Bitmap.Config.ARGB_8888;
- } else {
- opts.inPreferredConfig = Bitmap.Config.RGB_565;
- }
- if (Build.VERSION.SDK_INT >= 14 && Build.VERSION.SDK_INT < 21) {
- opts.inPurgeable = true;
- }
-
- opts.inDither = false;
- if (mediaId != null) {
- if (mediaIsVideo) {
- image = MediaStore.Video.Thumbnails.getThumbnail(ApplicationLoader.applicationContext.getContentResolver(), mediaId, MediaStore.Video.Thumbnails.MINI_KIND, opts);
+ if (Build.VERSION.SDK_INT < 19) {
+ RandomAccessFile randomAccessFile = null;
+ try {
+ randomAccessFile = new RandomAccessFile(cacheFileFinal, "r");
+ byte[] bytes;
+ if (cacheImage.thumb) {
+ bytes = headerThumb;
} else {
- image = MediaStore.Images.Thumbnails.getThumbnail(ApplicationLoader.applicationContext.getContentResolver(), mediaId, MediaStore.Images.Thumbnails.MINI_KIND, opts);
+ bytes = header;
+ }
+ randomAccessFile.readFully(bytes, 0, bytes.length);
+ String str = new String(bytes).toLowerCase();
+ if (str.startsWith("riff") && str.endsWith("webp")) {
+ useNativeWebpLoaded = true;
+ }
+ randomAccessFile.close();
+ } catch (Exception e) {
+ FileLog.e("tmessages", e);
+ } finally {
+ if (randomAccessFile != null) {
+ try {
+ randomAccessFile.close();
+ } catch (Exception e) {
+ FileLog.e("tmessages", e);
+ }
}
}
- if (image == null) {
+ }
+
+ if (cacheImage.thumb) {
+ int blurType = 0;
+ if (cacheImage.filter != null) {
+ if (cacheImage.filter.contains("b2")) {
+ blurType = 3;
+ } else if (cacheImage.filter.contains("b1")) {
+ blurType = 2;
+ } else if (cacheImage.filter.contains("b")) {
+ blurType = 1;
+ }
+ }
+
+ try {
+ lastCacheOutTime = System.currentTimeMillis();
+ synchronized (sync) {
+ if (isCancelled) {
+ return;
+ }
+ }
+
+ BitmapFactory.Options opts = new BitmapFactory.Options();
+ opts.inSampleSize = 1;
+
+ if (Build.VERSION.SDK_INT >= 14 && Build.VERSION.SDK_INT < 21) {
+ opts.inPurgeable = true;
+ }
+
if (useNativeWebpLoaded) {
RandomAccessFile file = new RandomAccessFile(cacheFileFinal, "r");
ByteBuffer buffer = file.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, cacheFileFinal.length());
@@ -763,9 +652,9 @@ public class ImageLoader {
if (opts.inPurgeable) {
RandomAccessFile f = new RandomAccessFile(cacheFileFinal, "r");
int len = (int) f.length();
- byte[] data = bytes != null && bytes.length >= len ? bytes : null;
+ byte[] data = bytesThumb != null && bytesThumb.length >= len ? bytesThumb : null;
if (data == null) {
- bytes = data = new byte[len];
+ bytesThumb = data = new byte[len];
}
f.readFully(data, 0, len);
image = BitmapFactory.decodeByteArray(data, 0, len, opts);
@@ -775,42 +664,205 @@ public class ImageLoader {
is.close();
}
}
- }
- if (image == null) {
- if (canDeleteFile && (cacheFileFinal.length() == 0 || cacheImage.filter == null)) {
- cacheFileFinal.delete();
+
+ if (image == null) {
+ if (cacheFileFinal.length() == 0 || cacheImage.filter == null) {
+ cacheFileFinal.delete();
+ }
+ } else {
+ if (blurType == 1) {
+ if (image.getConfig() == Bitmap.Config.ARGB_8888) {
+ Utilities.blurBitmap(image, 3, opts.inPurgeable ? 0 : 1, image.getWidth(), image.getHeight(), image.getRowBytes());
+ }
+ } else if (blurType == 2) {
+ if (image.getConfig() == Bitmap.Config.ARGB_8888) {
+ Utilities.blurBitmap(image, 1, opts.inPurgeable ? 0 : 1, image.getWidth(), image.getHeight(), image.getRowBytes());
+ }
+ } else if (blurType == 3) {
+ if (image.getConfig() == Bitmap.Config.ARGB_8888) {
+ Utilities.blurBitmap(image, 7, opts.inPurgeable ? 0 : 1, image.getWidth(), image.getHeight(), image.getRowBytes());
+ Utilities.blurBitmap(image, 7, opts.inPurgeable ? 0 : 1, image.getWidth(), image.getHeight(), image.getRowBytes());
+ Utilities.blurBitmap(image, 7, opts.inPurgeable ? 0 : 1, image.getWidth(), image.getHeight(), image.getRowBytes());
+ }
+ } else if (blurType == 0 && opts.inPurgeable) {
+ Utilities.pinBitmap(image);
+ }
+ if (runtimeHack != null) {
+ runtimeHack.trackFree(image.getRowBytes() * image.getHeight());
+ }
}
- } else {
- boolean blured = false;
+ } catch (Throwable e) {
+ FileLog.e("tmessages", e);
+ }
+ } else {
+ try {
+ if (cacheImage.httpUrl != null) {
+ if (cacheImage.httpUrl.startsWith("thumb://")) {
+ int idx = cacheImage.httpUrl.indexOf(":", 8);
+ if (idx >= 0) {
+ mediaId = Long.parseLong(cacheImage.httpUrl.substring(8, idx));
+ mediaIsVideo = false;
+ }
+ canDeleteFile = false;
+ } else if (cacheImage.httpUrl.startsWith("vthumb://")) {
+ int idx = cacheImage.httpUrl.indexOf(":", 9);
+ if (idx >= 0) {
+ mediaId = Long.parseLong(cacheImage.httpUrl.substring(9, idx));
+ mediaIsVideo = true;
+ }
+ canDeleteFile = false;
+ } else if (!cacheImage.httpUrl.startsWith("http")) {
+ canDeleteFile = false;
+ }
+ }
+
+ int delay = 20;
+ if (runtimeHack != null) {
+ delay = 60;
+ }
+ if (mediaId != null) {
+ delay = 0;
+ }
+ if (delay != 0 && lastCacheOutTime != 0 && lastCacheOutTime > System.currentTimeMillis() - delay && Build.VERSION.SDK_INT < 21) {
+ Thread.sleep(delay);
+ }
+ lastCacheOutTime = System.currentTimeMillis();
+ synchronized (sync) {
+ if (isCancelled) {
+ return;
+ }
+ }
+
+ BitmapFactory.Options opts = new BitmapFactory.Options();
+ opts.inSampleSize = 1;
+
+ float w_filter = 0;
+ float h_filter = 0;
+ boolean blur = false;
if (cacheImage.filter != null) {
- float bitmapW = image.getWidth();
- float bitmapH = image.getHeight();
- if (!opts.inPurgeable && w_filter != 0 && bitmapW != w_filter && bitmapW > w_filter + 20) {
- float scaleFactor = bitmapW / w_filter;
- Bitmap scaledBitmap = Bitmaps.createScaledBitmap(image, (int) w_filter, (int) (bitmapH / scaleFactor), true);
- if (image != scaledBitmap) {
- image.recycle();
- image = scaledBitmap;
+ String args[] = cacheImage.filter.split("_");
+ if (args.length >= 2) {
+ w_filter = Float.parseFloat(args[0]) * AndroidUtilities.density;
+ h_filter = Float.parseFloat(args[1]) * AndroidUtilities.density;
+ }
+ if (cacheImage.filter.contains("b")) {
+ blur = true;
+ }
+ if (w_filter != 0 && h_filter != 0) {
+ opts.inJustDecodeBounds = true;
+
+ if (mediaId != null) {
+ if (mediaIsVideo) {
+ MediaStore.Video.Thumbnails.getThumbnail(ApplicationLoader.applicationContext.getContentResolver(), mediaId, MediaStore.Video.Thumbnails.MINI_KIND, opts);
+ } else {
+ MediaStore.Images.Thumbnails.getThumbnail(ApplicationLoader.applicationContext.getContentResolver(), mediaId, MediaStore.Images.Thumbnails.MINI_KIND, opts);
+ }
+ } else {
+ FileInputStream is = new FileInputStream(cacheFileFinal);
+ image = BitmapFactory.decodeStream(is, null, opts);
+ is.close();
+ }
+
+ float photoW = opts.outWidth;
+ float photoH = opts.outHeight;
+ float scaleFactor = Math.max(photoW / w_filter, photoH / h_filter);
+ if (scaleFactor < 1) {
+ scaleFactor = 1;
+ }
+ opts.inJustDecodeBounds = false;
+ opts.inSampleSize = (int) scaleFactor;
+ }
+ }
+ synchronized (sync) {
+ if (isCancelled) {
+ return;
+ }
+ }
+
+ if (cacheImage.filter == null || blur || cacheImage.httpUrl != null) {
+ opts.inPreferredConfig = Bitmap.Config.ARGB_8888;
+ } else {
+ opts.inPreferredConfig = Bitmap.Config.RGB_565;
+ }
+ if (Build.VERSION.SDK_INT >= 14 && Build.VERSION.SDK_INT < 21) {
+ opts.inPurgeable = true;
+ }
+
+ opts.inDither = false;
+ if (mediaId != null) {
+ if (mediaIsVideo) {
+ image = MediaStore.Video.Thumbnails.getThumbnail(ApplicationLoader.applicationContext.getContentResolver(), mediaId, MediaStore.Video.Thumbnails.MINI_KIND, opts);
+ } else {
+ image = MediaStore.Images.Thumbnails.getThumbnail(ApplicationLoader.applicationContext.getContentResolver(), mediaId, MediaStore.Images.Thumbnails.MINI_KIND, opts);
+ }
+ }
+ if (image == null) {
+ if (useNativeWebpLoaded) {
+ RandomAccessFile file = new RandomAccessFile(cacheFileFinal, "r");
+ ByteBuffer buffer = file.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, cacheFileFinal.length());
+
+ BitmapFactory.Options bmOptions = new BitmapFactory.Options();
+ bmOptions.inJustDecodeBounds = true;
+ Utilities.loadWebpImage(null, buffer, buffer.limit(), bmOptions, true);
+ image = Bitmaps.createBitmap(bmOptions.outWidth, bmOptions.outHeight, Bitmap.Config.ARGB_8888);
+
+ Utilities.loadWebpImage(image, buffer, buffer.limit(), null, !opts.inPurgeable);
+ file.close();
+ } else {
+ if (opts.inPurgeable) {
+ RandomAccessFile f = new RandomAccessFile(cacheFileFinal, "r");
+ int len = (int) f.length();
+ byte[] data = bytes != null && bytes.length >= len ? bytes : null;
+ if (data == null) {
+ bytes = data = new byte[len];
+ }
+ f.readFully(data, 0, len);
+ image = BitmapFactory.decodeByteArray(data, 0, len, opts);
+ } else {
+ FileInputStream is = new FileInputStream(cacheFileFinal);
+ image = BitmapFactory.decodeStream(is, null, opts);
+ is.close();
}
}
- if (image != null && blur && bitmapH < 100 && bitmapW < 100) {
- Utilities.blurBitmap(image, 3, opts.inPurgeable ? 0 : 1);
- blured = true;
+ }
+ if (image == null) {
+ if (canDeleteFile && (cacheFileFinal.length() == 0 || cacheImage.filter == null)) {
+ cacheFileFinal.delete();
+ }
+ } else {
+ boolean blured = false;
+ if (cacheImage.filter != null) {
+ float bitmapW = image.getWidth();
+ float bitmapH = image.getHeight();
+ if (!opts.inPurgeable && w_filter != 0 && bitmapW != w_filter && bitmapW > w_filter + 20) {
+ float scaleFactor = bitmapW / w_filter;
+ Bitmap scaledBitmap = Bitmaps.createScaledBitmap(image, (int) w_filter, (int) (bitmapH / scaleFactor), true);
+ if (image != scaledBitmap) {
+ image.recycle();
+ image = scaledBitmap;
+ }
+ }
+ if (image != null && blur && bitmapH < 100 && bitmapW < 100) {
+ if (image.getConfig() == Bitmap.Config.ARGB_8888) {
+ Utilities.blurBitmap(image, 3, opts.inPurgeable ? 0 : 1, image.getWidth(), image.getHeight(), image.getRowBytes());
+ }
+ blured = true;
+ }
+ }
+ if (!blured && opts.inPurgeable) {
+ Utilities.pinBitmap(image);
+ }
+ if (runtimeHack != null && image != null) {
+ runtimeHack.trackFree(image.getRowBytes() * image.getHeight());
}
}
- if (!blured && opts.inPurgeable) {
- Utilities.pinBitmap(image);
- }
- if (runtimeHack != null && image != null) {
- runtimeHack.trackFree(image.getRowBytes() * image.getHeight());
- }
+ } catch (Throwable e) {
+ //don't prompt
}
- } catch (Throwable e) {
- //don't promt
}
+ Thread.interrupted();
+ onPostExecute(image != null ? new BitmapDrawable(image) : null);
}
- Thread.interrupted();
- onPostExecute(image != null ? new BitmapDrawable(image) : null);
}
private void onPostExecute(final BitmapDrawable bitmapDrawable) {
@@ -818,7 +870,9 @@ public class ImageLoader {
@Override
public void run() {
BitmapDrawable toSet = null;
- if (bitmapDrawable != null) {
+ if (bitmapDrawable instanceof AnimatedFileDrawable) {
+ toSet = bitmapDrawable;
+ } else if (bitmapDrawable != null) {
toSet = memCache.get(cacheImage.key);
if (toSet == null) {
memCache.put(cacheImage.key, bitmapDrawable);
@@ -909,6 +963,7 @@ public class ImageLoader {
protected String filter;
protected String ext;
protected TLObject location;
+ protected boolean animatedFile;
protected File finalFilePath;
protected File tempFilePath;
@@ -986,13 +1041,29 @@ public class ImageLoader {
AndroidUtilities.runOnUIThread(new Runnable() {
@Override
public void run() {
- for (ImageReceiver imgView : finalImageReceiverArray) {
- imgView.setImageBitmapByKey(image, key, thumb, false);
+ if (image instanceof AnimatedFileDrawable) {
+ boolean imageSet = false;
+ AnimatedFileDrawable fileDrawable = (AnimatedFileDrawable) image;
+ for (int a = 0; a < finalImageReceiverArray.size(); a++) {
+ ImageReceiver imgView = finalImageReceiverArray.get(a);
+ if (imgView.setImageBitmapByKey(a == 0 ? fileDrawable : fileDrawable.makeCopy(), key, thumb, false)) {
+ imageSet = true;
+ }
+ }
+ if (!imageSet) {
+ ((AnimatedFileDrawable) image).recycle();
+ }
+ } else {
+ for (int a = 0; a < finalImageReceiverArray.size(); a++) {
+ ImageReceiver imgView = finalImageReceiverArray.get(a);
+ imgView.setImageBitmapByKey(image, key, thumb, false);
+ }
}
}
});
}
- for (ImageReceiver imageReceiver : imageReceiverArray) {
+ for (int a = 0; a < imageReceiverArray.size(); a++) {
+ ImageReceiver imageReceiver = imageReceiverArray.get(a);
imageLoadingByTag.remove(imageReceiver.getTag(thumb));
}
imageReceiverArray.clear();
@@ -1117,7 +1188,7 @@ public class ImageLoader {
AndroidUtilities.runOnUIThread(new Runnable() {
@Override
public void run() {
- if (MediaController.getInstance().canSaveToGallery() && telegramPath != null && finalFile != null && finalFile.exists() && (location.endsWith(".mp4") || location.endsWith(".jpg"))) {
+ if (MediaController.getInstance().canSaveToGallery() && telegramPath != null && finalFile != null && (location.endsWith(".mp4") || location.endsWith(".jpg"))) {
if (finalFile.toString().startsWith(telegramPath.toString())) {
AndroidUtilities.addMediaToGallery(finalFile.toString());
}
@@ -1482,24 +1553,33 @@ public class ImageLoader {
return memCache.get(key);
}
- public void replaceImageInCache(final String oldKey, final String newKey, final TLRPC.FileLocation newLocation) {
- AndroidUtilities.runOnUIThread(new Runnable() {
- @Override
- public void run() {
- ArrayList<String> arr = memCache.getFilterKeys(oldKey);
- if (arr != null) {
- for (String filter : arr) {
- String oldK = oldKey + "@" + filter;
- String newK = newKey + "@" + filter;
- performReplace(oldK, newK);
- NotificationCenter.getInstance().postNotificationName(NotificationCenter.didReplacedPhotoInMemCache, oldK, newK, newLocation);
- }
- } else {
- performReplace(oldKey, newKey);
- NotificationCenter.getInstance().postNotificationName(NotificationCenter.didReplacedPhotoInMemCache, oldKey, newKey, newLocation);
- }
+ private void replaceImageInCacheInternal(final String oldKey, final String newKey, final TLRPC.FileLocation newLocation) {
+ ArrayList<String> arr = memCache.getFilterKeys(oldKey);
+ if (arr != null) {
+ for (int a = 0; a < arr.size(); a++) {
+ String filter = arr.get(a);
+ String oldK = oldKey + "@" + filter;
+ String newK = newKey + "@" + filter;
+ performReplace(oldK, newK);
+ NotificationCenter.getInstance().postNotificationName(NotificationCenter.didReplacedPhotoInMemCache, oldK, newK, newLocation);
}
- });
+ } else {
+ performReplace(oldKey, newKey);
+ NotificationCenter.getInstance().postNotificationName(NotificationCenter.didReplacedPhotoInMemCache, oldKey, newKey, newLocation);
+ }
+ }
+
+ public void replaceImageInCache(final String oldKey, final String newKey, final TLRPC.FileLocation newLocation, boolean post) {
+ if (post) {
+ AndroidUtilities.runOnUIThread(new Runnable() {
+ @Override
+ public void run() {
+ replaceImageInCacheInternal(oldKey, newKey, newLocation);
+ }
+ });
+ } else {
+ replaceImageInCacheInternal(oldKey, newKey, newLocation);
+ }
}
public void putImageToCache(BitmapDrawable bitmap, String key) {
@@ -1621,15 +1701,21 @@ public class ImageLoader {
}
if (thumb != 2) {
+ CacheImage img = new CacheImage();
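+ // Flag MP4/GIF http locations and animated GIF documents so the cache task produces an AnimatedFileDrawable.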
+ if (httpLocation != null && (httpLocation.endsWith("mp4") || httpLocation.endsWith("gif")) || imageLocation instanceof TLRPC.Document && MessageObject.isGifDocument((TLRPC.Document) imageLocation)) {
+ img.animatedFile = true;
+ }
+
if (cacheFile == null) {
if (cacheOnly || size == 0 || httpLocation != null) {
cacheFile = new File(FileLoader.getInstance().getDirectory(FileLoader.MEDIA_DIR_CACHE), url);
+ } else if (imageLocation instanceof TLRPC.Document) {
+ cacheFile = new File(FileLoader.getInstance().getDirectory(FileLoader.MEDIA_DIR_DOCUMENT), url);
} else {
cacheFile = new File(FileLoader.getInstance().getDirectory(FileLoader.MEDIA_DIR_IMAGE), url);
}
}
- CacheImage img = new CacheImage();
img.thumb = thumb != 0;
img.key = key;
img.filter = filter;
@@ -1654,7 +1740,7 @@ public class ImageLoader {
TLRPC.FileLocation location = (TLRPC.FileLocation) imageLocation;
FileLoader.getInstance().loadFile(location, ext, size, size == 0 || location.key != null || cacheOnly);
} else if (imageLocation instanceof TLRPC.Document) {
- FileLoader.getInstance().loadFile((TLRPC.Document) imageLocation, true, true);
+ FileLoader.getInstance().loadFile((TLRPC.Document) imageLocation, true, cacheOnly);
}
} else {
String file = Utilities.MD5(httpLocation);
@@ -1725,12 +1811,12 @@ public class ImageLoader {
saveImageToCache = true;
}
} else if (imageLocation instanceof TLRPC.Document) {
- TLRPC.Document location = (TLRPC.Document) imageLocation;
- if (location.id == 0 || location.dc_id == 0) {
+ TLRPC.Document document = (TLRPC.Document) imageLocation;
+ if (document.id == 0 || document.dc_id == 0) {
return;
}
- key = location.dc_id + "_" + location.id;
- String docExt = FileLoader.getDocumentFileName(location);
+ key = document.dc_id + "_" + document.id;
+ String docExt = FileLoader.getDocumentFileName(document);
int idx;
if (docExt == null || (idx = docExt.lastIndexOf(".")) == -1) {
docExt = "";
@@ -1744,7 +1830,7 @@ public class ImageLoader {
if (thumbKey != null) {
thumbUrl = thumbKey + "." + ext;
}
- saveImageToCache = true;
+ saveImageToCache = !MessageObject.isGifDocument(document);
}
if (imageLocation == thumbLocation) {
imageLocation = null;
@@ -1768,6 +1854,7 @@ public class ImageLoader {
}
if (httpLocation != null) {
+ createLoadOperationForImageReceiver(imageReceiver, thumbKey, thumbUrl, ext, thumbLocation, null, thumbFilter, 0, true, thumbSet ? 2 : 1);
createLoadOperationForImageReceiver(imageReceiver, key, url, ext, null, httpLocation, filter, 0, true, 0);
} else {
createLoadOperationForImageReceiver(imageReceiver, thumbKey, thumbUrl, ext, thumbLocation, null, thumbFilter, 0, true, thumbSet ? 2 : 1);
@@ -1806,7 +1893,8 @@ public class ImageLoader {
}
imageLoadingByUrl.remove(location);
CacheOutTask task = null;
- for (ImageReceiver imageReceiver : img.imageReceiverArray) {
+ for (int a = 0; a < img.imageReceiverArray.size(); a++) {
+ ImageReceiver imageReceiver = img.imageReceiverArray.get(a);
CacheImage cacheImage = imageLoadingByKeys.get(img.key);
if (cacheImage == null) {
cacheImage = new CacheImage();
@@ -1817,6 +1905,7 @@ public class ImageLoader {
cacheImage.ext = img.ext;
cacheImage.cacheTask = task = new CacheOutTask(cacheImage);
cacheImage.filter = img.filter;
+ cacheImage.animatedFile = img.animatedFile;
imageLoadingByKeys.put(cacheImage.key, cacheImage);
}
cacheImage.addImageReceiver(imageReceiver);
@@ -1851,7 +1940,7 @@ public class ImageLoader {
if (complete) {
currentHttpTasksCount--;
}
- while (currentHttpTasksCount < 1 && !httpTasks.isEmpty()) {
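+ // Allow up to four HTTP image downloads to run in parallel instead of one.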
+ while (currentHttpTasksCount < 4 && !httpTasks.isEmpty()) {
HttpImageTask task = httpTasks.poll();
if (android.os.Build.VERSION.SDK_INT >= 11) {
task.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR, null, null, null);
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/ImageReceiver.java b/TMessagesProj/src/main/java/org/telegram/messenger/ImageReceiver.java
index 12ce3f535..456b7cc42 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/ImageReceiver.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/ImageReceiver.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
@@ -25,6 +25,7 @@ import android.view.View;
import org.telegram.tgnet.TLObject;
import org.telegram.tgnet.TLRPC;
+import org.telegram.ui.Components.AnimatedFileDrawable;
public class ImageReceiver implements NotificationCenter.NotificationCenterDelegate {
@@ -62,9 +63,10 @@ public class ImageReceiver implements NotificationCenter.NotificationCenterDeleg
private TLRPC.FileLocation currentThumbLocation;
private int currentSize;
private boolean currentCacheOnly;
- private BitmapDrawable currentImage;
- private BitmapDrawable currentThumb;
+ private Drawable currentImage;
+ private Drawable currentThumb;
private Drawable staticThumb;
+ private boolean allowStartAnimation = true;
private boolean needsQualityThumb;
private boolean shouldGenerateQualityThumb;
@@ -167,8 +169,6 @@ public class ImageReceiver implements NotificationCenter.NotificationCenterDeleg
return;
}
-
-
if (!(thumbLocation instanceof TLRPC.TL_fileLocation)) {
thumbLocation = null;
}
@@ -180,7 +180,11 @@ public class ImageReceiver implements NotificationCenter.NotificationCenterDeleg
key = location.volume_id + "_" + location.local_id;
} else {
TLRPC.Document location = (TLRPC.Document) fileLocation;
- key = location.dc_id + "_" + location.id;
+ if (location.dc_id != 0) {
+ key = location.dc_id + "_" + location.id;
+ } else {
+ fileLocation = null;
+ }
}
} else if (httpUrl != null) {
key = Utilities.MD5(httpUrl);
@@ -504,12 +508,14 @@ public class ImageReceiver implements NotificationCenter.NotificationCenterDeleg
}
}
- private void checkAlphaAnimation() {
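+ // When skip is set the timer is only reset, so the crossfade effectively starts once the first animation frame is ready.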
+ private void checkAlphaAnimation(boolean skip) {
if (currentAlpha != 1) {
- long currentTime = System.currentTimeMillis();
- currentAlpha += (currentTime - lastUpdateAlphaTime) / 150.0f;
- if (currentAlpha > 1) {
- currentAlpha = 1;
+ if (!skip) {
+ long currentTime = System.currentTimeMillis();
+ currentAlpha += (currentTime - lastUpdateAlphaTime) / 150.0f;
+ if (currentAlpha > 1) {
+ currentAlpha = 1;
+ }
}
lastUpdateAlphaTime = System.currentTimeMillis();
if (parentView != null) {
@@ -524,44 +530,51 @@ public class ImageReceiver implements NotificationCenter.NotificationCenterDeleg
public boolean draw(Canvas canvas) {
try {
- BitmapDrawable bitmapDrawable = null;
- if (!forcePreview && currentImage != null) {
- bitmapDrawable = currentImage;
+ Drawable drawable = null;
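+ // An animated drawable that has not decoded its first frame yet is not ready; keep showing the thumb meanwhile.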
+ boolean animationNotReady = currentImage instanceof AnimatedFileDrawable && !((AnimatedFileDrawable) currentImage).hasBitmap();
+ if (!forcePreview && currentImage != null && !animationNotReady) {
+ drawable = currentImage;
} else if (staticThumb instanceof BitmapDrawable) {
- bitmapDrawable = (BitmapDrawable) staticThumb;
+ drawable = staticThumb;
} else if (currentThumb != null) {
- bitmapDrawable = currentThumb;
+ drawable = currentThumb;
}
- if (bitmapDrawable != null) {
+ if (drawable != null) {
if (crossfadeAlpha != 0) {
- if (crossfadeWithThumb && currentAlpha != 1.0f) {
- Drawable thumbDrawable = null;
- if (bitmapDrawable == currentImage) {
- if (staticThumb != null) {
- thumbDrawable = staticThumb;
- } else if (currentThumb != null) {
- thumbDrawable = currentThumb;
+ if (crossfadeWithThumb && animationNotReady) {
+ drawDrawable(canvas, drawable, (int) (overrideAlpha * 255));
+ } else {
+ if (crossfadeWithThumb && currentAlpha != 1.0f) {
+ Drawable thumbDrawable = null;
+ if (drawable == currentImage) {
+ if (staticThumb != null) {
+ thumbDrawable = staticThumb;
+ } else if (currentThumb != null) {
+ thumbDrawable = currentThumb;
+ }
+ } else if (drawable == currentThumb) {
+ if (staticThumb != null) {
+ thumbDrawable = staticThumb;
+ }
}
- } else if (bitmapDrawable == currentThumb) {
- if (staticThumb != null) {
- thumbDrawable = staticThumb;
+ if (thumbDrawable != null) {
+ drawDrawable(canvas, thumbDrawable, (int) (overrideAlpha * 255));
}
}
- if (thumbDrawable != null) {
- drawDrawable(canvas, thumbDrawable, (int) (overrideAlpha * 255));
- }
+ drawDrawable(canvas, drawable, (int) (overrideAlpha * currentAlpha * 255));
}
- drawDrawable(canvas, bitmapDrawable, (int) (overrideAlpha * currentAlpha * 255));
} else {
- drawDrawable(canvas, bitmapDrawable, (int) (overrideAlpha * 255));
+ drawDrawable(canvas, drawable, (int) (overrideAlpha * 255));
}
- checkAlphaAnimation();
+ checkAlphaAnimation(animationNotReady && crossfadeWithThumb);
return true;
} else if (staticThumb != null) {
drawDrawable(canvas, staticThumb, 255);
- checkAlphaAnimation();
+ checkAlphaAnimation(animationNotReady);
return true;
+ } else {
+ checkAlphaAnimation(animationNotReady);
}
} catch (Exception e) {
FileLog.e("tmessages", e);
@@ -570,10 +583,12 @@ public class ImageReceiver implements NotificationCenter.NotificationCenterDeleg
}
public Bitmap getBitmap() {
- if (currentImage != null) {
- return currentImage.getBitmap();
- } else if (currentThumb != null) {
- return currentThumb.getBitmap();
+ if (currentImage instanceof AnimatedFileDrawable) {
+ return ((AnimatedFileDrawable) currentImage).getAnimatedBitmap();
+ } else if (currentImage instanceof BitmapDrawable) {
+ return ((BitmapDrawable) currentImage).getBitmap();
+ } else if (currentThumb instanceof BitmapDrawable) {
+ return ((BitmapDrawable) currentThumb).getBitmap();
} else if (staticThumb instanceof BitmapDrawable) {
return ((BitmapDrawable) staticThumb).getBitmap();
}
@@ -581,11 +596,17 @@ public class ImageReceiver implements NotificationCenter.NotificationCenterDeleg
}
public int getBitmapWidth() {
+ if (currentImage instanceof AnimatedFileDrawable) {
+ return orientation == 0 || orientation == 180 ? currentImage.getIntrinsicWidth() : currentImage.getIntrinsicHeight();
+ }
Bitmap bitmap = getBitmap();
return orientation == 0 || orientation == 180 ? bitmap.getWidth() : bitmap.getHeight();
}
public int getBitmapHeight() {
+ if (currentImage instanceof AnimatedFileDrawable) {
+ return orientation == 0 || orientation == 180 ? currentImage.getIntrinsicHeight() : currentImage.getIntrinsicWidth();
+ }
Bitmap bitmap = getBitmap();
return orientation == 0 || orientation == 180 ? bitmap.getHeight() : bitmap.getWidth();
}
@@ -620,12 +641,20 @@ public class ImageReceiver implements NotificationCenter.NotificationCenterDeleg
return currentImage != null || currentThumb != null || currentKey != null || currentHttpUrl != null || staticThumb != null;
}
+ public boolean hasBitmapImage() {
+ return currentImage != null || currentThumb != null || staticThumb != null;
+ }
+
public void setAspectFit(boolean value) {
isAspectFit = value;
}
public void setParentView(View view) {
parentView = view;
+ if (currentImage instanceof AnimatedFileDrawable) {
+ AnimatedFileDrawable fileDrawable = (AnimatedFileDrawable) currentImage;
+ fileDrawable.setParentView(parentView);
+ }
}
public void setImageCoords(int x, int y, int width, int height) {
@@ -765,6 +794,30 @@ public class ImageReceiver implements NotificationCenter.NotificationCenterDeleg
return shouldGenerateQualityThumb;
}
+ public void setAllowStartAnimation(boolean value) {
+ allowStartAnimation = value;
+ }
+
+ public boolean isAllowStartAnimation() {
+ return allowStartAnimation;
+ }
+
+ public void startAnimation() {
+ if (currentImage instanceof AnimatedFileDrawable) {
+ ((AnimatedFileDrawable) currentImage).start();
+ }
+ }
+
+ public void stopAnimation() {
+ if (currentImage instanceof AnimatedFileDrawable) {
+ ((AnimatedFileDrawable) currentImage).stop();
+ }
+ }
+
+ public boolean isAnimationRunning() {
+ return currentImage instanceof AnimatedFileDrawable && ((AnimatedFileDrawable) currentImage).isRunning();
+ }
+
protected Integer getTag(boolean thumb) {
if (thumb) {
return thumbTag;
@@ -781,15 +834,17 @@ public class ImageReceiver implements NotificationCenter.NotificationCenterDeleg
}
}
- protected void setImageBitmapByKey(BitmapDrawable bitmap, String key, boolean thumb, boolean memCache) {
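+ // Now returns whether the drawable was actually taken, letting callers recycle unused AnimatedFileDrawable copies.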
+ protected boolean setImageBitmapByKey(BitmapDrawable bitmap, String key, boolean thumb, boolean memCache) {
if (bitmap == null || key == null) {
- return;
+ return false;
}
if (!thumb) {
if (currentKey == null || !key.equals(currentKey)) {
- return;
+ return false;
+ }
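+ // Animated drawables bypass the memory cache, so only plain bitmaps are use-counted.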
+ if (!(bitmap instanceof AnimatedFileDrawable)) {
+ ImageLoader.getInstance().incrementUseCount(currentKey);
}
- ImageLoader.getInstance().incrementUseCount(currentKey);
currentImage = bitmap;
if (roundRadius != 0) {
Bitmap object = bitmap.getBitmap();
@@ -807,6 +862,13 @@ public class ImageReceiver implements NotificationCenter.NotificationCenterDeleg
} else {
currentAlpha = 1.0f;
}
+ if (bitmap instanceof AnimatedFileDrawable) {
+ AnimatedFileDrawable fileDrawable = (AnimatedFileDrawable) bitmap;
+ fileDrawable.setParentView(parentView);
+ if (allowStartAnimation) {
+ fileDrawable.start();
+ }
+ }
if (parentView != null) {
if (invalidateAll) {
@@ -815,9 +877,9 @@ public class ImageReceiver implements NotificationCenter.NotificationCenterDeleg
parentView.invalidate(imageX, imageY, imageX + imageW, imageY + imageH);
}
}
- } else if (currentThumb == null && (currentImage == null || forcePreview)) {
+ } else if (currentThumb == null && (currentImage == null || (currentImage instanceof AnimatedFileDrawable && !((AnimatedFileDrawable) currentImage).hasBitmap()) || forcePreview)) {
if (currentThumbKey == null || !key.equals(currentThumbKey)) {
- return;
+ return false;
}
ImageLoader.getInstance().incrementUseCount(currentThumbKey);
@@ -843,11 +905,12 @@ public class ImageReceiver implements NotificationCenter.NotificationCenterDeleg
if (delegate != null) {
delegate.didSetImage(this, currentImage != null || currentThumb != null || staticThumb != null, currentImage == null);
}
+ return true;
}
private void recycleBitmap(String newKey, boolean thumb) {
String key;
- BitmapDrawable image;
+ Drawable image;
if (thumb) {
key = currentThumbKey;
image = currentThumb;
@@ -855,19 +918,21 @@ public class ImageReceiver implements NotificationCenter.NotificationCenterDeleg
key = currentKey;
image = currentImage;
}
- BitmapDrawable newBitmap = null;
- if (newKey != null) {
- newBitmap = ImageLoader.getInstance().getImageFromMemory(newKey);
- }
- if (key != null && image != newBitmap && image != null) {
- Bitmap bitmap = image.getBitmap();
- boolean canDelete = ImageLoader.getInstance().decrementUseCount(key);
- if (!ImageLoader.getInstance().isInCache(key)) {
- if (ImageLoader.getInstance().runtimeHack != null) {
- ImageLoader.getInstance().runtimeHack.trackAlloc(bitmap.getRowBytes() * bitmap.getHeight());
- }
- if (canDelete) {
- bitmap.recycle();
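+ // Animated drawables own their frame buffers and are recycled directly; plain bitmaps still go through the use-count bookkeeping below.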
+ if (key != null && (newKey == null || !newKey.equals(key)) && image != null) {
+ if (image instanceof AnimatedFileDrawable) {
+ AnimatedFileDrawable fileDrawable = (AnimatedFileDrawable) image;
+ fileDrawable.stop();
+ fileDrawable.recycle();
+ } else if (image instanceof BitmapDrawable) {
+ Bitmap bitmap = ((BitmapDrawable) image).getBitmap();
+ boolean canDelete = ImageLoader.getInstance().decrementUseCount(key);
+ if (!ImageLoader.getInstance().isInCache(key)) {
+ if (ImageLoader.getInstance().runtimeHack != null) {
+ ImageLoader.getInstance().runtimeHack.trackAlloc(bitmap.getRowBytes() * bitmap.getHeight());
+ }
+ if (canDelete) {
+ bitmap.recycle();
+ }
}
}
}
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/LocaleController.java b/TMessagesProj/src/main/java/org/telegram/messenger/LocaleController.java
index 355427b08..db7b99f7f 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/LocaleController.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/LocaleController.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
@@ -820,7 +820,11 @@ public class LocaleController {
return String.format(Locale.US, "%d.%d%s", number, lastDec, K);
}
}
- return String.format(Locale.US, "%d%s", number, K);
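+ // K accumulates one "K" per division by 1000, so a length of 2 means millions; collapse "KK" into "M".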
+ if (K.length() == 2) {
+ return String.format(Locale.US, "%dM", number);
+ } else {
+ return String.format(Locale.US, "%d%s", number, K);
+ }
}
public static String formatUserStatus(TLRPC.User user) {
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/LruCache.java b/TMessagesProj/src/main/java/org/telegram/messenger/LruCache.java
index d1bed2cda..0b894cf2a 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/LruCache.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/LruCache.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/MediaController.java b/TMessagesProj/src/main/java/org/telegram/messenger/MediaController.java
index 50ec51e5a..902630b06 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/MediaController.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/MediaController.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
@@ -46,7 +46,6 @@ import android.os.ParcelFileDescriptor;
import android.os.PowerManager;
import android.os.Vibrator;
import android.provider.MediaStore;
-import android.view.View;
import org.telegram.messenger.audioinfo.AudioInfo;
import org.telegram.messenger.query.SharedMediaQuery;
@@ -56,8 +55,6 @@ import org.telegram.messenger.video.Mp4Movie;
import org.telegram.messenger.video.OutputSurface;
import org.telegram.tgnet.ConnectionsManager;
import org.telegram.tgnet.TLRPC;
-import org.telegram.ui.Cells.ChatMediaCell;
-import org.telegram.ui.Components.GifDrawable;
import java.io.File;
import java.io.FileInputStream;
@@ -190,7 +187,6 @@ public class MediaController implements NotificationCenter.NotificationCenterDel
}
public static class SearchImage {
- public int uid;
public String id;
public String imageUrl;
public String thumbUrl;
@@ -203,6 +199,7 @@ public class MediaController implements NotificationCenter.NotificationCenterDel
public String thumbPath;
public String imagePath;
public CharSequence caption;
+ public TLRPC.Document document;
}
public final static String MIME_TYPE = "video/avc";
@@ -230,6 +227,8 @@ public class MediaController implements NotificationCenter.NotificationCenterDel
public static final int AUTODOWNLOAD_MASK_AUDIO = 2;
public static final int AUTODOWNLOAD_MASK_VIDEO = 4;
public static final int AUTODOWNLOAD_MASK_DOCUMENT = 8;
+ public static final int AUTODOWNLOAD_MASK_MUSIC = 16;
+ public static final int AUTODOWNLOAD_MASK_GIF = 32;
public int mobileDataDownloadMask = 0;
public int wifiDownloadMask = 0;
public int roamingDownloadMask = 0;
@@ -237,10 +236,13 @@ public class MediaController implements NotificationCenter.NotificationCenterDel
private ArrayList<DownloadObject> photoDownloadQueue = new ArrayList<>();
private ArrayList<DownloadObject> audioDownloadQueue = new ArrayList<>();
private ArrayList<DownloadObject> documentDownloadQueue = new ArrayList<>();
+ private ArrayList<DownloadObject> musicDownloadQueue = new ArrayList<>();
+ private ArrayList<DownloadObject> gifDownloadQueue = new ArrayList<>();
private ArrayList<DownloadObject> videoDownloadQueue = new ArrayList<>();
private HashMap<String, DownloadObject> downloadQueueKeys = new HashMap<>();
private boolean saveToGallery = true;
+ private boolean autoplayGifs = true;
private boolean shuffleMusic;
private int repeatMode;
@@ -254,10 +256,6 @@ public class MediaController implements NotificationCenter.NotificationCenterDel
private ArrayList deleteLaterArray = new ArrayList<>();
private int lastTag = 0;
- private GifDrawable currentGifDrawable;
- private MessageObject currentGifMessageObject;
- private ChatMediaCell currentMediaCell;
-
private boolean isPaused = false;
private MediaPlayer audioPlayer = null;
private AudioTrack audioTrackPlayer = null;
@@ -535,10 +533,11 @@ public class MediaController implements NotificationCenter.NotificationCenterDel
fileDecodingQueue = new DispatchQueue("fileDecodingQueue");
SharedPreferences preferences = ApplicationLoader.applicationContext.getSharedPreferences("mainconfig", Activity.MODE_PRIVATE);
- mobileDataDownloadMask = preferences.getInt("mobileDataDownloadMask", AUTODOWNLOAD_MASK_PHOTO | AUTODOWNLOAD_MASK_AUDIO);
- wifiDownloadMask = preferences.getInt("wifiDownloadMask", AUTODOWNLOAD_MASK_PHOTO | AUTODOWNLOAD_MASK_AUDIO);
+ mobileDataDownloadMask = preferences.getInt("mobileDataDownloadMask", AUTODOWNLOAD_MASK_PHOTO | AUTODOWNLOAD_MASK_AUDIO | AUTODOWNLOAD_MASK_MUSIC | (Build.VERSION.SDK_INT >= 11 ? AUTODOWNLOAD_MASK_GIF : 0));
+ wifiDownloadMask = preferences.getInt("wifiDownloadMask", AUTODOWNLOAD_MASK_PHOTO | AUTODOWNLOAD_MASK_AUDIO | AUTODOWNLOAD_MASK_MUSIC | (Build.VERSION.SDK_INT >= 11 ? AUTODOWNLOAD_MASK_GIF : 0));
roamingDownloadMask = preferences.getInt("roamingDownloadMask", 0);
saveToGallery = preferences.getBoolean("save_gallery", false);
+ autoplayGifs = preferences.getBoolean("autoplay_gif", true) && Build.VERSION.SDK_INT >= 11;
shuffleMusic = preferences.getBoolean("shuffleMusic", false);
repeatMode = preferences.getInt("repeatMode", 0);
@@ -670,18 +669,14 @@ public class MediaController implements NotificationCenter.NotificationCenterDel
public void cleanup() {
cleanupPlayer(false, true);
- if (currentGifDrawable != null) {
- currentGifDrawable.recycle();
- currentGifDrawable = null;
- }
- currentMediaCell = null;
audioInfo = null;
playMusicAgain = false;
- currentGifMessageObject = null;
photoDownloadQueue.clear();
audioDownloadQueue.clear();
documentDownloadQueue.clear();
videoDownloadQueue.clear();
+ musicDownloadQueue.clear();
+ gifDownloadQueue.clear();
downloadQueueKeys.clear();
videoConvertQueue.clear();
playlist.clear();
@@ -704,6 +699,12 @@ public class MediaController implements NotificationCenter.NotificationCenterDel
if ((mobileDataDownloadMask & AUTODOWNLOAD_MASK_DOCUMENT) != 0 || (wifiDownloadMask & AUTODOWNLOAD_MASK_DOCUMENT) != 0 || (roamingDownloadMask & AUTODOWNLOAD_MASK_DOCUMENT) != 0) {
mask |= AUTODOWNLOAD_MASK_DOCUMENT;
}
+ if ((mobileDataDownloadMask & AUTODOWNLOAD_MASK_MUSIC) != 0 || (wifiDownloadMask & AUTODOWNLOAD_MASK_MUSIC) != 0 || (roamingDownloadMask & AUTODOWNLOAD_MASK_MUSIC) != 0) {
+ mask |= AUTODOWNLOAD_MASK_MUSIC;
+ }
+ if ((mobileDataDownloadMask & AUTODOWNLOAD_MASK_GIF) != 0 || (wifiDownloadMask & AUTODOWNLOAD_MASK_GIF) != 0 || (roamingDownloadMask & AUTODOWNLOAD_MASK_GIF) != 0) {
+ mask |= AUTODOWNLOAD_MASK_GIF;
+ }
return mask;
}
@@ -718,7 +719,8 @@ public class MediaController implements NotificationCenter.NotificationCenterDel
newDownloadObjectsAvailable(AUTODOWNLOAD_MASK_PHOTO);
}
} else {
- for (DownloadObject downloadObject : photoDownloadQueue) {
+ for (int a = 0; a < photoDownloadQueue.size(); a++) {
+ DownloadObject downloadObject = photoDownloadQueue.get(a);
FileLoader.getInstance().cancelLoadFile((TLRPC.PhotoSize) downloadObject.object);
}
photoDownloadQueue.clear();
@@ -728,7 +730,8 @@ public class MediaController implements NotificationCenter.NotificationCenterDel
newDownloadObjectsAvailable(AUTODOWNLOAD_MASK_AUDIO);
}
} else {
- for (DownloadObject downloadObject : audioDownloadQueue) {
+ for (int a = 0; a < audioDownloadQueue.size(); a++) {
+ DownloadObject downloadObject = audioDownloadQueue.get(a);
FileLoader.getInstance().cancelLoadFile((TLRPC.Audio) downloadObject.object);
}
audioDownloadQueue.clear();
@@ -738,8 +741,10 @@ public class MediaController implements NotificationCenter.NotificationCenterDel
newDownloadObjectsAvailable(AUTODOWNLOAD_MASK_DOCUMENT);
}
} else {
- for (DownloadObject downloadObject : documentDownloadQueue) {
- FileLoader.getInstance().cancelLoadFile((TLRPC.Document) downloadObject.object);
+ for (int a = 0; a < documentDownloadQueue.size(); a++) {
+ DownloadObject downloadObject = documentDownloadQueue.get(a);
+ TLRPC.Document document = (TLRPC.Document) downloadObject.object;
+ FileLoader.getInstance().cancelLoadFile(document);
}
documentDownloadQueue.clear();
}
@@ -748,11 +753,36 @@ public class MediaController implements NotificationCenter.NotificationCenterDel
newDownloadObjectsAvailable(AUTODOWNLOAD_MASK_VIDEO);
}
} else {
- for (DownloadObject downloadObject : videoDownloadQueue) {
+ for (int a = 0; a < videoDownloadQueue.size(); a++) {
+ DownloadObject downloadObject = videoDownloadQueue.get(a);
FileLoader.getInstance().cancelLoadFile((TLRPC.Video) downloadObject.object);
}
videoDownloadQueue.clear();
}
+ if ((currentMask & AUTODOWNLOAD_MASK_MUSIC) != 0) {
+ if (musicDownloadQueue.isEmpty()) {
+ newDownloadObjectsAvailable(AUTODOWNLOAD_MASK_MUSIC);
+ }
+ } else {
+ for (int a = 0; a < musicDownloadQueue.size(); a++) {
+ DownloadObject downloadObject = musicDownloadQueue.get(a);
+ TLRPC.Document document = (TLRPC.Document) downloadObject.object;
+ FileLoader.getInstance().cancelLoadFile(document);
+ }
+ musicDownloadQueue.clear();
+ }
+ if ((currentMask & AUTODOWNLOAD_MASK_GIF) != 0) {
+ if (gifDownloadQueue.isEmpty()) {
+ newDownloadObjectsAvailable(AUTODOWNLOAD_MASK_GIF);
+ }
+ } else {
+ for (int a = 0; a < gifDownloadQueue.size(); a++) {
+ DownloadObject downloadObject = gifDownloadQueue.get(a);
+ TLRPC.Document document = (TLRPC.Document) downloadObject.object;
+ FileLoader.getInstance().cancelLoadFile(document);
+ }
+ gifDownloadQueue.clear();
+ }
int mask = getAutodownloadMask();
if (mask == 0) {
@@ -770,6 +800,12 @@ public class MediaController implements NotificationCenter.NotificationCenterDel
if ((mask & AUTODOWNLOAD_MASK_DOCUMENT) == 0) {
MessagesStorage.getInstance().clearDownloadQueue(AUTODOWNLOAD_MASK_DOCUMENT);
}
+ if ((mask & AUTODOWNLOAD_MASK_MUSIC) == 0) {
+ MessagesStorage.getInstance().clearDownloadQueue(AUTODOWNLOAD_MASK_MUSIC);
+ }
+ if ((mask & AUTODOWNLOAD_MASK_GIF) == 0) {
+ MessagesStorage.getInstance().clearDownloadQueue(AUTODOWNLOAD_MASK_GIF);
+ }
}
}
@@ -800,9 +836,20 @@ public class MediaController implements NotificationCenter.NotificationCenterDel
queue = videoDownloadQueue;
} else if (type == AUTODOWNLOAD_MASK_DOCUMENT) {
queue = documentDownloadQueue;
+ } else if (type == AUTODOWNLOAD_MASK_MUSIC) {
+ queue = musicDownloadQueue;
+ } else if (type == AUTODOWNLOAD_MASK_GIF) {
+ queue = gifDownloadQueue;
}
- for (DownloadObject downloadObject : objects) {
- String path = FileLoader.getAttachFileName(downloadObject.object);
+ for (int a = 0; a < objects.size(); a++) {
+ DownloadObject downloadObject = objects.get(a);
+ String path;
+ if (downloadObject.object instanceof TLRPC.Document) {
+ TLRPC.Document document = (TLRPC.Document) downloadObject.object;
+ path = FileLoader.getAttachFileName(document);
+ } else {
+ path = FileLoader.getAttachFileName(downloadObject.object);
+ }
if (downloadQueueKeys.containsKey(path)) {
continue;
}
@@ -815,7 +862,8 @@ public class MediaController implements NotificationCenter.NotificationCenterDel
} else if (downloadObject.object instanceof TLRPC.Video) {
FileLoader.getInstance().loadFile((TLRPC.Video) downloadObject.object, false);
} else if (downloadObject.object instanceof TLRPC.Document) {
- FileLoader.getInstance().loadFile((TLRPC.Document) downloadObject.object, false, false);
+ TLRPC.Document document = (TLRPC.Document) downloadObject.object;
+ FileLoader.getInstance().loadFile(document, false, false);
} else {
added = false;
}
@@ -840,6 +888,12 @@ public class MediaController implements NotificationCenter.NotificationCenterDel
if ((mask & AUTODOWNLOAD_MASK_DOCUMENT) != 0 && (downloadMask & AUTODOWNLOAD_MASK_DOCUMENT) != 0 && documentDownloadQueue.isEmpty()) {
MessagesStorage.getInstance().getDownloadQueue(AUTODOWNLOAD_MASK_DOCUMENT);
}
+ if ((mask & AUTODOWNLOAD_MASK_MUSIC) != 0 && (downloadMask & AUTODOWNLOAD_MASK_MUSIC) != 0 && musicDownloadQueue.isEmpty()) {
+ MessagesStorage.getInstance().getDownloadQueue(AUTODOWNLOAD_MASK_MUSIC);
+ }
+ if ((mask & AUTODOWNLOAD_MASK_GIF) != 0 && (downloadMask & AUTODOWNLOAD_MASK_GIF) != 0 && gifDownloadQueue.isEmpty()) {
+ MessagesStorage.getInstance().getDownloadQueue(AUTODOWNLOAD_MASK_GIF);
+ }
}
private void checkDownloadFinished(String fileName, int state) {
@@ -869,6 +923,16 @@ public class MediaController implements NotificationCenter.NotificationCenterDel
if (documentDownloadQueue.isEmpty()) {
newDownloadObjectsAvailable(AUTODOWNLOAD_MASK_DOCUMENT);
}
+ } else if (downloadObject.type == AUTODOWNLOAD_MASK_MUSIC) {
+ musicDownloadQueue.remove(downloadObject);
+ if (musicDownloadQueue.isEmpty()) {
+ newDownloadObjectsAvailable(AUTODOWNLOAD_MASK_MUSIC);
+ }
+ } else if (downloadObject.type == AUTODOWNLOAD_MASK_GIF) {
+ gifDownloadQueue.remove(downloadObject);
+ if (gifDownloadQueue.isEmpty()) {
+ newDownloadObjectsAvailable(AUTODOWNLOAD_MASK_GIF);
+ }
}
}
}
@@ -1596,6 +1660,9 @@ public class MediaController implements NotificationCenter.NotificationCenterDel
}
private void checkIsNextMusicFileDownloaded() {
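+ // Do not preload the next track unless music autodownload is enabled for the current connection type.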
+ if ((getCurrentDownloadMask() & AUTODOWNLOAD_MASK_MUSIC) == 0) {
+ return;
+ }
ArrayList<MessageObject> currentPlayList = shuffleMusic ? shuffledPlaylist : playlist;
if (currentPlayList == null || currentPlayList.size() < 2) {
return;
@@ -1615,7 +1682,7 @@ public class MediaController implements NotificationCenter.NotificationCenterDel
final File cacheFile = file != null ? file : FileLoader.getPathToMessage(nextAudio.messageOwner);
boolean exist = cacheFile != null && cacheFile.exists();
if (cacheFile != null && cacheFile != file && !cacheFile.exists() && nextAudio.isMusic()) {
- FileLoader.getInstance().loadFile(nextAudio.messageOwner.media.document, true, false);
+ FileLoader.getInstance().loadFile(nextAudio.messageOwner.media.document, false, false);
}
}
@@ -1643,7 +1710,7 @@ public class MediaController implements NotificationCenter.NotificationCenterDel
}
final File cacheFile = file != null ? file : FileLoader.getPathToMessage(messageObject.messageOwner);
if (cacheFile != null && cacheFile != file && !cacheFile.exists() && messageObject.isMusic()) {
- FileLoader.getInstance().loadFile(messageObject.messageOwner.media.document, true, false);
+ FileLoader.getInstance().loadFile(messageObject.messageOwner.media.document, false, false);
downloadingCurrentMessage = true;
isPaused = false;
lastProgress = 0;
@@ -2243,76 +2310,6 @@ public class MediaController implements NotificationCenter.NotificationCenterDel
}
}
- public GifDrawable getGifDrawable(ChatMediaCell cell, boolean create) {
- if (cell == null) {
- return null;
- }
-
- MessageObject messageObject = cell.getMessageObject();
- if (messageObject == null) {
- return null;
- }
-
- if (currentGifDrawable != null && currentGifMessageObject != null && messageObject.getId() == currentGifMessageObject.getId()) {
- currentMediaCell = cell;
- currentGifDrawable.parentView = new WeakReference<View>(cell);
- return currentGifDrawable;
- }
-
- if (create) {
- if (currentMediaCell != null) {
- if (currentGifDrawable != null) {
- currentGifDrawable.stop();
- currentGifDrawable.recycle();
- }
- currentMediaCell.clearGifImage();
- }
- currentGifMessageObject = cell.getMessageObject();
- currentMediaCell = cell;
-
- File cacheFile = null;
- if (currentGifMessageObject.messageOwner.attachPath != null && currentGifMessageObject.messageOwner.attachPath.length() != 0) {
- File f = new File(currentGifMessageObject.messageOwner.attachPath);
- if (f.length() > 0) {
- cacheFile = f;
- }
- }
- if (cacheFile == null) {
- cacheFile = FileLoader.getPathToMessage(messageObject.messageOwner);
- }
- try {
- currentGifDrawable = new GifDrawable(cacheFile);
- currentGifDrawable.parentView = new WeakReference<View>(cell);
- return currentGifDrawable;
- } catch (Exception e) {
- FileLog.e("tmessages", e);
- }
- }
-
- return null;
- }
-
- public void clearGifDrawable(ChatMediaCell cell) {
- if (cell == null) {
- return;
- }
-
- MessageObject messageObject = cell.getMessageObject();
- if (messageObject == null) {
- return;
- }
-
- if (currentGifMessageObject != null && messageObject.getId() == currentGifMessageObject.getId()) {
- if (currentGifDrawable != null) {
- currentGifDrawable.stop();
- currentGifDrawable.recycle();
- currentGifDrawable = null;
- }
- currentMediaCell = null;
- currentGifMessageObject = null;
- }
- }
-
public static boolean isWebp(Uri uri) {
ParcelFileDescriptor parcelFD = null;
FileInputStream input = null;
@@ -2437,6 +2434,14 @@ public class MediaController implements NotificationCenter.NotificationCenterDel
checkSaveToGalleryFiles();
}
+ public void toggleAutoplayGifs() {
+ autoplayGifs = !autoplayGifs;
+ SharedPreferences preferences = ApplicationLoader.applicationContext.getSharedPreferences("mainconfig", Activity.MODE_PRIVATE);
+ SharedPreferences.Editor editor = preferences.edit();
+ editor.putBoolean("autoplay_gif", autoplayGifs);
+ editor.commit();
+ }
+
public void checkSaveToGalleryFiles() {
try {
File telegramPath = new File(Environment.getExternalStorageDirectory(), "Telegram");
@@ -2469,6 +2474,10 @@ public class MediaController implements NotificationCenter.NotificationCenterDel
return saveToGallery;
}
+ public boolean canAutoplayGifs() {
+ return autoplayGifs;
+ }
+
public static void loadGalleryPhotosAlbums(final int guid) {
new Thread(new Runnable() {
@Override
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/MessageKeyData.java b/TMessagesProj/src/main/java/org/telegram/messenger/MessageKeyData.java
index ac218a10b..594a63209 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/MessageKeyData.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/MessageKeyData.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/MessageObject.java b/TMessagesProj/src/main/java/org/telegram/messenger/MessageObject.java
index 4a0d9bbc4..305818bb8 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/MessageObject.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/MessageObject.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
@@ -375,6 +375,8 @@ public class MessageObject {
}
} else if (isMusic()) {
messageText = LocaleController.getString("AttachMusic", R.string.AttachMusic);
+ } else if (isGif()) {
+ messageText = LocaleController.getString("AttachGif", R.string.AttachGif);
} else {
String name = FileLoader.getDocumentFileName(message.media.document);
if (name != null && name.length() > 0) {
@@ -418,7 +420,7 @@ public class MessageObject {
} else if (message.media instanceof TLRPC.TL_messageMediaDocument) {
contentType = 1;
if (message.media.document.mime_type != null) {
- if (message.media.document.mime_type.equals("image/gif") && message.media.document.thumb != null && !(message.media.document.thumb instanceof TLRPC.TL_photoSizeEmpty)) {
+ if (isGifDocument(message.media.document)) {
type = 8;
} else if (message.media.document.mime_type.equals("image/webp") && isSticker()) {
type = 13;
@@ -478,14 +480,31 @@ public class MessageObject {
generateThumbs(false);
}
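+ // A document is treated as a GIF either with the legacy "image/gif" mime
+ // type or as a "new" GIF: a video/mp4 carrying TL_documentAttributeAnimated.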
+ public static boolean isGifDocument(TLRPC.Document document) {
+ return document != null && document.thumb != null && document.mime_type != null && (document.mime_type.equals("image/gif") || isNewGifDocument(document));
+ }
+
+ public static boolean isNewGifDocument(TLRPC.Document document) {
+ if (document != null && document.mime_type != null && document.mime_type.equals("video/mp4")) {
+ for (int a = 0; a < document.attributes.size(); a++) {
+ if (document.attributes.get(a) instanceof TLRPC.TL_documentAttributeAnimated) {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
public void generateThumbs(boolean update) {
if (messageOwner instanceof TLRPC.TL_messageService) {
if (messageOwner.action instanceof TLRPC.TL_messageActionChatEditPhoto) {
if (!update) {
photoThumbs = new ArrayList<>(messageOwner.action.photo.sizes);
} else if (photoThumbs != null && !photoThumbs.isEmpty()) {
- for (TLRPC.PhotoSize photoObject : photoThumbs) {
- for (TLRPC.PhotoSize size : messageOwner.action.photo.sizes) {
+ for (int a = 0; a < photoThumbs.size(); a++) {
+ TLRPC.PhotoSize photoObject = photoThumbs.get(a);
+ for (int b = 0; b < messageOwner.action.photo.sizes.size(); b++) {
+ TLRPC.PhotoSize size = messageOwner.action.photo.sizes.get(b);
if (size instanceof TLRPC.TL_photoSizeEmpty) {
continue;
}
@@ -499,11 +518,13 @@ public class MessageObject {
}
} else if (messageOwner.media != null && !(messageOwner.media instanceof TLRPC.TL_messageMediaEmpty)) {
if (messageOwner.media instanceof TLRPC.TL_messageMediaPhoto) {
- if (!update) {
+ if (!update || photoThumbs != null && photoThumbs.size() != messageOwner.media.photo.sizes.size()) {
photoThumbs = new ArrayList<>(messageOwner.media.photo.sizes);
} else if (photoThumbs != null && !photoThumbs.isEmpty()) {
- for (TLRPC.PhotoSize photoObject : photoThumbs) {
- for (TLRPC.PhotoSize size : messageOwner.media.photo.sizes) {
+ for (int a = 0; a < photoThumbs.size(); a++) {
+ TLRPC.PhotoSize photoObject = photoThumbs.get(a);
+ for (int b = 0; b < messageOwner.media.photo.sizes.size(); b++) {
+ TLRPC.PhotoSize size = messageOwner.media.photo.sizes.get(b);
if (size instanceof TLRPC.TL_photoSizeEmpty) {
continue;
}
@@ -537,8 +558,10 @@ public class MessageObject {
if (!update || photoThumbs == null) {
photoThumbs = new ArrayList<>(messageOwner.media.webpage.photo.sizes);
} else if (!photoThumbs.isEmpty()) {
- for (TLRPC.PhotoSize photoObject : photoThumbs) {
- for (TLRPC.PhotoSize size : messageOwner.media.webpage.photo.sizes) {
+ for (int a = 0; a < photoThumbs.size(); a++) {
+ TLRPC.PhotoSize photoObject = photoThumbs.get(a);
+ for (int b = 0; b < messageOwner.media.webpage.photo.sizes.size(); b++) {
+ TLRPC.PhotoSize size = messageOwner.media.webpage.photo.sizes.get(b);
if (size instanceof TLRPC.TL_photoSizeEmpty) {
continue;
}
@@ -749,7 +772,7 @@ public class MessageObject {
private static void addUsernamesAndHashtags(CharSequence charSequence, boolean botCommands) {
try {
if (urlPattern == null) {
- urlPattern = Pattern.compile("(^|\\s)/[a-zA-Z@\\d_]{1,255}|(^|\\s)@[a-zA-Z\\d_]{5,32}|(^|\\s)#[\\w\\.]+");
+ urlPattern = Pattern.compile("(^|\\s)/[a-zA-Z@\\d_]{1,255}|(^|\\s)@[a-zA-Z\\d_]{3,32}|(^|\\s)#[\\w\\.]+");
}
Matcher matcher = urlPattern.matcher(charSequence);
while (matcher.find()) {
@@ -815,12 +838,13 @@ public class MessageObject {
messageOwner instanceof TLRPC.TL_messageForwarded_old ||
messageOwner instanceof TLRPC.TL_messageForwarded_old2 ||
messageOwner instanceof TLRPC.TL_message_secret ||
- isOut() && messageOwner.send_state != MESSAGE_SEND_STATE_SENT || messageOwner.id < 0);
+ isOut() && messageOwner.send_state != MESSAGE_SEND_STATE_SENT ||
+ messageOwner.id < 0 || messageOwner.media instanceof TLRPC.TL_messageMediaUnsupported);
if (useManualParse) {
addLinks(messageText);
} else {
- if (messageText instanceof Spannable && messageText.length() < 100) {
+ if (messageText instanceof Spannable && messageText.length() < 200) {
try {
Linkify.addLinks((Spannable) messageText, Linkify.PHONE_NUMBERS);
} catch (Throwable e) {
@@ -844,7 +868,7 @@ public class MessageObject {
} else if (entity instanceof TLRPC.TL_messageEntityItalic) {
spannable.setSpan(new TypefaceSpan(AndroidUtilities.getTypeface("fonts/ritalic.ttf")), entity.offset, entity.offset + entity.length, Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
} else if (entity instanceof TLRPC.TL_messageEntityCode || entity instanceof TLRPC.TL_messageEntityPre) {
- spannable.setSpan(new TypefaceSpan(Typeface.MONOSPACE), entity.offset, entity.offset + entity.length, Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
+ spannable.setSpan(new TypefaceSpan(Typeface.MONOSPACE, AndroidUtilities.dp(MessagesController.getInstance().fontSize - 1)), entity.offset, entity.offset + entity.length, Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
} else if (!useManualParse) {
String url = messageOwner.message.substring(entity.offset, entity.offset + entity.length);
if (entity instanceof TLRPC.TL_messageEntityBotCommand) {
@@ -1315,6 +1339,14 @@ public class MessageObject {
return isMusicMessage(messageOwner);
}
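+ // Note the asymmetry: isGif() assumes messageOwner.media is already known
+ // to be non-null (callers match the media type first), isNewGif() guards it.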
+ public boolean isGif() {
+ return isGifDocument(messageOwner.media.document);
+ }
+
+ public boolean isNewGif() {
+ return messageOwner.media != null && isNewGifDocument(messageOwner.media.document);
+ }
+
public String getMusicTitle() {
for (TLRPC.DocumentAttribute attribute : messageOwner.media.document.attributes) {
if (attribute instanceof TLRPC.TL_documentAttributeAudio) {
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/MessagesController.java b/TMessagesProj/src/main/java/org/telegram/messenger/MessagesController.java
index 2a8e3a3a6..769784be6 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/MessagesController.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/MessagesController.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
@@ -471,9 +471,18 @@ public class MessagesController implements NotificationCenter.NotificationCenter
onlinePrivacy.clear();
lastPrintingStringCount = 0;
nextDialogsCacheOffset = 0;
- updatesQueueSeq.clear();
- updatesQueuePts.clear();
- updatesQueueQts.clear();
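+ // The update queues and wait timers are used from stageQueue, so they are
+ // reset there instead of on the calling thread to avoid a race.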
+ Utilities.stageQueue.postRunnable(new Runnable() {
+ @Override
+ public void run() {
+ updatesQueueSeq.clear();
+ updatesQueuePts.clear();
+ updatesQueueQts.clear();
+ updatesStartWaitTimeSeq = 0;
+ updatesStartWaitTimePts = 0;
+ updatesStartWaitTimeQts = 0;
+ gettingDifference = false;
+ }
+ });
blockedUsers.clear();
sendingTypings.clear();
loadingFullUsers.clear();
@@ -484,15 +493,11 @@ public class MessagesController implements NotificationCenter.NotificationCenter
loadedFullParticipants.clear();
loadedFullChats.clear();
- updatesStartWaitTimeSeq = 0;
- updatesStartWaitTimePts = 0;
- updatesStartWaitTimeQts = 0;
currentDeletingTaskTime = 0;
currentDeletingTaskMids = null;
gettingNewDeleteTask = false;
loadingDialogs = false;
dialogsEndReached = false;
- gettingDifference = false;
loadingBlockedUsers = false;
firstGettingTask = false;
updatingState = false;
@@ -872,6 +877,23 @@ public class MessagesController implements NotificationCenter.NotificationCenter
reloadingMessages.remove(dialog_id);
}
}
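+ // If one of the reloaded messages is the one shown in the dialogs list,
+ // replace the cached copy and ask the list to redraw.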
+ MessageObject dialogObj = dialogMessage.get(dialog_id);
+ if (dialogObj != null) {
+ for (int a = 0; a < objects.size(); a++) {
+ MessageObject obj = objects.get(a);
+ if (dialogObj != null && dialogObj.getId() == obj.getId()) {
+ dialogMessage.put(dialog_id, obj);
+ if (obj.messageOwner.to_id.channel_id == 0) {
+ obj = dialogMessagesByIds.remove(obj.getId());
+ if (obj != null) {
+ dialogMessagesByIds.put(obj.getId(), obj);
+ }
+ }
+ NotificationCenter.getInstance().postNotificationName(NotificationCenter.dialogsNeedReload);
+ break;
+ }
+ }
+ }
NotificationCenter.getInstance().postNotificationName(NotificationCenter.replaceMessagesObjects, dialog_id, objects);
}
});
@@ -1999,7 +2021,7 @@ public class MessagesController implements NotificationCenter.NotificationCenter
objects.add(new MessageObject(message, usersDict, chatsDict, true));
if (isCache) {
if (message.media instanceof TLRPC.TL_messageMediaUnsupported) {
- if (message.media.bytes.length == 0 || message.media.bytes.length == 1 && message.media.bytes[0] < TLRPC.LAYER) {
+ if (message.media.bytes != null && (message.media.bytes.length == 0 || message.media.bytes.length == 1 && message.media.bytes[0] < TLRPC.LAYER)) {
messagesToReload.add(message.id);
}
} else if (message.media instanceof TLRPC.TL_messageMediaWebPage) {
@@ -2298,7 +2320,7 @@ public class MessagesController implements NotificationCenter.NotificationCenter
TLRPC.Message message = dialogsRes.messages.get(a);
if (message.to_id.channel_id != 0) {
TLRPC.Chat chat = chatsDict.get(message.to_id.channel_id);
- if (chat != null && chat.left && !chat.megagroup) {
+ if (chat != null && chat.left/* && !chat.megagroup*/) {
continue;
}
if (chat != null && chat.megagroup) {
@@ -2340,7 +2362,7 @@ public class MessagesController implements NotificationCenter.NotificationCenter
d.top_message = Math.max(d.top_message, d.top_not_important_message);
d.unread_count = Math.max(d.unread_count, d.unread_not_important_count);
}
- if (chat != null && chat.left && !chat.megagroup) {
+ if (chat != null && chat.left/* && !chat.megagroup*/) {
continue;
}
channelsPts.put(-(int) d.id, d.pts);
@@ -2634,7 +2656,7 @@ public class MessagesController implements NotificationCenter.NotificationCenter
TLRPC.Message message = dialogsRes.messages.get(a);
if (message.to_id.channel_id != 0) {
TLRPC.Chat chat = chatsDict.get(message.to_id.channel_id);
- if (chat != null && chat.left && !chat.megagroup) {
+ if (chat != null && chat.left/* && !chat.megagroup*/) {
continue;
}
} else if (message.to_id.chat_id != 0) {
@@ -2659,7 +2681,7 @@ public class MessagesController implements NotificationCenter.NotificationCenter
}
if (d instanceof TLRPC.TL_dialogChannel) {
TLRPC.Chat chat = chatsDict.get(-(int) d.id);
- if (chat != null && chat.left && !chat.megagroup) {
+ if (chat != null && chat.left/* && !chat.megagroup*/) {
continue;
}
} else if ((int) d.id < 0) {
@@ -3607,9 +3629,13 @@ public class MessagesController implements NotificationCenter.NotificationCenter
}
public void performLogout(boolean byUser) {
- SharedPreferences preferences = ApplicationLoader.applicationContext.getSharedPreferences("Notifications", Activity.MODE_PRIVATE);
- SharedPreferences.Editor editor = preferences.edit();
+ SharedPreferences.Editor editor = ApplicationLoader.applicationContext.getSharedPreferences("Notifications", Activity.MODE_PRIVATE).edit();
editor.clear().commit();
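+ // Logout also resets the saved-GIFs cache timestamp and the one-time GIF
+ // hint so the next account starts from a clean state.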
+ editor = ApplicationLoader.applicationContext.getSharedPreferences("emoji", Activity.MODE_PRIVATE).edit();
+ editor.putLong("lastGifLoadTime", 0).commit();
+ editor = ApplicationLoader.applicationContext.getSharedPreferences("mainconfig", Activity.MODE_PRIVATE).edit();
+ editor.remove("gifhint").commit();
+
if (byUser) {
unregistedPush();
TLRPC.TL_auth_logOut req = new TLRPC.TL_auth_logOut();
@@ -3707,7 +3733,7 @@ public class MessagesController implements NotificationCenter.NotificationCenter
ConnectionsManager.getInstance().sendRequest(req, new RequestDelegate() {
@Override
public void run(TLObject response, TLRPC.TL_error error) {
- if (error == null) {
+ if (response instanceof TLRPC.TL_boolTrue) {
FileLog.e("tmessages", "registered for push");
UserConfig.registeredForPush = true;
UserConfig.pushString = regid;
@@ -4607,6 +4633,7 @@ public class MessagesController implements NotificationCenter.NotificationCenter
final int user_id = updates instanceof TLRPC.TL_updateShortChatMessage ? updates.from_id : updates.user_id;
TLRPC.User user = getUser(user_id);
TLRPC.User user2 = null;
+ TLRPC.User user3 = null;
TLRPC.Chat channel = null;
if (user == null) {
@@ -4631,16 +4658,26 @@ public class MessagesController implements NotificationCenter.NotificationCenter
needFwdUser = true;
}
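+ // A short update that references an inline bot must resolve the bot user
+ // too; if it cannot be found, missingData below prevents applying it directly.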
+ boolean needBotUser = false;
+ if (updates.via_bot_id != 0) {
+ user3 = getUser(updates.via_bot_id);
+ if (user3 == null) {
+ user3 = MessagesStorage.getInstance().getUserSync(updates.via_bot_id);
+ putUser(user3, true);
+ }
+ needBotUser = true;
+ }
+
boolean missingData;
if (updates instanceof TLRPC.TL_updateShortMessage) {
- missingData = user == null || needFwdUser && user2 == null && channel == null;
+ missingData = user == null || needFwdUser && user2 == null && channel == null || needBotUser && user3 == null;
} else {
TLRPC.Chat chat = getChat(updates.chat_id);
if (chat == null) {
chat = MessagesStorage.getInstance().getChatSync(updates.chat_id);
putChat(chat, true);
}
- missingData = chat == null || user == null || needFwdUser && user2 == null && channel == null;
+ missingData = chat == null || user == null || needFwdUser && user2 == null && channel == null || needBotUser && user3 == null;
}
if (user != null && user.status != null && user.status.expires <= 0) {
onlinePrivacy.put(user.id, ConnectionsManager.getInstance().getCurrentTime());
@@ -4675,6 +4712,7 @@ public class MessagesController implements NotificationCenter.NotificationCenter
message.entities = updates.entities;
message.message = updates.message;
message.date = updates.date;
+ message.via_bot_id = updates.via_bot_id;
message.flags = updates.flags | TLRPC.MESSAGE_FLAG_HAS_FROM_ID;
message.fwd_from_id = updates.fwd_from_id;
message.fwd_date = updates.fwd_date;
@@ -5085,9 +5123,6 @@ public class MessagesController implements NotificationCenter.NotificationCenter
if (chat.megagroup) {
message.flags |= TLRPC.MESSAGE_FLAG_MEGAGROUP;
}
- if (chat.left) {
- continue;
- }
}
if (message.from_id > 0) {
TLRPC.User user = getUser(message.from_id);
@@ -5485,6 +5520,8 @@ public class MessagesController implements NotificationCenter.NotificationCenter
updatesOnMainThread.add(update);
} else if (update instanceof TLRPC.TL_updateNewStickerSet) {
updatesOnMainThread.add(update);
+ } else if (update instanceof TLRPC.TL_updateSavedGifs) {
+ updatesOnMainThread.add(update);
}
}
if (!messages.isEmpty()) {
@@ -5672,6 +5709,9 @@ public class MessagesController implements NotificationCenter.NotificationCenter
StickersQuery.reorderStickers(update.order);
} else if (update instanceof TLRPC.TL_updateNewStickerSet) {
StickersQuery.addNewStickerSet(update.stickerset);
+ } else if (update instanceof TLRPC.TL_updateSavedGifs) {
+ SharedPreferences.Editor editor2 = ApplicationLoader.applicationContext.getSharedPreferences("emoji", Activity.MODE_PRIVATE).edit();
+ editor2.putLong("lastGifLoadTime", 0).commit();
}
}
if (editor != null) {
@@ -5893,6 +5933,17 @@ public class MessagesController implements NotificationCenter.NotificationCenter
updateInterfaceWithMessages(uid, messages, false);
}
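+ // Wraps a just-sent animated document in a SearchImage row (type 2) and
+ // stores it through putWebRecent so it appears among the recent GIFs.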
+ protected static void addNewGifToRecent(TLRPC.Document document, int date) {
+ ArrayList<MediaController.SearchImage> arrayList = new ArrayList<>();
+ MediaController.SearchImage searchImage = new MediaController.SearchImage();
+ searchImage.type = 2;
+ searchImage.document = document;
+ searchImage.date = date;
+ searchImage.id = "" + searchImage.document.id;
+ arrayList.add(searchImage);
+ MessagesStorage.getInstance().putWebRecent(arrayList);
+ }
+
protected void updateInterfaceWithMessages(final long uid, final ArrayList<MessageObject> messages, boolean isBroadcast) {
if (messages == null || messages.isEmpty()) {
return;
@@ -5911,6 +5962,9 @@ public class MessagesController implements NotificationCenter.NotificationCenter
channelId = message.messageOwner.to_id.channel_id;
}
}
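+ // Record an outgoing new-style GIF as recent as soon as it has been sent.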
+ if (message.isOut() && message.isNewGif() && !message.isSending()) {
+ addNewGifToRecent(message.messageOwner.media.document, message.messageOwner.date);
+ }
}
NotificationCenter.getInstance().postNotificationName(NotificationCenter.didReceivedNewMessages, uid, messages);
@@ -5941,7 +5995,7 @@ public class MessagesController implements NotificationCenter.NotificationCenter
if (dialog == null) {
if (!isBroadcast) {
TLRPC.Chat chat = getChat(channelId);
- if (chat != null && chat.left) {
+ if (channelId != 0 && chat == null || chat != null && chat.left) {
return;
}
if (!ChatObject.isChannel(chat)) {
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/MessagesStorage.java b/TMessagesProj/src/main/java/org/telegram/messenger/MessagesStorage.java
index 560328f2c..a904bd732 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/MessagesStorage.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/MessagesStorage.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
@@ -153,7 +153,8 @@ public class MessagesStorage {
database.executeFast("CREATE TABLE user_photos(uid INTEGER, id INTEGER, data BLOB, PRIMARY KEY (uid, id))").stepThis().dispose();
database.executeFast("CREATE TABLE blocked_users(uid INTEGER PRIMARY KEY)").stepThis().dispose();
database.executeFast("CREATE TABLE dialog_settings(did INTEGER PRIMARY KEY, flags INTEGER);").stepThis().dispose();
- database.executeFast("CREATE TABLE web_recent_v3(id TEXT, type INTEGER, image_url TEXT, thumb_url TEXT, local_url TEXT, width INTEGER, height INTEGER, size INTEGER, date INTEGER, PRIMARY KEY (id, type));").stepThis().dispose();
+ database.executeFast("CREATE TABLE web_recent_v3(id TEXT, type INTEGER, image_url TEXT, thumb_url TEXT, local_url TEXT, width INTEGER, height INTEGER, size INTEGER, date INTEGER, document BLOB, PRIMARY KEY (id, type));").stepThis().dispose();
+ database.executeFast("CREATE TABLE bot_recent(id INTEGER PRIMARY KEY, date INTEGER);").stepThis().dispose();
database.executeFast("CREATE TABLE stickers_v2(id INTEGER PRIMARY KEY, data BLOB, date INTEGER, hash TEXT);").stepThis().dispose();
database.executeFast("CREATE TABLE hashtag_recent_v2(id TEXT PRIMARY KEY, date INTEGER);").stepThis().dispose();
database.executeFast("CREATE TABLE webpage_pending(id INTEGER, mid INTEGER, PRIMARY KEY (id, mid));").stepThis().dispose();
@@ -165,7 +166,7 @@ public class MessagesStorage {
database.executeFast("CREATE TABLE bot_info(uid INTEGER PRIMARY KEY, info BLOB)").stepThis().dispose();
//version
- database.executeFast("PRAGMA user_version = 27").stepThis().dispose();
+ database.executeFast("PRAGMA user_version = 29").stepThis().dispose();
//database.executeFast("CREATE TABLE secret_holes(uid INTEGER, seq_in INTEGER, seq_out INTEGER, data BLOB, PRIMARY KEY (uid, seq_in, seq_out));").stepThis().dispose();
//database.executeFast("CREATE TABLE attach_data(uid INTEGER, id INTEGER, data BLOB, PRIMARY KEY (uid, id))").stepThis().dispose();
@@ -199,7 +200,7 @@ public class MessagesStorage {
}
}
int version = database.executeInt("PRAGMA user_version");
- if (version < 27) {
+ if (version < 29) {
updateDbToLastVersion(version);
}
}
@@ -476,7 +477,17 @@ public class MessagesStorage {
if (version == 25 || version == 26) {
database.executeFast("CREATE TABLE IF NOT EXISTS channel_users_v2(did INTEGER, uid INTEGER, date INTEGER, data BLOB, PRIMARY KEY(did, uid))").stepThis().dispose();
database.executeFast("PRAGMA user_version = 27").stepThis().dispose();
- //version = 27;
+ version = 27;
+ }
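+ // Migrations run step by step, each bumping user_version, so an interrupted
+ // upgrade resumes at the right step: 28 adds the document BLOB to
+ // web_recent_v3, 29 creates the bot_recent table.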
+ if (version == 27) {
+ database.executeFast("ALTER TABLE web_recent_v3 ADD COLUMN document BLOB default NULL").stepThis().dispose();
+ database.executeFast("PRAGMA user_version = 28").stepThis().dispose();
+ version = 28;
+ }
+ if (version == 28) {
+ database.executeFast("CREATE TABLE IF NOT EXISTS bot_recent(id INTEGER PRIMARY KEY, date INTEGER);").stepThis().dispose();
+ database.executeFast("PRAGMA user_version = 29").stepThis().dispose();
+ //version = 29;
}
} catch (Exception e) {
FileLog.e("tmessages", e);
@@ -752,7 +763,7 @@ public class MessagesStorage {
@Override
public void run() {
try {
- SQLiteCursor cursor = database.queryFinalized("SELECT id, image_url, thumb_url, local_url, width, height, size, date FROM web_recent_v3 WHERE type = " + type);
+ SQLiteCursor cursor = database.queryFinalized("SELECT id, image_url, thumb_url, local_url, width, height, size, date, document FROM web_recent_v3 WHERE type = " + type + " ORDER BY date DESC");
final ArrayList<MediaController.SearchImage> arrayList = new ArrayList<>();
while (cursor.next()) {
MediaController.SearchImage searchImage = new MediaController.SearchImage();
@@ -764,28 +775,56 @@ public class MessagesStorage {
searchImage.height = cursor.intValue(5);
searchImage.size = cursor.intValue(6);
searchImage.date = cursor.intValue(7);
+ if (!cursor.isNull(8)) {
+ NativeByteBuffer data = new NativeByteBuffer(cursor.byteArrayLength(8));
+ if (data != null && cursor.byteBufferValue(8, data) != 0) {
+ searchImage.document = TLRPC.Document.TLdeserialize(data, data.readInt32(false), false);
+ }
+ data.reuse();
+ }
searchImage.type = type;
arrayList.add(searchImage);
}
cursor.dispose();
- Collections.sort(arrayList, new Comparator<MediaController.SearchImage>() {
- @Override
- public int compare(MediaController.SearchImage lhs, MediaController.SearchImage rhs) {
- if (lhs.date < rhs.date) {
- return 1;
- } else if (lhs.date > rhs.date) {
- return -1;
- } else {
- return 0;
- }
- }
- });
AndroidUtilities.runOnUIThread(new Runnable() {
@Override
public void run() {
NotificationCenter.getInstance().postNotificationName(NotificationCenter.recentImagesDidLoaded, type, arrayList);
}
});
+ } catch (Throwable e) {
+ FileLog.e("tmessages", e);
+ }
+ }
+ });
+ }
+
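+ // Rewritten with bound parameters instead of concatenating URLs into the
+ // SQL string, and extended to persist a serialized TLRPC.Document.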
+ public void addRecentLocalFile(final String imageUrl, final String localUrl, final TLRPC.Document document) {
+ if (imageUrl == null || imageUrl.length() == 0 || ((localUrl == null || localUrl.length() == 0) && document == null)) {
+ return;
+ }
+ storageQueue.postRunnable(new Runnable() {
+ @Override
+ public void run() {
+ try {
+ if (document != null) {
+ SQLitePreparedStatement state = database.executeFast("UPDATE web_recent_v3 SET document = ? WHERE image_url = ?");
+ state.requery();
+ NativeByteBuffer data = new NativeByteBuffer(document.getObjectSize());
+ document.serializeToStream(data);
+ state.bindByteBuffer(1, data);
+ state.bindString(2, imageUrl);
+ state.step();
+ state.dispose();
+ data.reuse();
+ } else {
+ SQLitePreparedStatement state = database.executeFast("UPDATE web_recent_v3 SET local_url = ? WHERE image_url = ?");
+ state.requery();
+ state.bindString(1, localUrl);
+ state.bindString(2, imageUrl);
+ state.step();
+ state.dispose();
+ }
} catch (Exception e) {
FileLog.e("tmessages", e);
}
@@ -793,15 +832,15 @@ public class MessagesStorage {
});
}
- public void addRecentLocalFile(final String imageUrl, final String localUrl) {
- if (imageUrl == null || localUrl == null || imageUrl.length() == 0 || localUrl.length() == 0) {
+ public void removeWebRecent(final MediaController.SearchImage searchImage) {
+ if (searchImage == null) {
return;
}
storageQueue.postRunnable(new Runnable() {
@Override
public void run() {
try {
- database.executeFast("UPDATE web_recent_v3 SET local_url = '" + localUrl + "' WHERE image_url = '" + imageUrl + "'").stepThis().dispose();
+ database.executeFast("DELETE FROM web_recent_v3 WHERE id = '" + searchImage.id + "'").stepThis().dispose();
} catch (Exception e) {
FileLog.e("tmessages", e);
}
@@ -828,32 +867,40 @@ public class MessagesStorage {
public void run() {
try {
database.beginTransaction();
- SQLitePreparedStatement state = database.executeFast("REPLACE INTO web_recent_v3 VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?)");
+ SQLitePreparedStatement state = database.executeFast("REPLACE INTO web_recent_v3 VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?)");
for (int a = 0; a < arrayList.size(); a++) {
- if (a == 100) {
+ if (a == 200) {
break;
}
MediaController.SearchImage searchImage = arrayList.get(a);
- if (searchImage.localUrl == null) {
- searchImage.localUrl = "";
- }
state.requery();
state.bindString(1, searchImage.id);
state.bindInteger(2, searchImage.type);
- state.bindString(3, searchImage.imageUrl);
- state.bindString(4, searchImage.thumbUrl);
- state.bindString(5, searchImage.localUrl);
+ state.bindString(3, searchImage.imageUrl != null ? searchImage.imageUrl : "");
+ state.bindString(4, searchImage.thumbUrl != null ? searchImage.thumbUrl : "");
+ state.bindString(5, searchImage.localUrl != null ? searchImage.localUrl : "");
state.bindInteger(6, searchImage.width);
state.bindInteger(7, searchImage.height);
state.bindInteger(8, searchImage.size);
state.bindInteger(9, searchImage.date);
+ NativeByteBuffer data = null;
+ if (searchImage.document != null) {
+ data = new NativeByteBuffer(searchImage.document.getObjectSize());
+ searchImage.document.serializeToStream(data);
+ state.bindByteBuffer(10, data);
+ } else {
+ state.bindNull(10);
+ }
state.step();
+ if (data != null) {
+ data.reuse();
+ }
}
state.dispose();
database.commitTransaction();
- if (arrayList.size() >= 100) {
+ if (arrayList.size() >= 200) {
database.beginTransaction();
- for (int a = 100; a < arrayList.size(); a++) {
+ for (int a = 200; a < arrayList.size(); a++) {
database.executeFast("DELETE FROM web_recent_v3 WHERE id = '" + arrayList.get(a).id + "'").stepThis().dispose();
}
database.commitTransaction();
@@ -3589,7 +3636,7 @@ public class MessagesStorage {
type = MediaController.AUTODOWNLOAD_MASK_VIDEO;
object = message.media.video;
}
- } else if (message.media instanceof TLRPC.TL_messageMediaDocument) {
+ } else if (message.media instanceof TLRPC.TL_messageMediaDocument && !MessageObject.isMusicMessage(message) && !MessageObject.isGifDocument(message.media.document)) {
if ((downloadMask & MediaController.AUTODOWNLOAD_MASK_DOCUMENT) != 0) {
id = message.media.document.id;
type = MediaController.AUTODOWNLOAD_MASK_DOCUMENT;
@@ -4906,6 +4953,9 @@ public class MessagesStorage {
}
}
}
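+ // Messages sent "via @bot" carry via_bot_id; load that user as well so the
+ // attribution can be shown.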
+ if (message.via_bot_id != 0 && !usersToLoad.contains(message.via_bot_id)) {
+ usersToLoad.add(message.via_bot_id);
+ }
if (message.action != null) {
if (message.action.user_id != 0 && !usersToLoad.contains(message.action.user_id)) {
usersToLoad.add(message.action.user_id);
@@ -5353,7 +5403,13 @@ public class MessagesStorage {
cursor.dispose();
}
}
- semaphore.release();
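+ // The semaphore may be absent on some call paths; guard the release instead
+ // of crashing the storage queue.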
+ try {
+ if (semaphore != null) {
+ semaphore.release();
+ }
+ } catch (Exception e) {
+ FileLog.e("tmessages", e);
+ }
}
});
try {
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/MusicPlayerReceiver.java b/TMessagesProj/src/main/java/org/telegram/messenger/MusicPlayerReceiver.java
index cb910e5a0..5af30d3e1 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/MusicPlayerReceiver.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/MusicPlayerReceiver.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/MusicPlayerService.java b/TMessagesProj/src/main/java/org/telegram/messenger/MusicPlayerService.java
index a6717ee0a..f5fe15f88 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/MusicPlayerService.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/MusicPlayerService.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/NativeCrashManager.java b/TMessagesProj/src/main/java/org/telegram/messenger/NativeCrashManager.java
index a31184866..1e10edb6f 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/NativeCrashManager.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/NativeCrashManager.java
@@ -7,6 +7,7 @@ import android.util.Log;
import net.hockeyapp.android.Constants;
import net.hockeyapp.android.utils.SimpleMultipartEntity;
+import java.io.BufferedOutputStream;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
@@ -71,7 +72,7 @@ public class NativeCrashManager {
attachmentUri = Uri.fromFile(new File(Constants.FILES_PATH, logFilename));
input = activity.getContentResolver().openInputStream(attachmentUri);
- entity.addPart("log", attachmentUri.getLastPathSegment(), input, false);
+ entity.addPart("log", attachmentUri.getLastPathSegment(), input, true);
entity.writeLastBoundaryIfNeeds();
@@ -80,9 +81,15 @@ public class NativeCrashManager {
urlConnection.setRequestMethod("POST");
urlConnection.setRequestProperty("Content-Type", entity.getContentType());
urlConnection.setRequestProperty("Content-Length", String.valueOf(entity.getContentLength()));
- urlConnection.getOutputStream().write(entity.getOutputStream().toByteArray());
+
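+ // Write the multipart body through a buffered stream and flush it before
+ // reading the response, then log the server status for debugging.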
+ BufferedOutputStream outputStream = new BufferedOutputStream(urlConnection.getOutputStream());
+ outputStream.write(entity.getOutputStream().toByteArray());
+ outputStream.flush();
+ outputStream.close();
urlConnection.connect();
+
+ FileLog.e("tmessages", "response code = " + urlConnection.getResponseCode() + " message = " + urlConnection.getResponseMessage());
} catch (IOException e) {
e.printStackTrace();
} finally {
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/NativeLoader.java b/TMessagesProj/src/main/java/org/telegram/messenger/NativeLoader.java
index d7d39d55a..dc9c57316 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/NativeLoader.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/NativeLoader.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
@@ -23,7 +23,7 @@ import java.util.zip.ZipFile;
public class NativeLoader {
- private final static int LIB_VERSION = 15;
+ private final static int LIB_VERSION = 17;
private final static String LIB_NAME = "tmessages." + LIB_VERSION;
private final static String LIB_SO_NAME = "lib" + LIB_NAME + ".so";
private final static String LOCALE_LIB_SO_NAME = "lib" + LIB_NAME + "loc.so";
@@ -220,4 +220,5 @@ public class NativeLoader {
}
private static native void init(String path, boolean enable);
+ //public static native void crash();
}
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/NotificationCenter.java b/TMessagesProj/src/main/java/org/telegram/messenger/NotificationCenter.java
index 72694ab58..c01709277 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/NotificationCenter.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/NotificationCenter.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
@@ -66,6 +66,7 @@ public class NotificationCenter {
public static final int needShowAlert = totalEvents++;
public static final int didUpdatedMessagesViews = totalEvents++;
public static final int needReloadRecentDialogsSearch = totalEvents++;
+ public static final int locationPermissionGranted = totalEvents++;
public static final int httpFileDidLoaded = totalEvents++;
public static final int httpFileDidFailedLoad = totalEvents++;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/NotificationRepeat.java b/TMessagesProj/src/main/java/org/telegram/messenger/NotificationRepeat.java
index 78dd91d05..c1e87f8b4 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/NotificationRepeat.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/NotificationRepeat.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/NotificationsController.java b/TMessagesProj/src/main/java/org/telegram/messenger/NotificationsController.java
index cb9cb3bf2..fc10b9360 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/NotificationsController.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/NotificationsController.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
@@ -756,6 +756,8 @@ public class NotificationsController {
} else if (messageObject.messageOwner.media instanceof TLRPC.TL_messageMediaDocument) {
if (messageObject.isSticker()) {
msg = LocaleController.formatString("NotificationMessageSticker", R.string.NotificationMessageSticker, name);
+ } else if (messageObject.isGif()) {
+ msg = LocaleController.formatString("NotificationMessageGif", R.string.NotificationMessageGif, name);
} else {
msg = LocaleController.formatString("NotificationMessageDocument", R.string.NotificationMessageDocument, name);
}
@@ -864,6 +866,8 @@ public class NotificationsController {
} else if (messageObject.messageOwner.media instanceof TLRPC.TL_messageMediaDocument) {
if (messageObject.isSticker()) {
msg = LocaleController.formatString("ChannelMessageSticker", R.string.ChannelMessageSticker, name, chat.title);
+ } else if (messageObject.isGif()) {
+ msg = LocaleController.formatString("ChannelMessageGIF", R.string.ChannelMessageGIF, name, chat.title);
} else {
msg = LocaleController.formatString("ChannelMessageDocument", R.string.ChannelMessageDocument, name, chat.title);
}
@@ -888,6 +892,8 @@ public class NotificationsController {
} else if (messageObject.messageOwner.media instanceof TLRPC.TL_messageMediaDocument) {
if (messageObject.isSticker()) {
msg = LocaleController.formatString("ChannelMessageGroupSticker", R.string.ChannelMessageGroupSticker, name, chat.title);
+ } else if (messageObject.isGif()) {
+ msg = LocaleController.formatString("ChannelMessageGroupGif", R.string.ChannelMessageGroupGif, name, chat.title);
} else {
msg = LocaleController.formatString("ChannelMessageGroupDocument", R.string.ChannelMessageGroupDocument, name, chat.title);
}
@@ -913,6 +919,8 @@ public class NotificationsController {
} else if (messageObject.messageOwner.media instanceof TLRPC.TL_messageMediaDocument) {
if (messageObject.isSticker()) {
msg = LocaleController.formatString("NotificationMessageGroupSticker", R.string.NotificationMessageGroupSticker, name, chat.title);
+ } else if (messageObject.isGif()) {
+ msg = LocaleController.formatString("NotificationMessageGroupGif", R.string.NotificationMessageGroupGif, name, chat.title);
} else {
msg = LocaleController.formatString("NotificationMessageGroupDocument", R.string.NotificationMessageGroupDocument, name, chat.title);
}
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/NotificationsService.java b/TMessagesProj/src/main/java/org/telegram/messenger/NotificationsService.java
index 857577cfd..065dc025b 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/NotificationsService.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/NotificationsService.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/ScreenReceiver.java b/TMessagesProj/src/main/java/org/telegram/messenger/ScreenReceiver.java
index 099167152..77f678de4 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/ScreenReceiver.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/ScreenReceiver.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/SecretChatHelper.java b/TMessagesProj/src/main/java/org/telegram/messenger/SecretChatHelper.java
index da18258e6..b1356568c 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/SecretChatHelper.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/SecretChatHelper.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
@@ -538,7 +538,7 @@ public class SecretChatHelper {
File cacheFile = new File(FileLoader.getInstance().getDirectory(FileLoader.MEDIA_DIR_CACHE), fileName + ".jpg");
File cacheFile2 = FileLoader.getPathToAttach(size);
cacheFile.renameTo(cacheFile2);
- ImageLoader.getInstance().replaceImageInCache(fileName, fileName2, size.location);
+ ImageLoader.getInstance().replaceImageInCache(fileName, fileName2, size.location, true);
ArrayList<TLRPC.Message> arr = new ArrayList<>();
arr.add(newMsg);
MessagesStorage.getInstance().putMessages(arr, false, true, false, 0);
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/SendMessagesHelper.java b/TMessagesProj/src/main/java/org/telegram/messenger/SendMessagesHelper.java
index 5d541918e..8f9e5bb6e 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/SendMessagesHelper.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/SendMessagesHelper.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
@@ -143,7 +143,7 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
arr.remove(a);
a--;
} else if (encryptedFile != null && message.sendEncryptedRequest != null) {
- if (message.sendEncryptedRequest.media instanceof TLRPC.TL_decryptedMessageMediaVideo) {
+ if (message.sendEncryptedRequest.media instanceof TLRPC.TL_decryptedMessageMediaVideo || message.sendEncryptedRequest.media instanceof TLRPC.TL_decryptedMessageMediaPhoto) {
long size = (Long) args[5];
message.sendEncryptedRequest.media.size = (int) size;
}
@@ -206,7 +206,8 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
if (finalSize != 0) {
ArrayList<DelayedMessage> arr = delayedMessages.get(messageObject.messageOwner.attachPath);
if (arr != null) {
- for (DelayedMessage message : arr) {
+ for (int a = 0; a < arr.size(); a++) {
+ DelayedMessage message = arr.get(a);
if (message.obj == messageObject) {
message.obj.videoEditedInfo = null;
message.obj.messageOwner.message = "-1";
@@ -440,14 +441,14 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
}
if (messageObject.messageOwner.media != null && !(messageObject.messageOwner.media instanceof TLRPC.TL_messageMediaEmpty) && !(messageObject.messageOwner.media instanceof TLRPC.TL_messageMediaWebPage)) {
if (messageObject.messageOwner.media.photo instanceof TLRPC.TL_photo) {
- sendMessage((TLRPC.TL_photo) messageObject.messageOwner.media.photo, null, null, did, messageObject.replyMessageObject, asAdmin);
+ sendMessage((TLRPC.TL_photo) messageObject.messageOwner.media.photo, null, did, messageObject.replyMessageObject, asAdmin, null);
} else if (messageObject.messageOwner.media.audio instanceof TLRPC.TL_audio) {
sendMessage((TLRPC.TL_audio) messageObject.messageOwner.media.audio, messageObject.messageOwner.attachPath, did, messageObject.replyMessageObject, asAdmin);
} else if (messageObject.messageOwner.media.video instanceof TLRPC.TL_video) {
TLRPC.TL_video video = (TLRPC.TL_video) messageObject.messageOwner.media.video;
- sendMessage(video, messageObject.videoEditedInfo, null, messageObject.messageOwner.attachPath, did, messageObject.replyMessageObject, asAdmin);
+ sendMessage(video, messageObject.videoEditedInfo, messageObject.messageOwner.attachPath, did, messageObject.replyMessageObject, asAdmin, null);
} else if (messageObject.messageOwner.media.document instanceof TLRPC.TL_document) {
- sendMessage((TLRPC.TL_document) messageObject.messageOwner.media.document, null, messageObject.messageOwner.attachPath, did, messageObject.replyMessageObject, asAdmin);
+ sendMessage((TLRPC.TL_document) messageObject.messageOwner.media.document, messageObject.messageOwner.attachPath, did, messageObject.replyMessageObject, asAdmin, null);
} else if (messageObject.messageOwner.media instanceof TLRPC.TL_messageMediaVenue || messageObject.messageOwner.media instanceof TLRPC.TL_messageMediaGeo) {
sendMessage(messageObject.messageOwner.media, did, messageObject.replyMessageObject, asAdmin);
} else if (messageObject.messageOwner.media.phone_number != null) {
@@ -467,7 +468,7 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
if (messageObject.messageOwner.media instanceof TLRPC.TL_messageMediaWebPage) {
webPage = messageObject.messageOwner.media.webpage;
}
- sendMessage(messageObject.messageOwner.message, did, messageObject.replyMessageObject, webPage, true, asAdmin);
+ sendMessage(messageObject.messageOwner.message, did, messageObject.replyMessageObject, webPage, true, asAdmin, messageObject.messageOwner.entities, null);
} else {
ArrayList<MessageObject> arrayList = new ArrayList<>();
arrayList.add(messageObject);
@@ -522,11 +523,11 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
}
}
}
- SendMessagesHelper.getInstance().sendMessage((TLRPC.TL_document) document, null, null, peer, replyingMessageObject, asAdmin);
+ SendMessagesHelper.getInstance().sendMessage((TLRPC.TL_document) document, null, peer, replyingMessageObject, asAdmin, null);
}
public void sendMessage(TLRPC.User user, long peer, MessageObject reply_to_msg, boolean asAdmin) {
- sendMessage(null, null, null, null, null, user, null, null, null, peer, null, reply_to_msg, null, true, asAdmin, null);
+ sendMessage(null, null, null, null, null, user, null, null, peer, null, reply_to_msg, null, true, asAdmin, null, null, null);
}
public void sendMessage(ArrayList<MessageObject> messages, final long peer, boolean asAdmin) {
@@ -558,6 +559,9 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
if (msgObj.getId() <= 0) {
continue;
}
+ if (BuildVars.DEBUG_VERSION) {
+ FileLog.d("tmessages", "forward message with id = " + msgObj.getId());
+ }
final TLRPC.Message newMsg = new TLRPC.TL_message();
if (msgObj.isForwarded()) {
@@ -580,6 +584,10 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
if (isMegagroup) {
newMsg.flags |= TLRPC.MESSAGE_FLAG_MEGAGROUP;
}
+ if (msgObj.messageOwner.via_bot_id != 0) {
+ newMsg.via_bot_id = msgObj.messageOwner.via_bot_id;
+ newMsg.flags |= TLRPC.MESSAGE_FLAG_HAS_BOT_ID;
+ }
newMsg.message = msgObj.messageOwner.message;
newMsg.fwd_msg_id = msgObj.getId();
newMsg.attachPath = msgObj.messageOwner.attachPath;
@@ -694,7 +702,7 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
final ArrayList<TLRPC.Message> sentMessages = new ArrayList<>();
sentMessages.add(message);
newMsgObj.id = message.id;
- processSentMessage(newMsgObj, message, null);
+ processSentMessage(newMsgObj, message, null, true);
MessagesStorage.getInstance().getStorageQueue().postRunnable(new Runnable() {
@Override
public void run() {
@@ -727,7 +735,8 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
}
});
}
- for (final TLRPC.Message newMsgObj : newMsgObjArr) {
+ for (int a = 0; a < newMsgObjArr.size(); a++) {
+ final TLRPC.Message newMsgObj = newMsgObjArr.get(a);
MessagesStorage.getInstance().markMessageAsSendError(newMsgObj);
AndroidUtilities.runOnUIThread(new Runnable() {
@Override
@@ -757,38 +766,43 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
}
public void sendMessage(MessageObject retryMessageObject, boolean asAdmin) {
- sendMessage(null, null, null, null, null, null, null, null, null, retryMessageObject.getDialogId(), retryMessageObject.messageOwner.attachPath, null, null, true, asAdmin, retryMessageObject);
+ sendMessage(null, null, null, null, null, null, null, null, retryMessageObject.getDialogId(), retryMessageObject.messageOwner.attachPath, null, null, true, asAdmin, retryMessageObject, null, retryMessageObject.messageOwner.params);
}
- public void sendMessage(TLRPC.TL_document document, String originalPath, String path, long peer, MessageObject reply_to_msg, boolean asAdmin) {
- sendMessage(null, null, null, null, null, null, document, null, originalPath, peer, path, reply_to_msg, null, true, asAdmin, null);
+ public void sendMessage(TLRPC.TL_document document, String path, long peer, MessageObject reply_to_msg, boolean asAdmin, HashMap<String, String> params) {
+ sendMessage(null, null, null, null, null, null, document, null, peer, path, reply_to_msg, null, true, asAdmin, null, null, params);
}
- public void sendMessage(String message, long peer, MessageObject reply_to_msg, TLRPC.WebPage webPage, boolean searchLinks, boolean asAdmin) {
- sendMessage(message, null, null, null, null, null, null, null, null, peer, null, reply_to_msg, webPage, searchLinks, asAdmin, null);
+ public void sendMessage(String message, long peer, MessageObject reply_to_msg, TLRPC.WebPage webPage, boolean searchLinks, boolean asAdmin, ArrayList<TLRPC.MessageEntity> entities, HashMap<String, String> params) {
+ sendMessage(message, null, null, null, null, null, null, null, peer, null, reply_to_msg, webPage, searchLinks, asAdmin, null, entities, params);
}
public void sendMessage(TLRPC.MessageMedia location, long peer, MessageObject reply_to_msg, boolean asAdmin) {
- sendMessage(null, location, null, null, null, null, null, null, null, peer, null, reply_to_msg, null, true, asAdmin, null);
+ sendMessage(null, location, null, null, null, null, null, null, peer, null, reply_to_msg, null, true, asAdmin, null, null, null);
}
- public void sendMessage(TLRPC.TL_photo photo, String originalPath, String path, long peer, MessageObject reply_to_msg, boolean asAdmin) {
- sendMessage(null, null, photo, null, null, null, null, null, originalPath, peer, path, reply_to_msg, null, true, asAdmin, null);
+ public void sendMessage(TLRPC.TL_photo photo, String path, long peer, MessageObject reply_to_msg, boolean asAdmin, HashMap<String, String> params) {
+ sendMessage(null, null, photo, null, null, null, null, null, peer, path, reply_to_msg, null, true, asAdmin, null, null, params);
}
- public void sendMessage(TLRPC.TL_video video, VideoEditedInfo videoEditedInfo, String originalPath, String path, long peer, MessageObject reply_to_msg, boolean asAdmin) {
- sendMessage(null, null, null, video, videoEditedInfo, null, null, null, originalPath, peer, path, reply_to_msg, null, true, asAdmin, null);
+ public void sendMessage(TLRPC.TL_video video, VideoEditedInfo videoEditedInfo, String path, long peer, MessageObject reply_to_msg, boolean asAdmin, HashMap<String, String> params) {
+ sendMessage(null, null, null, video, videoEditedInfo, null, null, null, peer, path, reply_to_msg, null, true, asAdmin, null, null, params);
}
public void sendMessage(TLRPC.TL_audio audio, String path, long peer, MessageObject reply_to_msg, boolean asAdmin) {
- sendMessage(null, null, null, null, null, null, null, audio, null, peer, path, reply_to_msg, null, true, asAdmin, null);
+ sendMessage(null, null, null, null, null, null, null, audio, peer, path, reply_to_msg, null, true, asAdmin, null, null, null);
}
- private void sendMessage(String message, TLRPC.MessageMedia location, TLRPC.TL_photo photo, TLRPC.TL_video video, VideoEditedInfo videoEditedInfo, TLRPC.User user, TLRPC.TL_document document, TLRPC.TL_audio audio, String originalPath, long peer, String path, MessageObject reply_to_msg, TLRPC.WebPage webPage, boolean searchLinks, boolean asAdmin, MessageObject retryMessageObject) {
+ private void sendMessage(String message, TLRPC.MessageMedia location, TLRPC.TL_photo photo, TLRPC.TL_video video, VideoEditedInfo videoEditedInfo, TLRPC.User user, TLRPC.TL_document document, TLRPC.TL_audio audio, long peer, String path, MessageObject reply_to_msg, TLRPC.WebPage webPage, boolean searchLinks, boolean asAdmin, MessageObject retryMessageObject, ArrayList<TLRPC.MessageEntity> entities, HashMap<String, String> params) {
if (peer == 0) {
return;
}
+ String originalPath = null;
+ if (params != null && params.containsKey("originalPath")) {
+ originalPath = params.get("originalPath");
+ }
+
TLRPC.Message newMsg = null;
MessageObject newMsgObj = null;
int type = -1;
@@ -824,13 +838,21 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
} else {
if (retryMessageObject.type == 0) {
message = newMsg.message;
- type = 0;
+ if (params != null && params.containsKey("query_id")) {
+ type = 9;
+ } else {
+ type = 0;
+ }
} else if (retryMessageObject.type == 4) {
location = newMsg.media;
type = 1;
} else if (retryMessageObject.type == 1) {
photo = (TLRPC.TL_photo) newMsg.media.photo;
- type = 2;
+ if (params != null && params.containsKey("query_id")) {
+ type = 9;
+ } else {
+ type = 2;
+ }
} else if (retryMessageObject.type == 3) {
type = 3;
video = (TLRPC.TL_video) newMsg.media.video;
@@ -843,7 +865,11 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
type = 6;
} else if (retryMessageObject.type == 8 || retryMessageObject.type == 9 || retryMessageObject.type == 13) {
document = (TLRPC.TL_document) newMsg.media.document;
- type = 7;
+ if (params != null && params.containsKey("query_id")) {
+ type = 9;
+ } else {
+ type = 7;
+ }
} else if (retryMessageObject.type == 2) {
audio = (TLRPC.TL_audio) newMsg.media.audio;
type = 8;
@@ -856,13 +882,20 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
} else {
newMsg = new TLRPC.TL_message();
}
+ if (entities != null && !entities.isEmpty()) {
+ newMsg.entities = entities;
+ }
if (encryptedChat != null || webPage == null) {
newMsg.media = new TLRPC.TL_messageMediaEmpty();
} else {
newMsg.media = new TLRPC.TL_messageMediaWebPage();
newMsg.media.webpage = webPage;
}
- type = 0;
+ if (params != null && params.containsKey("query_id")) {
+ type = 9;
+ } else {
+ type = 0;
+ }
newMsg.message = message;
} else if (location != null) {
if (encryptedChat != null && AndroidUtilities.getPeerLayerVersion(encryptedChat.layer) >= 17) {
@@ -882,7 +915,11 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
newMsg.media = new TLRPC.TL_messageMediaPhoto();
newMsg.media.caption = photo.caption != null ? photo.caption : "";
newMsg.media.photo = photo;
- type = 2;
+ if (params != null && params.containsKey("query_id")) {
+ type = 9;
+ } else {
+ type = 2;
+ }
newMsg.message = "-1";
if (path != null && path.length() > 0 && path.startsWith("http")) {
newMsg.attachPath = path;
@@ -932,8 +969,13 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
newMsg = new TLRPC.TL_message();
}
newMsg.media = new TLRPC.TL_messageMediaDocument();
+ newMsg.media.caption = document.caption != null ? document.caption : "";
newMsg.media.document = document;
- type = 7;
+ if (params != null && params.containsKey("query_id")) {
+ type = 9;
+ } else {
+ type = 7;
+ }
newMsg.message = "-1";
newMsg.attachPath = path;
} else if (audio != null) {
@@ -964,6 +1006,11 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
if (newMsg.random_id == 0) {
newMsg.random_id = getNextRandomId();
}
+ if (params != null && params.containsKey("bot")) {
+ newMsg.via_bot_id = Utilities.parseInt(params.get("bot"));
+ newMsg.flags |= TLRPC.MESSAGE_FLAG_HAS_BOT_ID;
+ }
+ newMsg.params = params;
newMsg.date = ConnectionsManager.getInstance().getCurrentTime();
newMsg.flags |= TLRPC.MESSAGE_FLAG_HAS_MEDIA;
if (encryptedChat == null && high_id != 1 && newMsg.media instanceof TLRPC.TL_messageMediaAudio) {
@@ -1163,27 +1210,35 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
inputMedia.last_name = user.last_name;
} else if (type == 7) {
if (document.access_hash == 0) {
- if (document.thumb.location != null && document.thumb.location instanceof TLRPC.TL_fileLocation) {
- inputMedia = new TLRPC.TL_inputMediaUploadedThumbDocument();
+ if (originalPath != null && originalPath.length() > 0 && originalPath.startsWith("http") && params != null) {
+ inputMedia = new TLRPC.TL_inputMediaGifExternal();
+ String args[] = params.get("url").split("\\|");
+ if (args.length == 2) {
+ inputMedia.url = args[0];
+ inputMedia.q = args[1];
+ }
} else {
- inputMedia = new TLRPC.TL_inputMediaUploadedDocument();
+ if (document.thumb.location != null && document.thumb.location instanceof TLRPC.TL_fileLocation) {
+ inputMedia = new TLRPC.TL_inputMediaUploadedThumbDocument();
+ } else {
+ inputMedia = new TLRPC.TL_inputMediaUploadedDocument();
+ }
+ delayedMessage = new DelayedMessage();
+ delayedMessage.originalPath = originalPath;
+ delayedMessage.type = 2;
+ delayedMessage.obj = newMsgObj;
+ delayedMessage.documentLocation = document;
+ delayedMessage.location = document.thumb.location;
}
inputMedia.mime_type = document.mime_type;
inputMedia.attributes = document.attributes;
- delayedMessage = new DelayedMessage();
- delayedMessage.originalPath = originalPath;
- delayedMessage.type = 2;
- delayedMessage.obj = newMsgObj;
- if (path != null && path.length() > 0 && path.startsWith("http")) {
- delayedMessage.httpLocation = path;
- }
- delayedMessage.documentLocation = document;
- delayedMessage.location = document.thumb.location;
+ inputMedia.caption = document.caption != null ? document.caption : "";
} else {
TLRPC.TL_inputMediaDocument media = new TLRPC.TL_inputMediaDocument();
media.id = new TLRPC.TL_inputDocument();
media.id.id = document.id;
media.id.access_hash = document.access_hash;
+ media.caption = document.caption != null ? document.caption : "";
inputMedia = media;
}
} else if (type == 8) {
@@ -1254,7 +1309,7 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
} else if (type == 6) {
performSendMessageRequest(reqSend, newMsgObj.messageOwner, null);
} else if (type == 7) {
- if (document.access_hash == 0) {
+ if (document.access_hash == 0 && delayedMessage != null) {
performSendDelayedMessage(delayedMessage);
} else {
performSendMessageRequest(reqSend, newMsgObj.messageOwner, originalPath);
@@ -1464,6 +1519,20 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
reqSend.broadcast = true;
}
performSendMessageRequest(reqSend, newMsgObj.messageOwner, null);
+ } else if (type == 9) {
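+ // type 9: an inline bot result; nothing is uploaded, the server is given
+ // the query_id/id pair from the bot query instead.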
+ TLRPC.TL_messages_sendInlineBotResult reqSend = new TLRPC.TL_messages_sendInlineBotResult();
+ reqSend.peer = sendToPeer;
+ if (asAdmin && sendToPeer instanceof TLRPC.TL_inputPeerChannel) {
+ reqSend.broadcast = true;
+ }
+ reqSend.random_id = newMsg.random_id;
+ if (reply_to_msg != null) {
+ reqSend.flags |= 1;
+ reqSend.reply_to_msg_id = reply_to_msg.getId();
+ }
+ reqSend.query_id = Utilities.parseLong(params.get("query_id"));
+ reqSend.id = params.get("id");
+ performSendMessageRequest(reqSend, newMsgObj.messageOwner, null);
}
} catch (Exception e) {
FileLog.e("tmessages", e);
@@ -1607,110 +1676,120 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
putToSendingMessages(newMsgObj);
ConnectionsManager.getInstance().sendRequest(req, new RequestDelegate() {
@Override
- public void run(TLObject response, final TLRPC.TL_error error) {
- boolean isSentError = false;
- if (error == null) {
- final int oldId = newMsgObj.id;
- final boolean isBroadcast = req instanceof TLRPC.TL_messages_sendBroadcast;
- final ArrayList<TLRPC.Message> sentMessages = new ArrayList<>();
- final String attachPath = newMsgObj.attachPath;
- if (response instanceof TLRPC.TL_updateShortSentMessage) {
- TLRPC.TL_updateShortSentMessage res = (TLRPC.TL_updateShortSentMessage) response;
- newMsgObj.local_id = newMsgObj.id = res.id;
- newMsgObj.date = res.date;
- newMsgObj.entities = res.entities;
- newMsgObj.out = res.out;
- newMsgObj.unread = res.unread;
- if (res.media != null) {
- newMsgObj.media = res.media;
- newMsgObj.flags |= TLRPC.MESSAGE_FLAG_HAS_MEDIA;
- }
- if (!newMsgObj.entities.isEmpty()) {
- newMsgObj.flags |= TLRPC.MESSAGE_FLAG_HAS_ENTITIES;
- }
- MessagesController.getInstance().processNewDifferenceParams(-1, res.pts, res.date, res.pts_count);
- sentMessages.add(newMsgObj);
- } else if (response instanceof TLRPC.Updates) {
- boolean ok = false;
- for (TLRPC.Update update : ((TLRPC.Updates) response).updates) {
- if (update instanceof TLRPC.TL_updateNewMessage) {
- TLRPC.TL_updateNewMessage newMessage = (TLRPC.TL_updateNewMessage) update;
- sentMessages.add(newMessage.message);
- newMsgObj.id = newMessage.message.id;
- processSentMessage(newMsgObj, newMessage.message, originalPath);
- MessagesController.getInstance().processNewDifferenceParams(-1, newMessage.pts, -1, newMessage.pts_count);
- ok = true;
- break;
- } else if (update instanceof TLRPC.TL_updateNewChannelMessage) {
- TLRPC.TL_updateNewChannelMessage newMessage = (TLRPC.TL_updateNewChannelMessage) update;
- sentMessages.add(newMessage.message);
- newMsgObj.id = newMessage.message.id;
- if ((newMsgObj.flags & TLRPC.MESSAGE_FLAG_MEGAGROUP) != 0) {
- newMessage.message.flags |= TLRPC.MESSAGE_FLAG_MEGAGROUP;
+ public void run(final TLObject response, final TLRPC.TL_error error) {
+ AndroidUtilities.runOnUIThread(new Runnable() {
+ @Override
+ public void run() {
+ boolean isSentError = false;
+ if (error == null) {
+ final int oldId = newMsgObj.id;
+ final boolean isBroadcast = req instanceof TLRPC.TL_messages_sendBroadcast;
+ final ArrayList<TLRPC.Message> sentMessages = new ArrayList<>();
+ final String attachPath = newMsgObj.attachPath;
+ if (response instanceof TLRPC.TL_updateShortSentMessage) {
+ final TLRPC.TL_updateShortSentMessage res = (TLRPC.TL_updateShortSentMessage) response;
+ newMsgObj.local_id = newMsgObj.id = res.id;
+ newMsgObj.date = res.date;
+ newMsgObj.entities = res.entities;
+ newMsgObj.out = res.out;
+ newMsgObj.unread = res.unread;
+ if (res.media != null) {
+ newMsgObj.media = res.media;
+ newMsgObj.flags |= TLRPC.MESSAGE_FLAG_HAS_MEDIA;
}
- processSentMessage(newMsgObj, newMessage.message, originalPath);
- MessagesController.getInstance().processNewChannelDifferenceParams(newMessage.pts, newMessage.pts_count, newMessage.message.to_id.channel_id);
- ok = true;
- break;
- }
- }
- if (!ok) {
- isSentError = true;
- }
- }
-
- if (!isSentError) {
- MessagesStorage.getInstance().getStorageQueue().postRunnable(new Runnable() {
- @Override
- public void run() {
- MessagesStorage.getInstance().updateMessageStateAndId(newMsgObj.random_id, oldId, (isBroadcast ? oldId : newMsgObj.id), 0, false, newMsgObj.to_id.channel_id);
- MessagesStorage.getInstance().putMessages(sentMessages, true, false, isBroadcast, 0);
- if (isBroadcast) {
- ArrayList<TLRPC.Message> currentMessage = new ArrayList<>();
- currentMessage.add(newMsgObj);
- newMsgObj.send_state = MessageObject.MESSAGE_SEND_STATE_SENT;
- MessagesStorage.getInstance().putMessages(currentMessage, true, false, false, 0);
+ if (!newMsgObj.entities.isEmpty()) {
+ newMsgObj.flags |= TLRPC.MESSAGE_FLAG_HAS_ENTITIES;
}
- AndroidUtilities.runOnUIThread(new Runnable() {
+ Utilities.stageQueue.postRunnable(new Runnable() {
@Override
public void run() {
- newMsgObj.send_state = MessageObject.MESSAGE_SEND_STATE_SENT;
- if (isBroadcast) {
- for (TLRPC.Message message : sentMessages) {
- ArrayList<MessageObject> arr = new ArrayList<>();
- MessageObject messageObject = new MessageObject(message, null, false);
- arr.add(messageObject);
- MessagesController.getInstance().updateInterfaceWithMessages(messageObject.getDialogId(), arr, true);
- }
- NotificationCenter.getInstance().postNotificationName(NotificationCenter.dialogsNeedReload);
- }
- NotificationCenter.getInstance().postNotificationName(NotificationCenter.messageReceivedByServer, oldId, (isBroadcast ? oldId : newMsgObj.id), newMsgObj, newMsgObj.dialog_id);
- processSentMessage(oldId);
- removeFromSendingMessages(oldId);
+ MessagesController.getInstance().processNewDifferenceParams(-1, res.pts, res.date, res.pts_count);
}
});
- if (newMsgObj.media instanceof TLRPC.TL_messageMediaVideo) {
- stopVideoService(attachPath);
+ sentMessages.add(newMsgObj);
+ } else if (response instanceof TLRPC.Updates) {
+ ArrayList<TLRPC.Update> updates = ((TLRPC.Updates) response).updates;
+ TLRPC.Message message = null;
+ for (int a = 0; a < updates.size(); a++) {
+ TLRPC.Update update = updates.get(a);
+ if (update instanceof TLRPC.TL_updateNewMessage) {
+ final TLRPC.TL_updateNewMessage newMessage = (TLRPC.TL_updateNewMessage) update;
+ sentMessages.add(message = newMessage.message);
+ newMsgObj.id = newMessage.message.id;
+ Utilities.stageQueue.postRunnable(new Runnable() {
+ @Override
+ public void run() {
+ MessagesController.getInstance().processNewDifferenceParams(-1, newMessage.pts, -1, newMessage.pts_count);
+ }
+ });
+ break;
+ } else if (update instanceof TLRPC.TL_updateNewChannelMessage) {
+ final TLRPC.TL_updateNewChannelMessage newMessage = (TLRPC.TL_updateNewChannelMessage) update;
+ sentMessages.add(message = newMessage.message);
+ if ((newMsgObj.flags & TLRPC.MESSAGE_FLAG_MEGAGROUP) != 0) {
+ newMessage.message.flags |= TLRPC.MESSAGE_FLAG_MEGAGROUP;
+ }
+ Utilities.stageQueue.postRunnable(new Runnable() {
+ @Override
+ public void run() {
+ MessagesController.getInstance().processNewChannelDifferenceParams(newMessage.pts, newMessage.pts_count, newMessage.message.to_id.channel_id);
+ }
+ });
+ break;
+ }
+ }
+ if (message != null) {
+ newMsgObj.id = message.id;
+ processSentMessage(newMsgObj, message, originalPath, false);
+ } else {
+ isSentError = true;
}
}
- });
- }
- } else {
- AndroidUtilities.runOnUIThread(new Runnable() {
- @Override
- public void run() {
+
+ if (!isSentError) {
+ newMsgObj.send_state = MessageObject.MESSAGE_SEND_STATE_SENT;
+ MessagesStorage.getInstance().getStorageQueue().postRunnable(new Runnable() {
+ @Override
+ public void run() {
+ MessagesStorage.getInstance().updateMessageStateAndId(newMsgObj.random_id, oldId, (isBroadcast ? oldId : newMsgObj.id), 0, false, newMsgObj.to_id.channel_id);
+ MessagesStorage.getInstance().putMessages(sentMessages, true, false, isBroadcast, 0);
+ if (isBroadcast) {
+ ArrayList<TLRPC.Message> currentMessage = new ArrayList<>();
+ currentMessage.add(newMsgObj);
+ MessagesStorage.getInstance().putMessages(currentMessage, true, false, false, 0);
+ }
+ AndroidUtilities.runOnUIThread(new Runnable() {
+ @Override
+ public void run() {
+ if (isBroadcast) {
+ for (int a = 0; a < sentMessages.size(); a++) {
+ TLRPC.Message message = sentMessages.get(a);
+ ArrayList<MessageObject> arr = new ArrayList<>();
+ MessageObject messageObject = new MessageObject(message, null, false);
+ arr.add(messageObject);
+ MessagesController.getInstance().updateInterfaceWithMessages(messageObject.getDialogId(), arr, true);
+ }
+ NotificationCenter.getInstance().postNotificationName(NotificationCenter.dialogsNeedReload);
+ }
+ NotificationCenter.getInstance().postNotificationName(NotificationCenter.messageReceivedByServer, oldId, (isBroadcast ? oldId : newMsgObj.id), newMsgObj, newMsgObj.dialog_id);
+ processSentMessage(oldId);
+ removeFromSendingMessages(oldId);
+ }
+ });
+ if (newMsgObj.media instanceof TLRPC.TL_messageMediaVideo) {
+ stopVideoService(attachPath);
+ }
+ }
+ });
+ }
+ } else {
if (error.text.equals("PEER_FLOOD")) {
NotificationCenter.getInstance().postNotificationName(NotificationCenter.needShowAlert, 0);
}
+ isSentError = true;
}
- });
- isSentError = true;
- }
- if (isSentError) {
- MessagesStorage.getInstance().markMessageAsSendError(newMsgObj);
- AndroidUtilities.runOnUIThread(new Runnable() {
- @Override
- public void run() {
+ if (isSentError) {
+ MessagesStorage.getInstance().markMessageAsSendError(newMsgObj);
newMsgObj.send_state = MessageObject.MESSAGE_SEND_STATE_SEND_ERROR;
NotificationCenter.getInstance().postNotificationName(NotificationCenter.messageSendError, newMsgObj.id);
processSentMessage(newMsgObj.id);
@@ -1719,8 +1798,8 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
}
removeFromSendingMessages(newMsgObj.id);
}
- });
- }
+ }
+ });
}
}, new QuickAckDelegate() {
@Override
@@ -1737,38 +1816,45 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
}, ConnectionsManager.RequestFlagCanCompress | ConnectionsManager.RequestFlagInvokeAfter | (req instanceof TLRPC.TL_messages_sendMessage ? ConnectionsManager.RequestFlagNeedQuickAck : 0));
}
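// Annotation (simplified sketch of the threading pattern introduced above, names
// taken from the surrounding code): the delegate now hops onto the UI thread first,
// and only the pts bookkeeping is re-posted to Utilities.stageQueue:
ConnectionsManager.getInstance().sendRequest(req, new RequestDelegate() {
    @Override
    public void run(final TLObject response, final TLRPC.TL_error error) {
        AndroidUtilities.runOnUIThread(new Runnable() {
            @Override
            public void run() {
                // update newMsgObj and post NotificationCenter events (main thread)
                Utilities.stageQueue.postRunnable(new Runnable() {
                    @Override
                    public void run() {
                        // processNewDifferenceParams(...) stays on the stage queue
                    }
                });
            }
        });
    }
});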
- private void processSentMessage(TLRPC.Message newMsg, TLRPC.Message sentMessage, String originalPath) {
+ private void processSentMessage(TLRPC.Message newMsg, TLRPC.Message sentMessage, String originalPath, boolean post) {
if (sentMessage == null) {
return;
}
if (sentMessage.media instanceof TLRPC.TL_messageMediaPhoto && sentMessage.media.photo != null && newMsg.media instanceof TLRPC.TL_messageMediaPhoto && newMsg.media.photo != null) {
MessagesStorage.getInstance().putSentFile(originalPath, sentMessage.media.photo, 0);
- for (TLRPC.PhotoSize size : sentMessage.media.photo.sizes) {
- if (size == null || size.location == null || size instanceof TLRPC.TL_photoSizeEmpty || size.type == null) {
- continue;
- }
- for (TLRPC.PhotoSize size2 : newMsg.media.photo.sizes) {
- if (size2 == null || size2.location == null || size2.type == null) {
+ if (newMsg.media.photo.sizes.size() == 1 && newMsg.media.photo.sizes.get(0).location instanceof TLRPC.TL_fileLocationUnavailable) {
+ newMsg.media.photo.sizes = sentMessage.media.photo.sizes;
+ } else {
+ for (int a = 0; a < sentMessage.media.photo.sizes.size(); a++) {
+ TLRPC.PhotoSize size = sentMessage.media.photo.sizes.get(a);
+ if (size == null || size.location == null || size instanceof TLRPC.TL_photoSizeEmpty || size.type == null) {
continue;
}
- if (size2.location.volume_id == Integer.MIN_VALUE && size.type.equals(size2.type) || size.w == size2.w && size.h == size2.h) {
- String fileName = size2.location.volume_id + "_" + size2.location.local_id;
- String fileName2 = size.location.volume_id + "_" + size.location.local_id;
- if (fileName.equals(fileName2)) {
+ for (int b = 0; b < newMsg.media.photo.sizes.size(); b++) {
+ TLRPC.PhotoSize size2 = newMsg.media.photo.sizes.get(b);
+ if (size2 == null || size2.location == null || size2.type == null) {
+ continue;
+ }
+ if (size2.location.volume_id == Integer.MIN_VALUE && size.type.equals(size2.type) || size.w == size2.w && size.h == size2.h) {
+ String fileName = size2.location.volume_id + "_" + size2.location.local_id;
+ String fileName2 = size.location.volume_id + "_" + size.location.local_id;
+ if (fileName.equals(fileName2)) {
+ break;
+ }
+ File cacheFile = new File(FileLoader.getInstance().getDirectory(FileLoader.MEDIA_DIR_CACHE), fileName + ".jpg");
+ File cacheFile2;
+ if (sentMessage.media.photo.sizes.size() == 1 || size.w > 90 || size.h > 90) {
+ cacheFile2 = FileLoader.getPathToAttach(size);
+ } else {
+ cacheFile2 = new File(FileLoader.getInstance().getDirectory(FileLoader.MEDIA_DIR_CACHE), fileName2 + ".jpg");
+ }
+ cacheFile.renameTo(cacheFile2);
+ ImageLoader.getInstance().replaceImageInCache(fileName, fileName2, size.location, post);
+ size2.location = size.location;
+ size2.size = size.size;
break;
}
- File cacheFile = new File(FileLoader.getInstance().getDirectory(FileLoader.MEDIA_DIR_CACHE), fileName + ".jpg");
- File cacheFile2;
- if (sentMessage.media.photo.sizes.size() == 1 || size.w > 90 || size.h > 90) {
- cacheFile2 = FileLoader.getPathToAttach(size);
- } else {
- cacheFile2 = new File(FileLoader.getInstance().getDirectory(FileLoader.MEDIA_DIR_CACHE), fileName2 + ".jpg");
- }
- cacheFile.renameTo(cacheFile2);
- ImageLoader.getInstance().replaceImageInCache(fileName, fileName2, size.location);
- size2.location = size.location;
- break;
}
}
}
@@ -1788,7 +1874,7 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
File cacheFile = new File(FileLoader.getInstance().getDirectory(FileLoader.MEDIA_DIR_CACHE), fileName + ".jpg");
File cacheFile2 = new File(FileLoader.getInstance().getDirectory(FileLoader.MEDIA_DIR_CACHE), fileName2 + ".jpg");
cacheFile.renameTo(cacheFile2);
- ImageLoader.getInstance().replaceImageInCache(fileName, fileName2, size.location);
+ ImageLoader.getInstance().replaceImageInCache(fileName, fileName2, size.location, post);
size2.location = size.location;
}
}
@@ -1819,8 +1905,9 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
File cacheFile = new File(FileLoader.getInstance().getDirectory(FileLoader.MEDIA_DIR_CACHE), fileName + ".jpg");
File cacheFile2 = new File(FileLoader.getInstance().getDirectory(FileLoader.MEDIA_DIR_CACHE), fileName2 + ".jpg");
cacheFile.renameTo(cacheFile2);
- ImageLoader.getInstance().replaceImageInCache(fileName, fileName2, size.location);
+ ImageLoader.getInstance().replaceImageInCache(fileName, fileName2, size.location, post);
size2.location = size.location;
+ size2.size = size.size;
}
} else if (MessageObject.isStickerMessage(sentMessage) && size2 != null && size2.location != null) {
size.location = size2.location;
@@ -1830,6 +1917,12 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
newMsg.media.document.id = sentMessage.media.document.id;
newMsg.media.document.access_hash = sentMessage.media.document.access_hash;
newMsg.media.document.attributes = sentMessage.media.document.attributes;
+ newMsg.media.document.size = sentMessage.media.document.size;
+ newMsg.media.document.mime_type = sentMessage.media.document.mime_type;
+
+ if (MessageObject.isOut(sentMessage) && MessageObject.isNewGifDocument(sentMessage.media.document)) {
+ MessagesController.addNewGifToRecent(sentMessage.media.document, sentMessage.date);
+ }
if (newMsg.attachPath != null && newMsg.attachPath.startsWith(FileLoader.getInstance().getDirectory(FileLoader.MEDIA_DIR_CACHE).getAbsolutePath())) {
File cacheFile = new File(newMsg.attachPath);
@@ -1840,7 +1933,7 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
} else {
newMsg.attachPath = "";
if (originalPath != null && originalPath.startsWith("http")) {
- MessagesStorage.getInstance().addRecentLocalFile(originalPath, cacheFile2.toString());
+ MessagesStorage.getInstance().addRecentLocalFile(originalPath, cacheFile2.toString(), newMsg.media.document);
}
}
} else {
@@ -2075,13 +2168,16 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
}
}
+ final HashMap<String, String> params = new HashMap<>();
+ if (originalPath != null) {
+ params.put("originalPath", originalPath);
+ }
final TLRPC.TL_document documentFinal = document;
- final String originalPathFinal = originalPath;
final String pathFinal = path;
AndroidUtilities.runOnUIThread(new Runnable() {
@Override
public void run() {
- SendMessagesHelper.getInstance().sendMessage(documentFinal, originalPathFinal, pathFinal, dialog_id, reply_to_msg, asAdmin);
+ SendMessagesHelper.getInstance().sendMessage(documentFinal, pathFinal, dialog_id, reply_to_msg, asAdmin, params);
}
});
return true;
@@ -2139,12 +2235,15 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
}
}
- final String originalPathFinal = originalPath;
+ final HashMap<String, String> params = new HashMap<>();
+ if (originalPath != null) {
+ params.put("originalPath", originalPath);
+ }
final TLRPC.TL_document documentFinal = document;
AndroidUtilities.runOnUIThread(new Runnable() {
@Override
public void run() {
- SendMessagesHelper.getInstance().sendMessage(documentFinal, originalPathFinal, messageObject.messageOwner.attachPath, dialog_id, reply_to_msg, asAdmin);
+ SendMessagesHelper.getInstance().sendMessage(documentFinal, messageObject.messageOwner.attachPath, dialog_id, reply_to_msg, asAdmin, params);
}
});
}
@@ -2210,6 +2309,109 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
prepareSendingPhotos(paths, uris, dialog_id, reply_to_msg, captions, asAdmin);
}
+ public static void prepareSendingBotContextResult(final TLRPC.BotInlineResult result, final HashMap<String, String> params, final long dialog_id, final MessageObject reply_to_msg, final boolean asAdmin) {
+ if (result == null) {
+ return;
+ }
+ if (result.send_message instanceof TLRPC.TL_botInlineMessageMediaAuto) {
+ new Thread(new Runnable() {
+ @Override
+ public void run() {
+ String finalPath = null;
+ TLRPC.TL_document document = null;
+ TLRPC.TL_photo photo = null;
+ if (result instanceof TLRPC.TL_botInlineMediaResultDocument) {
+ if (result.document != null) {
+ if (result.document instanceof TLRPC.TL_document) {
+ document = (TLRPC.TL_document) result.document;
+ }
+ }
+ } else if (result instanceof TLRPC.TL_botInlineMediaResultPhoto) {
+ if (result.photo != null) {
+ if (result.photo instanceof TLRPC.TL_photo) {
+ photo = (TLRPC.TL_photo) result.photo;
+ }
+ }
+ } else {
+
+ if (result.content_url != null) {
+ finalPath = new File(FileLoader.getInstance().getDirectory(FileLoader.MEDIA_DIR_CACHE), Utilities.MD5(result.content_url) + "." + ImageLoader.getHttpUrlExtension(result.content_url)).getAbsolutePath();
+ if (result.type.equals("gif")) {
+ document = new TLRPC.TL_document();
+ document.id = 0;
+ document.date = ConnectionsManager.getInstance().getCurrentTime();
+ TLRPC.TL_documentAttributeFilename fileName = new TLRPC.TL_documentAttributeFilename();
+ fileName.file_name = "animation.gif";
+ document.attributes.add(fileName);
+ document.size = 0;
+ document.dc_id = 0;
+ document.mime_type = "image/gif";
+ try {
+ Bitmap bitmap;
+ if (finalPath.endsWith("mp4")) {
+ bitmap = ThumbnailUtils.createVideoThumbnail(finalPath, MediaStore.Video.Thumbnails.MINI_KIND);
+ } else {
+ bitmap = ImageLoader.loadBitmap(finalPath, null, 90, 90, true);
+ }
+ if (bitmap != null) {
+ document.thumb = ImageLoader.scaleAndSaveImage(bitmap, 90, 90, 55, false);
+ bitmap.recycle();
+ }
+ } catch (Exception e) {
+ FileLog.e("tmessages", e);
+ }
+ if (document.thumb == null) {
+ document.thumb = new TLRPC.TL_photoSize();
+ document.thumb.w = result.w;
+ document.thumb.h = result.h;
+ document.thumb.size = 0;
+ document.thumb.location = new TLRPC.TL_fileLocationUnavailable();
+ document.thumb.type = "x";
+ }
+ } else if (result.type.equals("photo")) {
+ File cacheFile = new File(finalPath);
+ if (cacheFile.exists()) {
+ photo = SendMessagesHelper.getInstance().generatePhotoSizes(cacheFile.toString(), null);
+ }
+ if (photo == null) {
+ photo = new TLRPC.TL_photo();
+ photo.date = ConnectionsManager.getInstance().getCurrentTime();
+ TLRPC.TL_photoSize photoSize = new TLRPC.TL_photoSize();
+ photoSize.w = result.w;
+ photoSize.h = result.h;
+ photoSize.size = 1;
+ photoSize.location = new TLRPC.TL_fileLocationUnavailable();
+ photoSize.type = "x";
+ photo.sizes.add(photoSize);
+ }
+ }
+ }
+ }
+ final String finalPathFinal = finalPath;
+ final TLRPC.TL_document finalDocument = document;
+ final TLRPC.TL_photo finalPhoto = photo;
+ if (params != null && result.content_url != null) {
+ params.put("originalPath", result.content_url);
+ }
+ AndroidUtilities.runOnUIThread(new Runnable() {
+ @Override
+ public void run() {
+ if (finalDocument != null) {
+ finalDocument.caption = result.send_message.caption;
+ SendMessagesHelper.getInstance().sendMessage(finalDocument, finalPathFinal, dialog_id, reply_to_msg, asAdmin, params);
+ } else if (finalPhoto != null) {
+ finalPhoto.caption = result.send_message.caption;
+ SendMessagesHelper.getInstance().sendMessage(finalPhoto, result.content_url, dialog_id, reply_to_msg, asAdmin, params);
+ }
+ }
+ });
+ }
+ }).run();
+ } else if (result.send_message instanceof TLRPC.TL_botInlineMessageText) {
+ SendMessagesHelper.getInstance().sendMessage(result.send_message.message, dialog_id, reply_to_msg, null, !result.send_message.no_webpage, asAdmin, result.send_message.entities, params);
+ }
+ }
+
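// Annotation (hedged usage sketch for prepareSendingBotContextResult above): the
// params map is assumed to carry the inline-query ids consumed by the type == 9 send
// path earlier in this patch; result.id and result.query_id come from BotInlineResult.
HashMap<String, String> params = new HashMap<>();
params.put("id", result.id);
params.put("query_id", "" + result.query_id);
SendMessagesHelper.prepareSendingBotContextResult(result, params, dialog_id, reply_to_msg, false);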
public static void prepareSendingPhotosSearch(final ArrayList<MediaController.SearchImage> photos, final long dialog_id, final MessageObject reply_to_msg, final boolean asAdmin) {
if (photos == null || photos.isEmpty()) {
return;
@@ -2221,14 +2423,27 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
for (int a = 0; a < photos.size(); a++) {
final MediaController.SearchImage searchImage = photos.get(a);
if (searchImage.type == 1) {
+ final HashMap<String, String> params = new HashMap<>();
TLRPC.TL_document document = null;
- if (!isEncrypted) {
- document = (TLRPC.TL_document) MessagesStorage.getInstance().getSentFile(searchImage.imageUrl, !isEncrypted ? 1 : 4);
+ File cacheFile;
+ if (searchImage.document instanceof TLRPC.TL_document) {
+ document = (TLRPC.TL_document) searchImage.document;
+ cacheFile = FileLoader.getPathToAttach(document, true);
+ } else {
+ if (!isEncrypted) {
+ TLRPC.Document doc = (TLRPC.Document) MessagesStorage.getInstance().getSentFile(searchImage.imageUrl, !isEncrypted ? 1 : 4);
+ if (doc instanceof TLRPC.TL_document) {
+ document = (TLRPC.TL_document) doc;
+ }
+ }
+ String md5 = Utilities.MD5(searchImage.imageUrl) + "." + ImageLoader.getHttpUrlExtension(searchImage.imageUrl);
+ cacheFile = new File(FileLoader.getInstance().getDirectory(FileLoader.MEDIA_DIR_CACHE), md5);
}
- String md5 = Utilities.MD5(searchImage.imageUrl) + "." + ImageLoader.getHttpUrlExtension(searchImage.imageUrl);
- File cacheFile = new File(FileLoader.getInstance().getDirectory(FileLoader.MEDIA_DIR_CACHE), md5);
if (document == null) {
- File thumbFile;
+ if (searchImage.localUrl != null) {
+ params.put("url", searchImage.localUrl);
+ }
+ File thumbFile = null;
document = new TLRPC.TL_document();
document.id = 0;
document.date = ConnectionsManager.getInstance().getCurrentTime();
@@ -2242,7 +2457,9 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
thumbFile = cacheFile;
} else {
cacheFile = null;
- String thumb = Utilities.MD5(searchImage.thumbUrl) + "." + ImageLoader.getHttpUrlExtension(searchImage.imageUrl);
+ }
+ if (thumbFile == null) {
+ String thumb = Utilities.MD5(searchImage.thumbUrl) + "." + ImageLoader.getHttpUrlExtension(searchImage.thumbUrl);
thumbFile = new File(FileLoader.getInstance().getDirectory(FileLoader.MEDIA_DIR_CACHE), thumb);
if (!thumbFile.exists()) {
thumbFile = null;
@@ -2250,7 +2467,12 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
}
if (thumbFile != null) {
try {
- Bitmap bitmap = ImageLoader.loadBitmap(thumbFile.getAbsolutePath(), null, 90, 90, true);
+ Bitmap bitmap;
+ if (thumbFile.getAbsolutePath().endsWith("mp4")) {
+ bitmap = ThumbnailUtils.createVideoThumbnail(thumbFile.getAbsolutePath(), MediaStore.Video.Thumbnails.MINI_KIND);
+ } else {
+ bitmap = ImageLoader.loadBitmap(thumbFile.getAbsolutePath(), null, 90, 90, true);
+ }
if (bitmap != null) {
document.thumb = ImageLoader.scaleAndSaveImage(bitmap, 90, 90, 55, isEncrypted);
bitmap.recycle();
@@ -2258,7 +2480,8 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
} catch (Exception e) {
FileLog.e("tmessages", e);
}
- } else {
+ }
+ if (document.thumb == null) {
document.thumb = new TLRPC.TL_photoSize();
document.thumb.w = searchImage.width;
document.thumb.h = searchImage.height;
@@ -2268,13 +2491,19 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
}
}
+ if (searchImage.caption != null) {
+ document.caption = searchImage.caption.toString();
+ }
final TLRPC.TL_document documentFinal = document;
final String originalPathFinal = searchImage.imageUrl;
final String pathFinal = cacheFile == null ? searchImage.imageUrl : cacheFile.toString();
+ if (params != null && searchImage.imageUrl != null) {
+ params.put("originalPath", searchImage.imageUrl);
+ }
AndroidUtilities.runOnUIThread(new Runnable() {
@Override
public void run() {
- SendMessagesHelper.getInstance().sendMessage(documentFinal, originalPathFinal, pathFinal, dialog_id, reply_to_msg, asAdmin);
+ SendMessagesHelper.getInstance().sendMessage(documentFinal, pathFinal, dialog_id, reply_to_msg, asAdmin, params);
}
});
} else {
@@ -2315,13 +2544,16 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
if (searchImage.caption != null) {
photo.caption = searchImage.caption.toString();
}
- final String originalPathFinal = searchImage.imageUrl;
final TLRPC.TL_photo photoFinal = photo;
final boolean needDownloadHttpFinal = needDownloadHttp;
+ final HashMap<String, String> params = new HashMap<>();
+ if (searchImage.imageUrl != null) {
+ params.put("originalPath", searchImage.imageUrl);
+ }
AndroidUtilities.runOnUIThread(new Runnable() {
@Override
public void run() {
- SendMessagesHelper.getInstance().sendMessage(photoFinal, originalPathFinal, needDownloadHttpFinal ? searchImage.imageUrl : null, dialog_id, reply_to_msg, asAdmin);
+ SendMessagesHelper.getInstance().sendMessage(photoFinal, needDownloadHttpFinal ? searchImage.imageUrl : null, dialog_id, reply_to_msg, asAdmin, params);
}
});
}
@@ -2360,7 +2592,7 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
int count = (int) Math.ceil(textFinal.length() / 4096.0f);
for (int a = 0; a < count; a++) {
String mess = textFinal.substring(a * 4096, Math.min((a + 1) * 4096, textFinal.length()));
- SendMessagesHelper.getInstance().sendMessage(mess, dialog_id, null, null, true, asAdmin);
+ SendMessagesHelper.getInstance().sendMessage(mess, dialog_id, null, null, true, asAdmin, null, null);
}
}
}
@@ -2458,12 +2690,15 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
if (captions != null) {
photo.caption = captions.get(a);
}
- final String originalPathFinal = originalPath;
final TLRPC.TL_photo photoFinal = photo;
+ final HashMap<String, String> params = new HashMap<>();
+ if (originalPath != null) {
+ params.put("originalPath", originalPath);
+ }
AndroidUtilities.runOnUIThread(new Runnable() {
@Override
public void run() {
- SendMessagesHelper.getInstance().sendMessage(photoFinal, originalPathFinal, null, dialog_id, reply_to_msg, asAdmin);
+ SendMessagesHelper.getInstance().sendMessage(photoFinal, null, dialog_id, reply_to_msg, asAdmin, params);
}
});
}
@@ -2586,10 +2821,14 @@ public class SendMessagesHelper implements NotificationCenter.NotificationCenter
final TLRPC.TL_video videoFinal = video;
final String originalPathFinal = originalPath;
final String finalPath = path;
+ final HashMap<String, String> params = new HashMap<>();
+ if (originalPath != null) {
+ params.put("originalPath", originalPath);
+ }
AndroidUtilities.runOnUIThread(new Runnable() {
@Override
public void run() {
- SendMessagesHelper.getInstance().sendMessage(videoFinal, videoEditedInfo, originalPathFinal, finalPath, dialog_id, reply_to_msg, asAdmin);
+ SendMessagesHelper.getInstance().sendMessage(videoFinal, videoEditedInfo, finalPath, dialog_id, reply_to_msg, asAdmin, params);
}
});
} else {
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/SmsListener.java b/TMessagesProj/src/main/java/org/telegram/messenger/SmsListener.java
index bb7fad473..816939e2a 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/SmsListener.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/SmsListener.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/TgChooserTargetService.java b/TMessagesProj/src/main/java/org/telegram/messenger/TgChooserTargetService.java
index fea43faaa..6f017e673 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/TgChooserTargetService.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/TgChooserTargetService.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/UserConfig.java b/TMessagesProj/src/main/java/org/telegram/messenger/UserConfig.java
index ffaa8848c..6cda1ba7d 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/UserConfig.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/UserConfig.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
@@ -68,7 +68,7 @@ public class UserConfig {
SharedPreferences preferences = ApplicationLoader.applicationContext.getSharedPreferences("userconfing", Context.MODE_PRIVATE);
SharedPreferences.Editor editor = preferences.edit();
editor.putBoolean("registeredForPush", registeredForPush);
- editor.putString("pushString", pushString);
+ editor.putString("pushString2", pushString);
editor.putInt("lastSendMessageId", lastSendMessageId);
editor.putInt("lastLocalId", lastLocalId);
editor.putString("contactsHash", contactsHash);
@@ -182,7 +182,7 @@ public class UserConfig {
SharedPreferences preferences = ApplicationLoader.applicationContext.getSharedPreferences("userconfing", Context.MODE_PRIVATE);
registeredForPush = preferences.getBoolean("registeredForPush", false);
- pushString = preferences.getString("pushString", "");
+ pushString = preferences.getString("pushString2", "");
lastSendMessageId = preferences.getInt("lastSendMessageId", -210000);
lastLocalId = preferences.getInt("lastLocalId", -210000);
contactsHash = preferences.getString("contactsHash", "");
@@ -209,7 +209,7 @@ public class UserConfig {
} else {
SharedPreferences preferences = ApplicationLoader.applicationContext.getSharedPreferences("userconfing", Context.MODE_PRIVATE);
registeredForPush = preferences.getBoolean("registeredForPush", false);
- pushString = preferences.getString("pushString", "");
+ pushString = preferences.getString("pushString2", "");
lastSendMessageId = preferences.getInt("lastSendMessageId", -210000);
lastLocalId = preferences.getInt("lastLocalId", -210000);
contactsHash = preferences.getString("contactsHash", "");
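// Annotation: renaming the preference key (pushString -> pushString2) means the first
// read after upgrade returns "", which presumably forces the push token to be
// re-registered, e.g.:
String pushString = preferences.getString("pushString2", "");  // "" right after upgrade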
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/UserObject.java b/TMessagesProj/src/main/java/org/telegram/messenger/UserObject.java
index 525c2ed81..c0aa14438 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/UserObject.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/UserObject.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/Utilities.java b/TMessagesProj/src/main/java/org/telegram/messenger/Utilities.java
index b8a415250..a1019ba34 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/Utilities.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/Utilities.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
@@ -47,7 +47,8 @@ public class Utilities {
public native static void loadBitmap(String path, Bitmap bitmap, int scale, int width, int height, int stride);
public native static int pinBitmap(Bitmap bitmap);
- public native static void blurBitmap(Object bitmap, int radius, int unpin);
+ public native static int unpinBitmap(Bitmap bitmap);
+ public native static void blurBitmap(Object bitmap, int radius, int unpin, int width, int height, int stride);
public native static void calcCDT(ByteBuffer hsvBuffer, int width, int height, ByteBuffer buffer);
public native static boolean loadWebpImage(Bitmap bitmap, ByteBuffer buffer, int len, BitmapFactory.Options options, boolean unpin);
public native static int convertVideoFrame(ByteBuffer src, ByteBuffer dest, int destFormat, int width, int height, int padding, int swap);
@@ -74,6 +75,23 @@ public class Utilities {
return val;
}
+ public static Long parseLong(String value) {
+ if (value == null) {
+ return 0L;
+ }
+ Long val = 0L;
+ try {
+ Matcher matcher = pattern.matcher(value);
+ if (matcher.find()) {
+ String num = matcher.group(0);
+ val = Long.parseLong(num);
+ }
+ } catch (Exception e) {
+ FileLog.e("tmessages", e);
+ }
+ return val;
+ }
+
public static String parseIntToString(String value) {
Matcher matcher = pattern.matcher(value);
if (matcher.find()) {
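// Annotation (usage sketch): like parseInt above it, the new parseLong extracts the
// first match of the shared "pattern" regex (assumed to be a digit run) and returns
// 0L instead of throwing, which suits untrusted params maps:
long queryId = Utilities.parseLong(params.get("query_id"));  // null or "abc" -> 0L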
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/VideoEditedInfo.java b/TMessagesProj/src/main/java/org/telegram/messenger/VideoEditedInfo.java
index 39bccc1e2..12c8666a7 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/VideoEditedInfo.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/VideoEditedInfo.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/VideoEncodingService.java b/TMessagesProj/src/main/java/org/telegram/messenger/VideoEncodingService.java
index 9176c2f12..704c8871e 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/VideoEncodingService.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/VideoEncodingService.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/WearReplyReceiver.java b/TMessagesProj/src/main/java/org/telegram/messenger/WearReplyReceiver.java
index f19e894fa..46371c2bf 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/WearReplyReceiver.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/WearReplyReceiver.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger;
@@ -31,7 +31,7 @@ public class WearReplyReceiver extends BroadcastReceiver {
if (dialog_id == 0 || max_id == 0) {
return;
}
- SendMessagesHelper.getInstance().sendMessage(text.toString(), dialog_id, null, null, true, false);
+ SendMessagesHelper.getInstance().sendMessage(text.toString(), dialog_id, null, null, true, false, null, null);
MessagesController.getInstance().markDialogAsRead(dialog_id, max_id, max_id, 0, true, false);
}
}
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/query/BotQuery.java b/TMessagesProj/src/main/java/org/telegram/messenger/query/BotQuery.java
index c4678265d..ab3f8a9f8 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/query/BotQuery.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/query/BotQuery.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger.query;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/query/MessagesSearchQuery.java b/TMessagesProj/src/main/java/org/telegram/messenger/query/MessagesSearchQuery.java
index 9ae9d3f30..d4014a780 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/query/MessagesSearchQuery.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/query/MessagesSearchQuery.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger.query;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/query/ReplyMessageQuery.java b/TMessagesProj/src/main/java/org/telegram/messenger/query/ReplyMessageQuery.java
index 554d421c8..2fea1da49 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/query/ReplyMessageQuery.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/query/ReplyMessageQuery.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger.query;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/query/SharedMediaQuery.java b/TMessagesProj/src/main/java/org/telegram/messenger/query/SharedMediaQuery.java
index f90134ef0..d02912767 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/query/SharedMediaQuery.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/query/SharedMediaQuery.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger.query;
@@ -172,7 +172,7 @@ public class SharedMediaQuery {
return false;
} else if (message.media instanceof TLRPC.TL_messageMediaPhoto ||
message.media instanceof TLRPC.TL_messageMediaVideo ||
- message.media instanceof TLRPC.TL_messageMediaDocument ||
+ message.media instanceof TLRPC.TL_messageMediaDocument && !MessageObject.isGifDocument(message.media.document) ||
message.media instanceof TLRPC.TL_messageMediaAudio) {
return true;
} else if (!message.entities.isEmpty()) {
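// Annotation on the hunk above: && binds tighter than ||, so the added
// !MessageObject.isGifDocument(...) guard applies only to the TL_messageMediaDocument
// arm; the photo/video/audio arms are unchanged. Schematically (hypothetical booleans):
boolean canAdd = isPhoto || isVideo || (isDocument && !isGif) || isAudio;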
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/query/StickersQuery.java b/TMessagesProj/src/main/java/org/telegram/messenger/query/StickersQuery.java
index a0fbd5151..cbf933072 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/query/StickersQuery.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/query/StickersQuery.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger.query;
@@ -62,7 +62,7 @@ public class StickersQuery {
}
public static void checkStickers() {
- if (!loadingStickers && (!stickersLoaded || loadDate < (System.currentTimeMillis() / 1000 - 60 * 60))) {
+ if (!loadingStickers && (!stickersLoaded || Math.abs(System.currentTimeMillis() / 1000 - loadDate) >= 60 * 60)) {
loadStickers(true, false);
}
}
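// Annotation: the Math.abs rewrite above guards against the device clock moving
// backwards; under the old "loadDate < now - 3600" test, a loadDate stamped by a fast
// clock could suppress reloads indefinitely. The staleness predicate is now:
boolean stale = !stickersLoaded || Math.abs(System.currentTimeMillis() / 1000 - loadDate) >= 60 * 60;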
@@ -332,7 +332,7 @@ public class StickersQuery {
Utilities.stageQueue.postRunnable(new Runnable() {
@Override
public void run() {
- if (cache && (res == null || date < (int) (System.currentTimeMillis() / 1000 - 60 * 60)) || !cache && res == null && hash == 0) {
+ if (cache && (res == null || Math.abs(System.currentTimeMillis() / 1000 - date) >= 60 * 60) || !cache && res == null && hash == 0) {
AndroidUtilities.runOnUIThread(new Runnable() {
@Override
public void run() {
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/support/widget/GridLayoutManager.java b/TMessagesProj/src/main/java/org/telegram/messenger/support/widget/GridLayoutManager.java
index 0c9596a89..f347ce3c0 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/support/widget/GridLayoutManager.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/support/widget/GridLayoutManager.java
@@ -24,8 +24,6 @@ import android.util.SparseIntArray;
import android.view.View;
import android.view.ViewGroup;
-import org.telegram.messenger.support.widget.RecyclerView;
-
import java.util.Arrays;
/**
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/support/widget/RecyclerView.java b/TMessagesProj/src/main/java/org/telegram/messenger/support/widget/RecyclerView.java
index f780729bf..6f160865a 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/support/widget/RecyclerView.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/support/widget/RecyclerView.java
@@ -4184,7 +4184,7 @@ public class RecyclerView extends ViewGroup implements ScrollingView, NestedScro
private SparseIntArray mMaxScrap = new SparseIntArray();
private int mAttachCount = 0;
- private static final int DEFAULT_MAX_SCRAP = 5;
+ private static final int DEFAULT_MAX_SCRAP = 10;
public void clear() {
mScrap.clear();
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/video/InputSurface.java b/TMessagesProj/src/main/java/org/telegram/messenger/video/InputSurface.java
index 454578b03..a1d72a535 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/video/InputSurface.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/video/InputSurface.java
@@ -27,7 +27,7 @@ import android.view.Surface;
@TargetApi(17)
public class InputSurface {
- private static final boolean VERBOSE = false;
+
private static final int EGL_RECORDABLE_ANDROID = 0x3142;
private static final int EGL_OPENGL_ES2_BIT = 4;
private EGLDisplay mEGLDisplay;
@@ -124,8 +124,7 @@ public class InputSurface {
private void checkEglError(String msg) {
boolean failed = false;
- int error;
- while ((error = EGL14.eglGetError()) != EGL14.EGL_SUCCESS) {
+ while (EGL14.eglGetError() != EGL14.EGL_SUCCESS) {
failed = true;
}
if (failed) {
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/video/MP4Builder.java b/TMessagesProj/src/main/java/org/telegram/messenger/video/MP4Builder.java
index eae9c50b8..943b58832 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/video/MP4Builder.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/video/MP4Builder.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger.video;
@@ -378,7 +378,7 @@ public class MP4Builder {
SampleToChunkBox stsc = new SampleToChunkBox();
stsc.setEntries(new LinkedList<SampleToChunkBox.Entry>());
- long lastOffset = -1;
+ long lastOffset;
int lastChunkNumber = 1;
int lastSampleCount = 0;
diff --git a/TMessagesProj/src/main/java/org/telegram/messenger/video/Mp4Movie.java b/TMessagesProj/src/main/java/org/telegram/messenger/video/Mp4Movie.java
index fcdc92232..81dc67d92 100644
--- a/TMessagesProj/src/main/java/org/telegram/messenger/video/Mp4Movie.java
+++ b/TMessagesProj/src/main/java/org/telegram/messenger/video/Mp4Movie.java
@@ -3,7 +3,7 @@
* It is licensed under GNU GPL v. 2 or later.
* You should have received a copy of the license in this archive (see LICENSE).
*
- * Copyright Nikolai Kudashov, 2013-2015.
+ * Copyright Nikolai Kudashov, 2013-2016.
*/
package org.telegram.messenger.video;
@@ -20,7 +20,7 @@ import java.util.ArrayList;
@TargetApi(16)
public class Mp4Movie {
private Matrix matrix = Matrix.ROTATE_0;
- private ArrayList