diff --git a/ChangeLog.md b/ChangeLog.md index f91afb4c4..05f32cec4 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -1247,3 +1247,42 @@ * Updated Tor Snowflake bridge to version 2.10.1. * Fixed LAN rules handling for firewall. * Updated translations. + +**InviZible Pro beta 2.4.0** +* Improved censorship resistance of Tor Obfs4 bridges. +* Updated Tor Snowflake bridge STUN servers. +* Various fixes and optimizations. + +**InviZible Pro beta 2.4.1** +* Display the destination port in real-time logs. +* SOCKS5 proxy fixes and improvements. +* Improved handling of local networks. +* Tor Browser, OnionShare, Orbot, Briar, and Cwtch are excluded from Tor by default, as each contains its own Tor instance. +* Fixes and optimizations. + +**InviZible Pro beta 2.4.2** +* Added IGMP protocol logging in real-time logs. +* Optimized performance and battery usage in VPN mode. +* Various fixes for Root mode. +* Added Dutch translation. +* Updated Polish, Portuguese (Brazil), Persian and Spanish translations. + +**InviZible Pro beta 2.4.3** +* Added x86_64 version for ChromeOS and emulators. +* Updated Tor. +* Updated Purple I2P to version 2.55.0. +* Updated default DNSCrypt servers. +* Optimized performance in VPN mode. +* Added Tamil translation. +* Updated Japanese, Chinese and Dutch translations. +* Fixes and optimizations. + +**InviZible Pro stable 7.0.0** +* Updated Purple I2P to version 2.55.0. +* Added x86_64 version for ChromeOS and emulators. +* Improved censorship resistance of Tor Obfs4 bridges. +* Optimized performance and battery usage in VPN mode. +* Display the destination port in real-time logs. +* SOCKS5 proxy fixes and improvements. +* Added Dutch and Tamil translations. +* Various fixes and optimizations. diff --git a/Nflog-android b/Nflog-android index e55778261..2a35f95e0 160000 --- a/Nflog-android +++ b/Nflog-android @@ -1 +1 @@ -Subproject commit e5577826150fb29ad23009f209a5055e67b29bf9 +Subproject commit 2a35f95e0350882f6c8dfeef7273cb5c42c9fa3d diff --git a/PurpleI2PBuildScript b/PurpleI2PBuildScript index 02b42ec7c..30e8e8dd5 160000 --- a/PurpleI2PBuildScript +++ b/PurpleI2PBuildScript @@ -1 +1 @@ -Subproject commit 02b42ec7c90eea96213d0b6549a99fe266cdbcb7 +Subproject commit 30e8e8dd5d9cc8f0020136a9c3d4e6efae24d99f diff --git a/README.md b/README.md index 1ce390fa8..66dafd343 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ ### [Google Play stable version](https://play.google.com/store/apps/details?id=pan.alexander.tordnscrypt.gp) -### [Download the latest version from Github](https://github.com/Gedsh/InviZible/releases/latest) +### [Download the latest version from GitHub](https://github.com/Gedsh/InviZible/releases/latest) ### [IzzyOnDroid F-Droid beta version](https://apt.izzysoft.de/fdroid/index/apk/pan.alexander.tordnscrypt) @@ -178,7 +178,7 @@ Please note that the XMR address has changed. 
The old address is no longer valid [GNU General Public License version 3](https://www.gnu.org/licenses/gpl-3.0.txt) -Copyright (c) 2019-2024 Garmatin Oleksandr invizible.soft@gmail.com +Copyright (c) 2019-2025 Garmatin Oleksandr invizible.soft@gmail.com All rights reserved diff --git a/TorBuildScript b/TorBuildScript index eabb5f64c..eef725f61 160000 --- a/TorBuildScript +++ b/TorBuildScript @@ -1 +1 @@ -Subproject commit eabb5f64c95f5607924002178212cb31fbf2e136 +Subproject commit eef725f612fc5f0fe3938d65bfa12db9b6f34cb9 diff --git a/Wiki/Screenshots/Firewall/firewall-vpn.png b/Wiki/Screenshots/Firewall/firewall-vpn.png new file mode 100644 index 000000000..8fe59c321 Binary files /dev/null and b/Wiki/Screenshots/Firewall/firewall-vpn.png differ diff --git a/Wiki/Screenshots/InviZible-Rethink/Screenshot_invizible_rethink_1.png b/Wiki/Screenshots/InviZible-Rethink/Screenshot_invizible_rethink_1.png new file mode 100755 index 000000000..15be86566 Binary files /dev/null and b/Wiki/Screenshots/InviZible-Rethink/Screenshot_invizible_rethink_1.png differ diff --git a/Wiki/Screenshots/InviZible-Rethink/Screenshot_invizible_rethink_2.png b/Wiki/Screenshots/InviZible-Rethink/Screenshot_invizible_rethink_2.png new file mode 100755 index 000000000..bf073cb96 Binary files /dev/null and b/Wiki/Screenshots/InviZible-Rethink/Screenshot_invizible_rethink_2.png differ diff --git a/Wiki/Screenshots/InviZible-Rethink/Screenshot_invizible_rethink_3.png b/Wiki/Screenshots/InviZible-Rethink/Screenshot_invizible_rethink_3.png new file mode 100755 index 000000000..4f19d2e66 Binary files /dev/null and b/Wiki/Screenshots/InviZible-Rethink/Screenshot_invizible_rethink_3.png differ diff --git a/Wiki/Screenshots/InviZible-Rethink/Screenshot_invizible_rethink_4.png b/Wiki/Screenshots/InviZible-Rethink/Screenshot_invizible_rethink_4.png new file mode 100755 index 000000000..b51c87eab Binary files /dev/null and b/Wiki/Screenshots/InviZible-Rethink/Screenshot_invizible_rethink_4.png differ diff --git a/Wiki/Screenshots/InviZible-Rethink/Screenshot_invizible_rethink_5.png b/Wiki/Screenshots/InviZible-Rethink/Screenshot_invizible_rethink_5.png new file mode 100755 index 000000000..5c6e7e2d7 Binary files /dev/null and b/Wiki/Screenshots/InviZible-Rethink/Screenshot_invizible_rethink_5.png differ diff --git a/Wiki/Screenshots/InviZible-Rethink/Screenshot_invizible_rethink_6.png b/Wiki/Screenshots/InviZible-Rethink/Screenshot_invizible_rethink_6.png new file mode 100755 index 000000000..4daae4438 Binary files /dev/null and b/Wiki/Screenshots/InviZible-Rethink/Screenshot_invizible_rethink_6.png differ diff --git a/Wiki/Screenshots/InviZible-Rethink/Screenshot_invizible_rethink_7.png b/Wiki/Screenshots/InviZible-Rethink/Screenshot_invizible_rethink_7.png new file mode 100755 index 000000000..ba266990b Binary files /dev/null and b/Wiki/Screenshots/InviZible-Rethink/Screenshot_invizible_rethink_7.png differ diff --git a/Wiki/Screenshots/InviZible-Rethink/Screenshot_invizible_rethink_8.png b/Wiki/Screenshots/InviZible-Rethink/Screenshot_invizible_rethink_8.png new file mode 100755 index 000000000..6c3c4a239 Binary files /dev/null and b/Wiki/Screenshots/InviZible-Rethink/Screenshot_invizible_rethink_8.png differ diff --git a/Wiki/Screenshots/InviZible-Rethink/Screenshot_invizible_rethink_9.png b/Wiki/Screenshots/InviZible-Rethink/Screenshot_invizible_rethink_9.png new file mode 100755 index 000000000..ae9a0ee4f Binary files /dev/null and b/Wiki/Screenshots/InviZible-Rethink/Screenshot_invizible_rethink_9.png differ diff --git 
a/Wiki/Screenshots/Options/Tor circuits isolation/Screenshot_Tor_circuits_isolation_1.png b/Wiki/Screenshots/Options/Tor circuits isolation/Screenshot_Tor_circuits_isolation_1.png new file mode 100644 index 000000000..22f725dd7 Binary files /dev/null and b/Wiki/Screenshots/Options/Tor circuits isolation/Screenshot_Tor_circuits_isolation_1.png differ diff --git a/build.gradle b/build.gradle index 0ac11c14d..f993c2073 100644 --- a/build.gradle +++ b/build.gradle @@ -2,7 +2,7 @@ buildscript { ext { - kotlin_version = '1.9.23' + kotlin_version = '2.0.21' dagger_version = '2.52' multidex_version = "2.0.1" work_version = "2.9.1" @@ -23,6 +23,10 @@ buildscript { } } +plugins { + id 'com.google.devtools.ksp' version '2.0.21-1.0.27' apply false +} + allprojects { repositories { google() diff --git a/fastlane/metadata/android/en-US/changelogs/100240.txt b/fastlane/metadata/android/en-US/changelogs/100240.txt new file mode 100644 index 000000000..09aa1c3d0 --- /dev/null +++ b/fastlane/metadata/android/en-US/changelogs/100240.txt @@ -0,0 +1,4 @@ +**InviZible Pro beta 2.4.0** +* Improved censorship resistance of Tor Obfs4 bridges. +* Updated Tor Snowflake bridge STUN servers. +* Various fixes and optimizations. diff --git a/fastlane/metadata/android/en-US/changelogs/100241.txt b/fastlane/metadata/android/en-US/changelogs/100241.txt new file mode 100644 index 000000000..919fee2ba --- /dev/null +++ b/fastlane/metadata/android/en-US/changelogs/100241.txt @@ -0,0 +1,6 @@ +**InviZible Pro beta 2.4.1** +* Display the destination port in real-time logs. +* SOCKS5 proxy fixes and improvements. +* Improved handling of local networks. +* Tor Browser, OnionShare, Orbot, Briar, and Cwtch are excluded from Tor by default, as each contains its own Tor instance. +* Fixes and optimizations. diff --git a/fastlane/metadata/android/en-US/changelogs/100242.txt b/fastlane/metadata/android/en-US/changelogs/100242.txt new file mode 100644 index 000000000..ab949ad4f --- /dev/null +++ b/fastlane/metadata/android/en-US/changelogs/100242.txt @@ -0,0 +1,6 @@ +**InviZible Pro beta 2.4.2** +* Added IGMP protocol logging in real-time logs. +* Optimized performance and battery usage in VPN mode. +* Various fixes for Root mode. +* Added Dutch translation. +* Updated Polish, Portuguese (Brazil), Persian and Spanish translations. diff --git a/fastlane/metadata/android/en-US/changelogs/100243.txt b/fastlane/metadata/android/en-US/changelogs/100243.txt new file mode 100644 index 000000000..b2a549e3d --- /dev/null +++ b/fastlane/metadata/android/en-US/changelogs/100243.txt @@ -0,0 +1,9 @@ +**InviZible Pro beta 2.4.3** +* Added x86_64 version for ChromeOS and emulators. +* Updated Tor. +* Updated Purple I2P to version 2.55.0. +* Updated default DNSCrypt servers. +* Optimized performance in VPN mode. +* Added Tamil translation. +* Updated Japanese, Chinese and Dutch translations. +* Fixes and optimizations. 
diff --git a/tordnscrypt/CMakeLists.txt b/tordnscrypt/CMakeLists.txt index ff6ca8b37..33e7ee968 100644 --- a/tordnscrypt/CMakeLists.txt +++ b/tordnscrypt/CMakeLists.txt @@ -24,7 +24,9 @@ target_link_libraries( invizible ${log-lib} ) if(${CMAKE_ANDROID_ARCH_ABI} STREQUAL "arm64-v8a") - target_compile_options(invizible PRIVATE -Ofast -march=armv8-a -flto=thin -funsafe-math-optimizations -ffast-math -ftree-vectorize -fvectorize -fslp-vectorize) -else() + target_compile_options(invizible PRIVATE -Ofast -march=armv8-a+simd -flto=thin -funsafe-math-optimizations -ffast-math -ftree-vectorize -fvectorize -fslp-vectorize) +elseif(${CMAKE_ANDROID_ARCH_ABI} STREQUAL "armeabi-v7a") target_compile_options(invizible PRIVATE -Ofast -march=armv7-a -flto=thin -funsafe-math-optimizations -ffast-math -ftree-vectorize -fvectorize -fslp-vectorize) +else() + target_compile_options(invizible PRIVATE -Ofast -march=x86-64 -flto=thin -funsafe-math-optimizations -ffast-math -ftree-vectorize -fvectorize -fslp-vectorize) endif() diff --git a/tordnscrypt/build.gradle b/tordnscrypt/build.gradle index cdeb7a811..493673c95 100644 --- a/tordnscrypt/build.gradle +++ b/tordnscrypt/build.gradle @@ -1,7 +1,7 @@ plugins { id 'com.android.application' id 'kotlin-android' - id 'kotlin-kapt' + id 'com.google.devtools.ksp' id "kotlin-parcelize" } @@ -16,7 +16,7 @@ android { fdroid{ applicationId "pan.alexander.tordnscrypt.stable" - versionName "6.9.1" + versionName "7.0.0" dimension = 'version' resValue 'string', 'package_name', applicationId } @@ -44,7 +44,7 @@ android { universal{ dimension = 'processor' resValue 'string', 'appProcVersion', 'universal' - versionCode = 4 + versionCode = 5 ndk { abiFilters 'armeabi-v7a', 'arm64-v8a' @@ -56,9 +56,9 @@ android { defaultConfig { minSdkVersion 19 targetSdkVersion 34 - versionCode 239 + versionCode 243 - resConfigs "en", "ru", "pl", "de", "fa", "fi", "in", "fr", "ja", "zh", "es", "pt", "pt-rBR", "el", "tr", "it", "uk", "bg", "ar" + resConfigs "en", "ru", "pl", "de", "fa", "fi", "in", "fr", "ja", "zh", "es", "pt", "pt-rBR", "el", "tr", "it", "uk", "bg", "ar", "nl", "ta" externalNativeBuild { cmake { @@ -69,7 +69,9 @@ android { testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner" - buildConfigField "java.util.Date", "BUILD_TIME", "new java.util.Date(" + System.currentTimeMillis() + "L)" + def buildDate = System.getenv("SOURCE_DATE_EPOCH") == null ? System.currentTimeMillis() : 1000 * Long.parseLong(System.getenv("SOURCE_DATE_EPOCH")) + buildConfigField "java.util.Date", "BUILD_TIME", "new java.util.Date(" + buildDate + "L)" vectorDrawables.useSupportLibrary = true } @@ -137,10 +139,15 @@ android { jvmTarget = '17' } - kapt { - javacOptions { - option("-Adagger.fastInit=enabled") - } + ksp { + arg("dagger.fastInit", "enabled") + } + + dependenciesInfo { + // Disables dependency metadata when building APKs. + includeInApk = false + // Disables dependency metadata when building Android App Bundles. 
+ includeInBundle = false } } @@ -157,9 +164,9 @@ dependencies { implementation 'com.google.android.material:material:1.12.0' implementation 'androidx.constraintlayout:constraintlayout:2.1.4' implementation 'androidx.recyclerview:recyclerview:1.3.2' - implementation 'androidx.lifecycle:lifecycle-process:2.8.6' - implementation 'androidx.lifecycle:lifecycle-livedata-ktx:2.8.6' - implementation 'androidx.lifecycle:lifecycle-viewmodel-ktx:2.8.6' + implementation 'androidx.lifecycle:lifecycle-process:2.8.7' + implementation 'androidx.lifecycle:lifecycle-livedata-ktx:2.8.7' + implementation 'androidx.lifecycle:lifecycle-viewmodel-ktx:2.8.7' testImplementation 'junit:junit:4.13.2' androidTestImplementation 'androidx.test.ext:junit:1.2.1' androidTestImplementation 'androidx.test.espresso:espresso-core:3.6.1' @@ -172,7 +179,7 @@ dependencies { //DI implementation "com.google.dagger:dagger:$dagger_version" - kapt "com.google.dagger:dagger-compiler:$dagger_version" + ksp "com.google.dagger:dagger-compiler:$dagger_version" //Persistent Work Manager implementation "androidx.work:work-runtime-ktx:$work_version" } diff --git a/tordnscrypt/libs/arm64-v8a/libi2pd.so b/tordnscrypt/libs/arm64-v8a/libi2pd.so index 1c916b638..83bfe92e6 100644 Binary files a/tordnscrypt/libs/arm64-v8a/libi2pd.so and b/tordnscrypt/libs/arm64-v8a/libi2pd.so differ diff --git a/tordnscrypt/libs/arm64-v8a/libnflog.so b/tordnscrypt/libs/arm64-v8a/libnflog.so index 27072b64f..eb4886992 100755 Binary files a/tordnscrypt/libs/arm64-v8a/libnflog.so and b/tordnscrypt/libs/arm64-v8a/libnflog.so differ diff --git a/tordnscrypt/libs/arm64-v8a/libobfs4proxy.so b/tordnscrypt/libs/arm64-v8a/libobfs4proxy.so index 378649207..c2f4498ba 100755 Binary files a/tordnscrypt/libs/arm64-v8a/libobfs4proxy.so and b/tordnscrypt/libs/arm64-v8a/libobfs4proxy.so differ diff --git a/tordnscrypt/libs/arm64-v8a/libtor.so b/tordnscrypt/libs/arm64-v8a/libtor.so index 893ec4c37..d38ab8fe6 100644 Binary files a/tordnscrypt/libs/arm64-v8a/libtor.so and b/tordnscrypt/libs/arm64-v8a/libtor.so differ diff --git a/tordnscrypt/libs/armeabi-v7a/libi2pd.so b/tordnscrypt/libs/armeabi-v7a/libi2pd.so index 73ba155ba..1258afe15 100644 Binary files a/tordnscrypt/libs/armeabi-v7a/libi2pd.so and b/tordnscrypt/libs/armeabi-v7a/libi2pd.so differ diff --git a/tordnscrypt/libs/armeabi-v7a/libnflog.so b/tordnscrypt/libs/armeabi-v7a/libnflog.so index cfd2b28bb..00805e4a7 100755 Binary files a/tordnscrypt/libs/armeabi-v7a/libnflog.so and b/tordnscrypt/libs/armeabi-v7a/libnflog.so differ diff --git a/tordnscrypt/libs/armeabi-v7a/libobfs4proxy.so b/tordnscrypt/libs/armeabi-v7a/libobfs4proxy.so index 2ea934500..2d3b38a09 100755 Binary files a/tordnscrypt/libs/armeabi-v7a/libobfs4proxy.so and b/tordnscrypt/libs/armeabi-v7a/libobfs4proxy.so differ diff --git a/tordnscrypt/libs/armeabi-v7a/libtor.so b/tordnscrypt/libs/armeabi-v7a/libtor.so index f820fbc7e..810bdf2f6 100644 Binary files a/tordnscrypt/libs/armeabi-v7a/libtor.so and b/tordnscrypt/libs/armeabi-v7a/libtor.so differ diff --git a/tordnscrypt/libs/prebuild b/tordnscrypt/libs/prebuild index 1936edf89..14b5deafd 100755 --- a/tordnscrypt/libs/prebuild +++ b/tordnscrypt/libs/prebuild @@ -2,20 +2,20 @@ cd "$( dirname "${BASH_SOURCE[0]}" )" -lyrebird_version=workaround/ntor-auth-mismatch +lyrebird_version=prod dnscryptproxy_version=master snowflake_version=feat/check-dns-ip libzmq_version=v4.3.5 conjure_version=prod webtunnel_version=prod -tor_openssl_version=openssl-3.3.1 +tor_openssl_version=openssl-3.4.0 
libevent_version=release-2.1.12-stable zstd_version=v1.4.9 xz_version=v5.2.4 tor_version=prod -i2pd_openssl_version=openssl-3.3.1 +i2pd_openssl_version=openssl-3.4.0 miniupnpc_version=miniupnpc_2_2_8 -i2pd_version=2.54.0 +i2pd_version=2.55.0 git clone --single-branch --branch $lyrebird_version https://gitlab.torproject.org/Gedsh/lyrebird diff --git a/tordnscrypt/libs/x86_64/libc++_shared.so b/tordnscrypt/libs/x86_64/libc++_shared.so new file mode 100755 index 000000000..085f34188 Binary files /dev/null and b/tordnscrypt/libs/x86_64/libc++_shared.so differ diff --git a/tordnscrypt/libs/x86_64/libconjure.so b/tordnscrypt/libs/x86_64/libconjure.so new file mode 100755 index 000000000..45d6b2c8b Binary files /dev/null and b/tordnscrypt/libs/x86_64/libconjure.so differ diff --git a/tordnscrypt/libs/x86_64/libdnscrypt-proxy.so b/tordnscrypt/libs/x86_64/libdnscrypt-proxy.so new file mode 100755 index 000000000..cc308d17c Binary files /dev/null and b/tordnscrypt/libs/x86_64/libdnscrypt-proxy.so differ diff --git a/tordnscrypt/libs/x86_64/libi2pd.so b/tordnscrypt/libs/x86_64/libi2pd.so new file mode 100644 index 000000000..39f3c41b6 Binary files /dev/null and b/tordnscrypt/libs/x86_64/libi2pd.so differ diff --git a/tordnscrypt/libs/x86_64/libnflog.so b/tordnscrypt/libs/x86_64/libnflog.so new file mode 100755 index 000000000..6f739a1ea Binary files /dev/null and b/tordnscrypt/libs/x86_64/libnflog.so differ diff --git a/tordnscrypt/libs/x86_64/libobfs4proxy.so b/tordnscrypt/libs/x86_64/libobfs4proxy.so new file mode 100755 index 000000000..b21d49e89 Binary files /dev/null and b/tordnscrypt/libs/x86_64/libobfs4proxy.so differ diff --git a/tordnscrypt/libs/x86_64/libsnowflake.so b/tordnscrypt/libs/x86_64/libsnowflake.so new file mode 100755 index 000000000..f53bb94a1 Binary files /dev/null and b/tordnscrypt/libs/x86_64/libsnowflake.so differ diff --git a/tordnscrypt/libs/x86_64/libtor.so b/tordnscrypt/libs/x86_64/libtor.so new file mode 100644 index 000000000..7a479feb7 Binary files /dev/null and b/tordnscrypt/libs/x86_64/libtor.so differ diff --git a/tordnscrypt/libs/x86_64/libwebtunnel.so b/tordnscrypt/libs/x86_64/libwebtunnel.so new file mode 100755 index 000000000..804d6ace0 Binary files /dev/null and b/tordnscrypt/libs/x86_64/libwebtunnel.so differ diff --git a/tordnscrypt/libs/x86_64/libzmq.so b/tordnscrypt/libs/x86_64/libzmq.so new file mode 100755 index 000000000..00b507f7d Binary files /dev/null and b/tordnscrypt/libs/x86_64/libzmq.so differ diff --git a/tordnscrypt/owner.gradle b/tordnscrypt/owner.gradle index f6cf31546..6de840c70 100644 --- a/tordnscrypt/owner.gradle +++ b/tordnscrypt/owner.gradle @@ -1,7 +1,7 @@ plugins { id 'com.android.application' id 'kotlin-android' - id 'kotlin-kapt' + id 'com.google.devtools.ksp' id "kotlin-parcelize" } @@ -35,32 +35,38 @@ android { productFlavors { lite { applicationId "pan.alexander.tordnscrypt.stable" - versionName "6.9.1" + versionName "7.0.0" dimension = 'version' signingConfig signingConfigs.stablesign resValue 'string', 'package_name', applicationId + dependenciesInfo.includeInApk false + dependenciesInfo.includeInBundle false } pro { applicationId "pan.alexander.tordnscrypt.stable" - versionName "6.9.1" + versionName "7.0.0" dimension = 'version' signingConfig signingConfigs.stablesign resValue 'string', 'package_name', applicationId + dependenciesInfo.includeInApk false + dependenciesInfo.includeInBundle false } beta { applicationId "pan.alexander.tordnscrypt" - versionName "2.3.9" + versionName "2.4.3" dimension = 'version' signingConfig 
signingConfigs.betasign resValue 'string', 'package_name', applicationId + dependenciesInfo.includeInApk false + dependenciesInfo.includeInBundle false } google_play { minSdkVersion 22 applicationId "pan.alexander.tordnscrypt.gp" - versionName "6.9.1" + versionName "7.0.0" dimension = 'version' signingConfig signingConfigs.stablesign resValue 'string', 'package_name', applicationId @@ -87,14 +93,24 @@ } } + x64 { + dimension = 'processor' + resValue 'string', 'appProcVersion', 'x86_64' + versionCode = 4 + + ndk { + abiFilters 'x86_64' + } + } + } defaultConfig { minSdkVersion 19 targetSdkVersion 34 - versionCode 239 + versionCode 243 - resConfigs "en", "ru", "pl", "de", "fa", "fi", "in", "fr", "ja", "zh", "es", "pt", "pt-rBR", "el", "tr", "it", "uk", "bg", "ar" + resConfigs "en", "ru", "pl", "de", "fa", "fi", "in", "fr", "ja", "zh", "es", "pt", "pt-rBR", "el", "tr", "it", "uk", "bg", "ar", "nl", "ta" externalNativeBuild { cmake { @@ -105,7 +121,9 @@ testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner" - buildConfigField "java.util.Date", "BUILD_TIME", "new java.util.Date(" + System.currentTimeMillis() + "L)" + def buildDate = System.getenv("SOURCE_DATE_EPOCH") == null ? System.currentTimeMillis() : 1000 * Long.parseLong(System.getenv("SOURCE_DATE_EPOCH")) + buildConfigField "java.util.Date", "BUILD_TIME", "new java.util.Date(" + buildDate + "L)" vectorDrawables.useSupportLibrary = true } @@ -179,10 +197,8 @@ android { jvmTarget = '17' } - kapt { - javacOptions { - option("-Adagger.fastInit=enabled") - } + ksp { + arg("dagger.fastInit", "enabled") } } @@ -201,10 +217,10 @@ dependencies { implementation 'com.google.android.material:material:1.12.0' implementation 'androidx.constraintlayout:constraintlayout:2.1.4' implementation 'androidx.recyclerview:recyclerview:1.3.2' - implementation 'androidx.lifecycle:lifecycle-process:2.8.6' - implementation 'androidx.lifecycle:lifecycle-livedata-ktx:2.8.6' - implementation 'androidx.lifecycle:lifecycle-viewmodel-ktx:2.8.6' - google_playImplementation 'com.android.billingclient:billing:6.2.0' + implementation 'androidx.lifecycle:lifecycle-process:2.8.7' + implementation 'androidx.lifecycle:lifecycle-livedata-ktx:2.8.7' + implementation 'androidx.lifecycle:lifecycle-viewmodel-ktx:2.8.7' + google_playImplementation 'com.android.billingclient:billing:7.1.1' testImplementation 'junit:junit:4.13.2' androidTestImplementation 'androidx.test.ext:junit:1.2.1' androidTestImplementation 'androidx.test.espresso:espresso-core:3.6.1' @@ -216,7 +232,7 @@ //DI implementation "com.google.dagger:dagger:$dagger_version" - kapt "com.google.dagger:dagger-compiler:$dagger_version" + ksp "com.google.dagger:dagger-compiler:$dagger_version" //Persistent Work Manager implementation "androidx.work:work-runtime-ktx:$work_version" diff --git a/tordnscrypt/src/beta/java/pan/alexander/tordnscrypt/assistance/AccelerateDevelop.java b/tordnscrypt/src/beta/java/pan/alexander/tordnscrypt/assistance/AccelerateDevelop.java index 3f3b37333..0a905980d 100644 --- a/tordnscrypt/src/beta/java/pan/alexander/tordnscrypt/assistance/AccelerateDevelop.java +++ b/tordnscrypt/src/beta/java/pan/alexander/tordnscrypt/assistance/AccelerateDevelop.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.assistance; diff --git a/tordnscrypt/src/debug/AndroidManifest.xml b/tordnscrypt/src/debug/AndroidManifest.xml index e4f9ff2fa..918a29760 100644 --- a/tordnscrypt/src/debug/AndroidManifest.xml +++ b/tordnscrypt/src/debug/AndroidManifest.xml @@ -15,7 +15,7 @@ ~ You should have received a copy of the GNU General Public License ~ along with InviZible Pro. If not, see . ~ - ~ Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + ~ Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com --> . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ import javax.inject.Inject; diff --git a/tordnscrypt/src/google_play/java/pan/alexander/tordnscrypt/assistance/AccelerateDevelop.java b/tordnscrypt/src/google_play/java/pan/alexander/tordnscrypt/assistance/AccelerateDevelop.java index 84ed1013b..16fc181bd 100644 --- a/tordnscrypt/src/google_play/java/pan/alexander/tordnscrypt/assistance/AccelerateDevelop.java +++ b/tordnscrypt/src/google_play/java/pan/alexander/tordnscrypt/assistance/AccelerateDevelop.java @@ -16,7 +16,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ import android.content.Context; @@ -37,6 +37,7 @@ import com.android.billingclient.api.BillingResult; import com.android.billingclient.api.ConsumeParams; import com.android.billingclient.api.ConsumeResponseListener; +import com.android.billingclient.api.PendingPurchasesParams; import com.android.billingclient.api.ProductDetails; import com.android.billingclient.api.ProductDetailsResponseListener; import com.android.billingclient.api.Purchase; @@ -129,7 +130,7 @@ public void initBilling() { signedData = getStoredData(); if (mBillingClient == null) { mBillingClient = BillingClient.newBuilder(context) - .enablePendingPurchases() + .enablePendingPurchases(PendingPurchasesParams.newBuilder().enableOneTimeProducts().build()) .setListener(new PurchasesUpdatedListener() { @Override public void onPurchasesUpdated(@NonNull BillingResult billingResult, @Nullable List purchasesList) { diff --git a/tordnscrypt/src/lite/java/pan/alexander/tordnscrypt/assistance/AccelerateDevelop.java b/tordnscrypt/src/lite/java/pan/alexander/tordnscrypt/assistance/AccelerateDevelop.java index 32cd8491f..c1412cd27 100644 --- a/tordnscrypt/src/lite/java/pan/alexander/tordnscrypt/assistance/AccelerateDevelop.java +++ b/tordnscrypt/src/lite/java/pan/alexander/tordnscrypt/assistance/AccelerateDevelop.java @@ -16,7 +16,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ import javax.inject.Inject; diff --git a/tordnscrypt/src/main/assets/dnscrypt.mp3 b/tordnscrypt/src/main/assets/dnscrypt.mp3 index 97c82bbaa..438ed5b7e 100644 Binary files a/tordnscrypt/src/main/assets/dnscrypt.mp3 and b/tordnscrypt/src/main/assets/dnscrypt.mp3 differ diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/App.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/App.kt index a23d6ab63..d81ba561c 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/App.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/App.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/AppLifecycleListener.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/AppLifecycleListener.kt index e3f53f63f..1798a5973 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/AppLifecycleListener.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/AppLifecycleListener.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/BootCompleteReceiver.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/BootCompleteReceiver.java index 3261fc366..be93f67c0 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/BootCompleteReceiver.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/BootCompleteReceiver.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/LangAppCompatActivity.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/LangAppCompatActivity.java index 66b62738c..0db5ad00f 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/LangAppCompatActivity.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/LangAppCompatActivity.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/MainActivity.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/MainActivity.java index d84e0594c..7e00ee3ab 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/MainActivity.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/MainActivity.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/RootState.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/RootState.kt index 04a8b9d71..8649d2478 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/RootState.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/RootState.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/TopFragment.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/TopFragment.java index efcd0decb..c041cccdf 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/TopFragment.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/TopFragment.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/TopFragmentViewModel.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/TopFragmentViewModel.kt index 842daa391..1e4dd7a45 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/TopFragmentViewModel.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/TopFragmentViewModel.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/about/AboutActivity.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/about/AboutActivity.java index 331ee7f06..9f001f378 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/about/AboutActivity.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/about/AboutActivity.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.about; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/ArpRelatedUiUpdater.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/ArpRelatedUiUpdater.kt index c079dfc3f..0cc6e3416 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/ArpRelatedUiUpdater.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/ArpRelatedUiUpdater.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.arp diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/ArpScanner.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/ArpScanner.kt index 1c8898cf6..f14bbb90f 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/ArpScanner.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/ArpScanner.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.arp diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/ArpScannerHelper.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/ArpScannerHelper.kt index af59cbe67..b714b150b 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/ArpScannerHelper.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/ArpScannerHelper.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.arp diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/ArpScannerLoop.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/ArpScannerLoop.kt index 6c78eac87..67f00510d 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/ArpScannerLoop.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/ArpScannerLoop.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.arp diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/ArpTableManager.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/ArpTableManager.kt index ab8b6bdf1..7d304fe6d 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/ArpTableManager.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/ArpTableManager.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.arp diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/ArpWarningNotification.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/ArpWarningNotification.kt index d8b9cb918..90a521998 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/ArpWarningNotification.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/ArpWarningNotification.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.arp diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/CommandExecutor.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/CommandExecutor.kt index fbd4a90e8..78d57de48 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/CommandExecutor.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/CommandExecutor.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.arp diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/ConnectionManager.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/ConnectionManager.kt index 28a4b35e6..359554312 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/ConnectionManager.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/ConnectionManager.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.arp diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/DNSRebindProtection.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/DNSRebindProtection.kt index cddd5afdd..c39370d05 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/DNSRebindProtection.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/DNSRebindProtection.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.arp diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/DefaultGatewayManager.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/DefaultGatewayManager.kt index 11987a63b..a6dc22ae0 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/DefaultGatewayManager.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/arp/DefaultGatewayManager.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.arp diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/backup/BackupActivity.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/backup/BackupActivity.java index 8fe17dac0..01b3a57f3 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/backup/BackupActivity.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/backup/BackupActivity.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.backup; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/backup/BackupFragment.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/backup/BackupFragment.java index fe316bea0..7ec7f9db0 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/backup/BackupFragment.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/backup/BackupFragment.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.backup; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/backup/BackupHelper.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/backup/BackupHelper.java index ea9a16c69..7270efa6b 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/backup/BackupHelper.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/backup/BackupHelper.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.backup; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/backup/ResetHelper.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/backup/ResetHelper.kt index 7ec9b9bda..4fb0e95e9 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/backup/ResetHelper.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/backup/ResetHelper.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.backup diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/backup/ResetModuleHelper.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/backup/ResetModuleHelper.kt index 214c67b5c..a01c3dd79 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/backup/ResetModuleHelper.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/backup/ResetModuleHelper.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.backup diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/backup/RestoreHelper.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/backup/RestoreHelper.java index 480f81bba..f85328c83 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/backup/RestoreHelper.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/backup/RestoreHelper.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.backup; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/crash_handling/TopExceptionHandler.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/crash_handling/TopExceptionHandler.kt index 97d2ce107..c62365a99 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/crash_handling/TopExceptionHandler.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/crash_handling/TopExceptionHandler.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.crash_handling diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/BridgesCountriesDataSource.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/BridgesCountriesDataSource.kt index b013c06f6..09a8e58c3 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/BridgesCountriesDataSource.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/BridgesCountriesDataSource.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.data.bridges diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/BridgesCountriesDataSourceImpl.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/BridgesCountriesDataSourceImpl.kt index 41452af10..7552554bc 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/BridgesCountriesDataSourceImpl.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/BridgesCountriesDataSourceImpl.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.data.bridges diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/BridgesCountriesRepositoryImpl.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/BridgesCountriesRepositoryImpl.kt index a07a32cac..345eddfdd 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/BridgesCountriesRepositoryImpl.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/BridgesCountriesRepositoryImpl.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.data.bridges diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/DefaultVanillaBridgeDataSource.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/DefaultVanillaBridgeDataSource.kt index 99b03ed66..e016dcd4c 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/DefaultVanillaBridgeDataSource.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/DefaultVanillaBridgeDataSource.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.data.bridges diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/DefaultVanillaBridgeDataSourceImpl.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/DefaultVanillaBridgeDataSourceImpl.kt index e1ba7c788..43bbf7b0c 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/DefaultVanillaBridgeDataSourceImpl.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/DefaultVanillaBridgeDataSourceImpl.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.data.bridges diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/DefaultVanillaBridgeRepositoryImpl.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/DefaultVanillaBridgeRepositoryImpl.kt index 73e73ad67..231ca9e6c 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/DefaultVanillaBridgeRepositoryImpl.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/DefaultVanillaBridgeRepositoryImpl.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.data.bridges diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/RelayAddressFingerprint.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/RelayAddressFingerprint.kt index 37f2f4f9d..7ce10dda5 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/RelayAddressFingerprint.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/RelayAddressFingerprint.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.data.bridges diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/RequestBridgesDataSource.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/RequestBridgesDataSource.kt index 9837d4a52..29ac07f2d 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/RequestBridgesDataSource.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/RequestBridgesDataSource.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.data.bridges diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/RequestBridgesDataSourceImpl.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/RequestBridgesDataSourceImpl.kt index a05b8b199..42226d652 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/RequestBridgesDataSourceImpl.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/RequestBridgesDataSourceImpl.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.data.bridges diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/RequestBridgesRepositoryImpl.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/RequestBridgesRepositoryImpl.kt index 89e1e25a4..fee7d0eb7 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/RequestBridgesRepositoryImpl.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/bridges/RequestBridgesRepositoryImpl.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.data.bridges diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/connection_checker/ConnectionCheckerDataSource.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/connection_checker/ConnectionCheckerDataSource.kt index af4c52cf4..c6c6c9275 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/connection_checker/ConnectionCheckerDataSource.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/connection_checker/ConnectionCheckerDataSource.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.data.connection_checker diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/connection_checker/ConnectionCheckerDataSourceImpl.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/connection_checker/ConnectionCheckerDataSourceImpl.kt index 7b1d24de8..800faa14d 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/connection_checker/ConnectionCheckerDataSourceImpl.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/connection_checker/ConnectionCheckerDataSourceImpl.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.data.connection_checker diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/connection_checker/ConnectionCheckerRepositoryImpl.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/connection_checker/ConnectionCheckerRepositoryImpl.kt index a05cff151..4331c6d3b 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/connection_checker/ConnectionCheckerRepositoryImpl.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/connection_checker/ConnectionCheckerRepositoryImpl.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.data.connection_checker diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/connection_records/ConnectionRecordsGetter.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/connection_records/ConnectionRecordsGetter.kt index 9f7150026..7bf41e83e 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/connection_records/ConnectionRecordsGetter.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/connection_records/ConnectionRecordsGetter.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.data.connection_records diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/connection_records/ConnectionRecordsRepositoryImpl.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/connection_records/ConnectionRecordsRepositoryImpl.kt index 357d40ef4..c24ff1557 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/connection_records/ConnectionRecordsRepositoryImpl.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/connection_records/ConnectionRecordsRepositoryImpl.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.data.connection_records diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/connection_records/NflogRecordsGetter.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/connection_records/NflogRecordsGetter.kt index d064b9618..ff96a1bd7 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/connection_records/NflogRecordsGetter.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/connection_records/NflogRecordsGetter.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.data.connection_records diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/dns_resolver/DnsDataSource.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/dns_resolver/DnsDataSource.kt index 224107e13..904185da9 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/dns_resolver/DnsDataSource.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/dns_resolver/DnsDataSource.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.data.dns_resolver diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/dns_resolver/DnsDataSourceImpl.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/dns_resolver/DnsDataSourceImpl.kt index 5d5a3d061..c1cea0632 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/dns_resolver/DnsDataSourceImpl.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/dns_resolver/DnsDataSourceImpl.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.data.dns_resolver diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/dns_resolver/DnsRepositoryImpl.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/dns_resolver/DnsRepositoryImpl.kt index 5c185beeb..105620c95 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/dns_resolver/DnsRepositoryImpl.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/dns_resolver/DnsRepositoryImpl.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.data.dns_resolver diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/dns_rules/DnsRulesDataSource.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/dns_rules/DnsRulesDataSource.kt index e8d108006..ec9c0b3b9 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/dns_rules/DnsRulesDataSource.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/dns_rules/DnsRulesDataSource.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.data.dns_rules diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/dns_rules/DnsRulesDataSourceImpl.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/dns_rules/DnsRulesDataSourceImpl.kt index b678e5806..07fbb8ae4 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/dns_rules/DnsRulesDataSourceImpl.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/dns_rules/DnsRulesDataSourceImpl.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.data.dns_rules diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/dns_rules/DnsRulesRepositoryImpl.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/dns_rules/DnsRulesRepositoryImpl.kt index 4b4e7f95e..638d2a389 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/dns_rules/DnsRulesRepositoryImpl.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/dns_rules/DnsRulesRepositoryImpl.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.data.dns_rules diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/log_reader/HtmlReader.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/log_reader/HtmlReader.kt index 81b091e6b..0f3f5c014 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/log_reader/HtmlReader.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/log_reader/HtmlReader.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.data.log_reader diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/log_reader/ModulesLogRepositoryImpl.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/log_reader/ModulesLogRepositoryImpl.kt index e3f78fe24..9c35ce2d0 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/log_reader/ModulesLogRepositoryImpl.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/log_reader/ModulesLogRepositoryImpl.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.data.log_reader diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/log_reader/OwnFileReader.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/log_reader/OwnFileReader.java index aaccc56fe..e431d7619 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/log_reader/OwnFileReader.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/log_reader/OwnFileReader.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.data.log_reader; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/preferences/PreferenceDataSource.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/preferences/PreferenceDataSource.kt index e95561c64..31c9e8823 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/preferences/PreferenceDataSource.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/preferences/PreferenceDataSource.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.data.preferences diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/preferences/PreferenceDataSourceImpl.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/preferences/PreferenceDataSourceImpl.kt index 3d844da53..44001ace5 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/preferences/PreferenceDataSourceImpl.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/preferences/PreferenceDataSourceImpl.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.data.preferences diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/preferences/PreferenceRepositoryImpl.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/preferences/PreferenceRepositoryImpl.kt index 204af147f..0283ba836 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/preferences/PreferenceRepositoryImpl.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/preferences/PreferenceRepositoryImpl.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.data.preferences diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/resources/ResourceRepositoryImpl.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/resources/ResourceRepositoryImpl.kt index 8e8fe8cb2..3e2f37ae3 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/resources/ResourceRepositoryImpl.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/data/resources/ResourceRepositoryImpl.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.data.resources diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/AppComponent.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/AppComponent.kt index 60ed98e69..75642747a 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/AppComponent.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/AppComponent.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.di diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/AppSubcomponentModule.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/AppSubcomponentModule.kt index 84192cb4b..b2286c4cf 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/AppSubcomponentModule.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/AppSubcomponentModule.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.di diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/CoroutinesModule.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/CoroutinesModule.kt index c821a6fcc..460e0e0d2 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/CoroutinesModule.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/CoroutinesModule.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.di diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/DataSourcesModule.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/DataSourcesModule.kt index 5efcf8e19..9287a00d9 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/DataSourcesModule.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/DataSourcesModule.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.di diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/HandlerModule.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/HandlerModule.kt index a18b08c48..3bc069ba2 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/HandlerModule.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/HandlerModule.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.di diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/HelpersModule.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/HelpersModule.kt index 02a1b5451..899fb557e 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/HelpersModule.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/HelpersModule.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.di diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/InteractorsModule.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/InteractorsModule.kt index 16344ce3d..b2d41e344 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/InteractorsModule.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/InteractorsModule.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.di diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/RepositoryModule.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/RepositoryModule.kt index 7417c42ba..ad7666cbd 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/RepositoryModule.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/RepositoryModule.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.di diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/SharedPreferencesModule.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/SharedPreferencesModule.kt index 9ceebaf83..d298960ba 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/SharedPreferencesModule.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/SharedPreferencesModule.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.di diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/SubcomponentsManager.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/SubcomponentsManager.kt index 05e16fbea..68d213adf 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/SubcomponentsManager.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/SubcomponentsManager.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.di diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/ViewModelFactory.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/ViewModelFactory.kt index e4ce66f13..4cbafc8d7 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/ViewModelFactory.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/ViewModelFactory.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.di diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/ViewModelKey.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/ViewModelKey.kt index c66a904fd..5635e408d 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/ViewModelKey.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/ViewModelKey.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.di diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/ViewModelModule.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/ViewModelModule.kt index d601764bc..aaa46db19 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/ViewModelModule.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/ViewModelModule.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.di diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/arp/ArpScope.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/arp/ArpScope.kt index 8a4f0e9f7..01e81963a 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/arp/ArpScope.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/arp/ArpScope.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.di.arp diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/arp/ArpSubcomponent.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/arp/ArpSubcomponent.kt index 7183e464e..6f55aeba8 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/arp/ArpSubcomponent.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/arp/ArpSubcomponent.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.di.arp diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/logreader/LogReaderInteractorsModule.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/logreader/LogReaderInteractorsModule.kt index 2e0e5c300..159f153ab 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/logreader/LogReaderInteractorsModule.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/logreader/LogReaderInteractorsModule.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.di.logreader diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/logreader/LogReaderRepositoryModule.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/logreader/LogReaderRepositoryModule.kt index 67d498bbc..aabf2fabf 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/logreader/LogReaderRepositoryModule.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/logreader/LogReaderRepositoryModule.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.di.logreader diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/logreader/LogReaderScope.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/logreader/LogReaderScope.kt index 1f823acd5..d15d94a06 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/logreader/LogReaderScope.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/logreader/LogReaderScope.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.di.logreader diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/logreader/LogReaderSubcomponent.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/logreader/LogReaderSubcomponent.kt index 0deaa7715..c9348a92a 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/logreader/LogReaderSubcomponent.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/logreader/LogReaderSubcomponent.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.di.logreader diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/modulesservice/ModulesServiceScope.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/modulesservice/ModulesServiceScope.kt index 94c87fc56..2fca863e7 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/modulesservice/ModulesServiceScope.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/modulesservice/ModulesServiceScope.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.di.modulesservice diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/modulesservice/ModulesServiceSubcomponent.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/modulesservice/ModulesServiceSubcomponent.kt index 907dd9482..75573217b 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/modulesservice/ModulesServiceSubcomponent.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/modulesservice/ModulesServiceSubcomponent.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.di.modulesservice diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/modulesservice/ModulesServiceSubcomponentModule.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/modulesservice/ModulesServiceSubcomponentModule.kt index bfdc564b1..c414633f2 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/modulesservice/ModulesServiceSubcomponentModule.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/modulesservice/ModulesServiceSubcomponentModule.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.di.modulesservice diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/tiles/TilesScope.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/tiles/TilesScope.kt index 4999fbbec..7c871bd6b 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/tiles/TilesScope.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/tiles/TilesScope.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.di.tiles diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/tiles/TilesSubcomponent.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/tiles/TilesSubcomponent.kt index 171a8cb82..5c56b2e37 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/tiles/TilesSubcomponent.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/di/tiles/TilesSubcomponent.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.di.tiles diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/AddDNSCryptServerDialogFragment.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/AddDNSCryptServerDialogFragment.java index 048e9d85e..026baf029 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/AddDNSCryptServerDialogFragment.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/AddDNSCryptServerDialogFragment.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.dialogs; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/AgreementDialog.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/AgreementDialog.java index 7c4ef76ba..dcd0f6406 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/AgreementDialog.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/AgreementDialog.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.dialogs; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/AskAccelerateDevelop.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/AskAccelerateDevelop.java index 304f745d5..e3c22f62d 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/AskAccelerateDevelop.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/AskAccelerateDevelop.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.dialogs; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/AskRestoreDefaultsDialog.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/AskRestoreDefaultsDialog.java index 73de2d05f..7fbebe808 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/AskRestoreDefaultsDialog.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/AskRestoreDefaultsDialog.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.dialogs; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/BridgesCaptchaDialogFragment.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/BridgesCaptchaDialogFragment.kt index 1fcf2b0da..47ab602ab 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/BridgesCaptchaDialogFragment.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/BridgesCaptchaDialogFragment.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.dialogs diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/BridgesReadyDialogFragment.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/BridgesReadyDialogFragment.kt index d07b84a65..c193e0143 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/BridgesReadyDialogFragment.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/BridgesReadyDialogFragment.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.dialogs diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/ChangeModeDialog.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/ChangeModeDialog.kt index b26b1fa8a..7d340014a 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/ChangeModeDialog.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/ChangeModeDialog.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.dialogs diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/DialogSaveConfigChanges.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/DialogSaveConfigChanges.java index b239af77a..973debc87 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/DialogSaveConfigChanges.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/DialogSaveConfigChanges.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.dialogs; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/ExtendedDialogFragment.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/ExtendedDialogFragment.java index 46a6d57c0..516b51f2e 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/ExtendedDialogFragment.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/ExtendedDialogFragment.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.dialogs; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/FakeSniInputDialogFragment.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/FakeSniInputDialogFragment.kt index afec7dc4a..4f20f1364 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/FakeSniInputDialogFragment.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/FakeSniInputDialogFragment.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.dialogs diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/NewUpdateDialogFragment.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/NewUpdateDialogFragment.java index b74db92d1..97b269bf3 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/NewUpdateDialogFragment.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/NewUpdateDialogFragment.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.dialogs; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/NotificationDialogFragment.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/NotificationDialogFragment.java index 9767260c5..6ad868340 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/NotificationDialogFragment.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/NotificationDialogFragment.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.dialogs; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/NotificationHelper.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/NotificationHelper.java index e2e2a3971..f459190c6 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/NotificationHelper.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/NotificationHelper.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.dialogs; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/Registration.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/Registration.java index 8295bef08..a58175549 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/Registration.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/Registration.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.dialogs; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/RequestIgnoreBatteryOptimizationDialog.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/RequestIgnoreBatteryOptimizationDialog.kt index 7aaf3aad4..61d64fdd7 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/RequestIgnoreBatteryOptimizationDialog.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/RequestIgnoreBatteryOptimizationDialog.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.dialogs diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/RequestIgnoreDataRestrictionDialog.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/RequestIgnoreDataRestrictionDialog.kt index 0220e0ad1..415a97064 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/RequestIgnoreDataRestrictionDialog.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/RequestIgnoreDataRestrictionDialog.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.dialogs diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/SelectBridgesTransportDialogFragment.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/SelectBridgesTransportDialogFragment.kt index 4766fd2d8..3bfdcf70b 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/SelectBridgesTransportDialogFragment.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/SelectBridgesTransportDialogFragment.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.dialogs diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/SendCrashReport.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/SendCrashReport.kt index 03d09eda6..03247103f 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/SendCrashReport.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/SendCrashReport.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.dialogs diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/UpdateDefaultBridgesDialog.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/UpdateDefaultBridgesDialog.kt index 8c50c4448..7c197b5ca 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/UpdateDefaultBridgesDialog.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/UpdateDefaultBridgesDialog.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.dialogs diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/UpdateModulesDialogFragment.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/UpdateModulesDialogFragment.java index 295b3ee45..0e6d552a3 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/UpdateModulesDialogFragment.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/UpdateModulesDialogFragment.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.dialogs; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/progressDialogs/CheckUpdatesDialog.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/progressDialogs/CheckUpdatesDialog.kt index 1a24f6523..fc5de1409 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/progressDialogs/CheckUpdatesDialog.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/progressDialogs/CheckUpdatesDialog.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.dialogs.progressDialogs diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/progressDialogs/PleaseWaitDialogBridgesRequest.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/progressDialogs/PleaseWaitDialogBridgesRequest.kt index 40418273e..a55328a00 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/progressDialogs/PleaseWaitDialogBridgesRequest.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/progressDialogs/PleaseWaitDialogBridgesRequest.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.dialogs.progressDialogs diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/progressDialogs/PleaseWaitProgressDialog.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/progressDialogs/PleaseWaitProgressDialog.java index 7fbe1e3f6..8f9439f7f 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/progressDialogs/PleaseWaitProgressDialog.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/progressDialogs/PleaseWaitProgressDialog.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.dialogs.progressDialogs; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/progressDialogs/RootCheckingProgressDialog.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/progressDialogs/RootCheckingProgressDialog.java index 5e31d5cf6..f8e9951ef 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/progressDialogs/RootCheckingProgressDialog.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dialogs/progressDialogs/RootCheckingProgressDialog.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.dialogs.progressDialogs; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dnscrypt_fragment/DNSCryptFragmentPresenter.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dnscrypt_fragment/DNSCryptFragmentPresenter.java index d1348205c..b28540f83 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dnscrypt_fragment/DNSCryptFragmentPresenter.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dnscrypt_fragment/DNSCryptFragmentPresenter.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.dnscrypt_fragment; @@ -59,6 +59,7 @@ import static pan.alexander.tordnscrypt.di.SharedPreferencesModule.DEFAULT_PREFERENCES_NAME; import static pan.alexander.tordnscrypt.utils.logger.Logger.loge; import static pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.CONNECTION_LOGS; +import static pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.DNSCRYPT_OUTBOUND_PROXY; import static pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.IGNORE_SYSTEM_DNS; import static pan.alexander.tordnscrypt.utils.enums.ModuleState.FAULT; import static pan.alexander.tordnscrypt.utils.enums.ModuleState.RESTARTING; @@ -69,6 +70,7 @@ import static pan.alexander.tordnscrypt.utils.enums.ModuleState.UNDEFINED; import static pan.alexander.tordnscrypt.utils.enums.OperationMode.ROOT_MODE; import static pan.alexander.tordnscrypt.utils.enums.OperationMode.VPN_MODE; +import static pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.TOR_OUTBOUND_PROXY; import javax.inject.Inject; import javax.inject.Named; @@ -390,10 +392,10 @@ private void dnsCryptStartedWithError(LogDataModel logData) { //If Tor is ready, app will use Tor Exit node DNS in VPN mode if (fragmentManager != null && !(modulesStatus.isTorReady() && modulesStatus.getMode() == VPN_MODE)) { - NotificationHelper notificationHelper = NotificationHelper.setHelperMessage( - context, context.getString(R.string.helper_dnscrypt_no_internet), "helper_dnscrypt_no_internet"); - if (notificationHelper != null) { - notificationHelper.show(fragmentManager, NotificationHelper.TAG_HELPER); + if (defaultPreferences.get().getBoolean(DNSCRYPT_OUTBOUND_PROXY, false)) { + showCheckProxyMessage(fragmentManager); + } else { + showCannotConnectMessage(fragmentManager); } } @@ -402,6 +404,22 @@ private void 
dnsCryptStartedWithError(LogDataModel logData) { loge("DNSCrypt Error: " + logData.getLines()); } + private void showCheckProxyMessage(FragmentManager fragmentManager) { + NotificationHelper notificationHelper = NotificationHelper.setHelperMessage( + context, context.getString(R.string.helper_tor_check_proxy), "helper_dnscrypt_check_proxy"); + if (notificationHelper != null) { + notificationHelper.show(fragmentManager, NotificationHelper.TAG_HELPER); + } + } + + private void showCannotConnectMessage(FragmentManager fragmentManager) { + NotificationHelper notificationHelper = NotificationHelper.setHelperMessage( + context, context.getString(R.string.helper_dnscrypt_no_internet), "helper_dnscrypt_no_internet"); + if (notificationHelper != null) { + notificationHelper.show(fragmentManager, NotificationHelper.TAG_HELPER); + } + } + @Override public void onConnectionRecordsUpdated(@NonNull String connectionRecords) { String logLines = ""; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dnscrypt_fragment/DNSCryptFragmentPresenterInterface.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dnscrypt_fragment/DNSCryptFragmentPresenterInterface.java index ca6b7e8b4..5deae2bdf 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dnscrypt_fragment/DNSCryptFragmentPresenterInterface.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dnscrypt_fragment/DNSCryptFragmentPresenterInterface.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.dnscrypt_fragment; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dnscrypt_fragment/DNSCryptFragmentReceiver.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dnscrypt_fragment/DNSCryptFragmentReceiver.java index 1307e3006..9eca5fd7f 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dnscrypt_fragment/DNSCryptFragmentReceiver.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dnscrypt_fragment/DNSCryptFragmentReceiver.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.dnscrypt_fragment; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dnscrypt_fragment/DNSCryptFragmentView.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dnscrypt_fragment/DNSCryptFragmentView.java index a4e143560..eabadc1c7 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dnscrypt_fragment/DNSCryptFragmentView.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dnscrypt_fragment/DNSCryptFragmentView.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
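Note on the dnsCryptStartedWithError() hunk above: the single "no internet" helper dialog is replaced by a branch on the DNSCRYPT_OUTBOUND_PROXY preference. When DNSCrypt is configured to use an outbound SOCKS proxy, a failed start is more likely a proxy problem than a connectivity problem, so the user is pointed at the proxy instead. A minimal Kotlin sketch of that dispatch follows; the function name and the plain-string return shape are illustrative only, standing in for the R.string lookup and NotificationHelper plumbing:

// Returns (helper string resource name, dialog tag) for a failed DNSCrypt
// start, mirroring the branch added in DNSCryptFragmentPresenter above.
fun chooseDnsCryptErrorHelper(outboundProxyEnabled: Boolean): Pair<String, String> =
    if (outboundProxyEnabled) {
        // Proxy configured: suggest checking the proxy, not the connection.
        "helper_tor_check_proxy" to "helper_dnscrypt_check_proxy"
    } else {
        // No proxy involved: plain "cannot connect" message.
        "helper_dnscrypt_no_internet" to "helper_dnscrypt_no_internet"
    }

Worth noting from the patch itself: the DNSCrypt proxy case reuses the helper_tor_check_proxy string resource while tagging the dialog helper_dnscrypt_check_proxy.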
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.dnscrypt_fragment; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dnscrypt_fragment/DNSCryptRunFragment.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dnscrypt_fragment/DNSCryptRunFragment.java index fcda84132..f61835c40 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dnscrypt_fragment/DNSCryptRunFragment.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/dnscrypt_fragment/DNSCryptRunFragment.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.dnscrypt_fragment; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/BridgeCountryData.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/BridgeCountryData.kt index 82f24ae15..5b0351968 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/BridgeCountryData.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/BridgeCountryData.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.bridges diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/BridgeHashToAddress.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/BridgeHashToAddress.kt index 2697f0798..17b2d06a7 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/BridgeHashToAddress.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/BridgeHashToAddress.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.bridges diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/BridgePingResult.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/BridgePingResult.kt index 670932f69..aac158dce 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/BridgePingResult.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/BridgePingResult.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.bridges diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/BridgesCountriesInteractor.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/BridgesCountriesInteractor.kt index 91ccf3f86..99ce6ac7e 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/BridgesCountriesInteractor.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/BridgesCountriesInteractor.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.bridges diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/BridgesCountriesRepository.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/BridgesCountriesRepository.kt index 534199746..64e97b6e3 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/BridgesCountriesRepository.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/BridgesCountriesRepository.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.bridges diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/DefaultVanillaBridgeInteractor.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/DefaultVanillaBridgeInteractor.kt index 46f379f45..eecf53a98 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/DefaultVanillaBridgeInteractor.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/DefaultVanillaBridgeInteractor.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.bridges diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/DefaultVanillaBridgeRepository.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/DefaultVanillaBridgeRepository.kt index c3880bd53..e3aa3d559 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/DefaultVanillaBridgeRepository.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/DefaultVanillaBridgeRepository.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.bridges diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/IpRangeToCountry.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/IpRangeToCountry.kt index f182cbbe5..ba5fed837 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/IpRangeToCountry.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/IpRangeToCountry.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.bridges diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/ParseBridgesResult.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/ParseBridgesResult.kt index da9e9e44d..85e064cb9 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/ParseBridgesResult.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/ParseBridgesResult.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.bridges diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/RequestBridgesInteractor.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/RequestBridgesInteractor.kt index f27c6d5ed..ea4c3d45b 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/RequestBridgesInteractor.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/RequestBridgesInteractor.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.bridges diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/RequestBridgesRepository.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/RequestBridgesRepository.kt index 4f7654e1d..ae4628981 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/RequestBridgesRepository.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/bridges/RequestBridgesRepository.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.bridges diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_checker/ConnectionCheckerInteractor.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_checker/ConnectionCheckerInteractor.kt index f5e51e3f5..8dc2af895 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_checker/ConnectionCheckerInteractor.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_checker/ConnectionCheckerInteractor.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.connection_checker diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_checker/ConnectionCheckerInteractorImpl.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_checker/ConnectionCheckerInteractorImpl.kt index 8a016eaa6..d2a0447cf 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_checker/ConnectionCheckerInteractorImpl.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_checker/ConnectionCheckerInteractorImpl.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.connection_checker diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_checker/ConnectionCheckerRepository.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_checker/ConnectionCheckerRepository.kt index 0ba613ef8..07ccb1bef 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_checker/ConnectionCheckerRepository.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_checker/ConnectionCheckerRepository.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.connection_checker diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_checker/OnInternetConnectionCheckedListener.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_checker/OnInternetConnectionCheckedListener.kt index 8376c4fc8..25b8b47cb 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_checker/OnInternetConnectionCheckedListener.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_checker/OnInternetConnectionCheckedListener.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.connection_checker diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/ConnectionRecordsConverter.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/ConnectionRecordsConverter.kt index 38fee8599..fca520b64 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/ConnectionRecordsConverter.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/ConnectionRecordsConverter.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.connection_records @@ -39,8 +39,10 @@ import pan.alexander.tordnscrypt.domain.dns_resolver.DnsInteractor import pan.alexander.tordnscrypt.domain.preferences.PreferenceRepository import pan.alexander.tordnscrypt.iptables.IptablesFirewall import pan.alexander.tordnscrypt.modules.ModulesStatus +import pan.alexander.tordnscrypt.settings.tor_apps.ApplicationData.Companion.SPECIAL_PORT_NTP import pan.alexander.tordnscrypt.settings.tor_apps.ApplicationData.Companion.SPECIAL_UID_CONNECTIVITY_CHECK import pan.alexander.tordnscrypt.settings.tor_apps.ApplicationData.Companion.SPECIAL_UID_KERNEL +import pan.alexander.tordnscrypt.settings.tor_apps.ApplicationData.Companion.SPECIAL_UID_NTP import pan.alexander.tordnscrypt.utils.Constants.HOST_NAME_REGEX import pan.alexander.tordnscrypt.utils.Constants.LOOPBACK_ADDRESS import pan.alexander.tordnscrypt.utils.Constants.META_ADDRESS @@ -303,6 +305,7 @@ class ConnectionRecordsConverter @Inject constructor( uid = packetRecord.uid, saddr = packetRecord.saddr, daddr = packetRecord.daddr, + dport = packetRecord.dport, protocol = packetRecord.protocol ).also { it.time = packetRecord.time @@ -358,6 +361,8 @@ class ConnectionRecordsConverter @Inject constructor( || appsSpecialAllowed.contains(SPECIAL_UID_CONNECTIVITY_CHECK) && connectivityCheckManager.getConnectivityCheckIps() .contains(packetRecord.daddr) + || appsSpecialAllowed.contains(SPECIAL_UID_NTP) + && packetRecord.uid == 1000 && packetRecord.dport == SPECIAL_PORT_NTP ) { false } else if (isIpInLanRange(packetRecord.daddr)) { @@ -368,7 +373,7 @@ class ConnectionRecordsConverter @Inject constructor( if (allThroughTor) { !appsBypassTor.contains(packetRecord.uid) } else { - !appsThroughTor.contains(packetRecord.uid) + appsThroughTor.contains(packetRecord.uid) } } else { true diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/ConnectionRecordsInteractor.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/ConnectionRecordsInteractor.kt index 3703dc191..b8b2e4e92 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/ConnectionRecordsInteractor.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/ConnectionRecordsInteractor.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
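Three substantive changes ride along with the header bumps in ConnectionRecordsConverter.kt: the captured destination port is now copied from `PacketRecord` into the log entry, system NTP traffic can be whitelisted via a new special UID, and the Tor-membership test for per-app routing is un-inverted. A minimal Kotlin sketch of the two predicates follows; the sentinel value and helper names are assumptions, only the predicate shapes come from the hunk.

```kotlin
// Hedged sketch of the converter's new allow/route checks. SPECIAL_UID_NTP's
// real value lives in ApplicationData.Companion and is assumed here; UID 1000
// is Android's "system" user, which is what issues NTP time queries, and NTP
// always talks to destination port 123.
const val SPECIAL_UID_NTP = -14        // assumption, not the real constant
const val SPECIAL_PORT_NTP = 123

fun isAllowedSystemNtp(uid: Int, dport: Int, specialAllowed: Set<Int>): Boolean =
    SPECIAL_UID_NTP in specialAllowed && uid == 1000 && dport == SPECIAL_PORT_NTP

// The un-inverted membership test: when only selected apps are routed through
// Tor, a packet counts as Tor-routed exactly when its UID is in that set. The
// previous `!appsThroughTor.contains(uid)` flipped the result, so clearnet
// apps were treated as Tor apps (and vice versa) in the connection log.
fun isRoutedThroughTor(
    uid: Int,
    allThroughTor: Boolean,
    appsBypassTor: Set<Int>,
    appsThroughTor: Set<Int>,
): Boolean =
    if (allThroughTor) uid !in appsBypassTor else uid in appsThroughTor
```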
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.connection_records diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/ConnectionRecordsInteractorInterface.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/ConnectionRecordsInteractorInterface.kt index de42cf51f..920729d2d 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/ConnectionRecordsInteractorInterface.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/ConnectionRecordsInteractorInterface.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.connection_records diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/ConnectionRecordsParser.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/ConnectionRecordsParser.kt index 3b3156f2d..398b15d5d 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/ConnectionRecordsParser.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/ConnectionRecordsParser.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.connection_records @@ -146,36 +146,45 @@ class ConnectionRecordsParser @Inject constructor( if (Tethering.apIsOn && fixTTL && record.saddr.contains(apAddresses)) { lines.append("").append("WiFi").append("") - .append(protocol).append(" -> ") + .append(protocol).append(" → ") } else if (Tethering.usbTetherOn && fixTTL && record.saddr.contains(usbAddresses)) { lines.append("").append("USB").append("") - .append(protocol).append(" -> ") + .append(protocol).append(" → ") } else if (Tethering.ethernetOn && fixTTL && record.saddr.contains( localEthernetDeviceAddress ) ) { lines.append("").append("LAN").append("") - .append(protocol).append(" -> ") + .append(protocol).append(" → ") } else if (appName.isNotEmpty()) { lines.append("").append(appName).append("") - .append(protocol).append(" -> ") + .append(protocol).append(" → ") } else { lines.append("").append("Unknown UID").append(record.uid).append("") - .append(protocol).append(" -> ") + .append(protocol).append(" → ") } record.dnsLogEntry?.let { - lines.append(it.domainsChain.joinToString(" -> ")) - .append(" -> ") + lines.append(it.domainsChain.joinToString(" → ")) + .append(" → ") .append(record.daddr) + if (record.dport != 0) { + lines.append(":${record.dport}") + } } ?: record.reverseDns?.let { - lines.append(it).append(" -> ").append(record.daddr) + lines.append(it).append(" → ").append(record.daddr) + if (record.dport != 0) { + lines.append(":${record.dport}") + } } ?: run { lines.append(record.daddr) + if (record.dport != 0) { + lines.append(":${record.dport}") + } } } else if (record is DnsLogEntry) { if (record.domainsChain.isNotEmpty()) { - lines.append(record.domainsChain.joinToString(" -> ")) + 
lines.append(record.domainsChain.joinToString(" → ")) } if (record.blocked && record.blockedByIpv6) { lines.append(" ipv6") diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/ConnectionRecordsRepository.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/ConnectionRecordsRepository.kt index b3ef95b75..f8669cdd2 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/ConnectionRecordsRepository.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/ConnectionRecordsRepository.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.connection_records diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/OnConnectionRecordsUpdatedListener.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/OnConnectionRecordsUpdatedListener.kt index 61ccda367..5ad90f639 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/OnConnectionRecordsUpdatedListener.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/OnConnectionRecordsUpdatedListener.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.connection_records diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/entities/ConnectionData.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/entities/ConnectionData.kt index dbe26a01d..ba1560514 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/entities/ConnectionData.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/entities/ConnectionData.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.connection_records.entities @@ -75,6 +75,7 @@ class PacketRecord( val uid: Int, val saddr: String, val daddr: String, + val dport: Int, val protocol: Int = UNDEFINED, val allowed: Boolean ): ConnectionData(time) { diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/entities/ConnectionLogEntry.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/entities/ConnectionLogEntry.kt index cce86955d..48ee4f968 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/entities/ConnectionLogEntry.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/entities/ConnectionLogEntry.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
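The parser hunk above swaps the ASCII `->` separator for a typographic arrow and appends the destination port to the address whenever the kernel reported one; the same `if (record.dport != 0)` block is repeated for each of the three rendering paths. A condensed sketch of that rendering, with illustrative names:

```kotlin
// What the parser now renders per record: the domain chain joined with
// arrows, then "daddr:dport", with the port omitted when none was captured
// (dport == 0). The function name and parameters are illustrative.
fun formatDestination(
    domainsChain: List<String>,
    daddr: String,
    dport: Int,
): String = buildString {
    if (domainsChain.isNotEmpty()) {
        append(domainsChain.joinToString(" → "))
        append(" → ")
    }
    append(daddr)
    if (dport != 0) append(":").append(dport)
}

fun main() {
    // Prints: example.com → cdn.example.com → 93.184.216.34:443
    println(formatDestination(listOf("example.com", "cdn.example.com"), "93.184.216.34", 443))
}
```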
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.connection_records.entities @@ -37,6 +37,7 @@ data class PacketLogEntry( val uid: Int, val saddr: String, val daddr: String, + val dport: Int, val protocol: Int = UNDEFINED, var reverseDns: String? = null, var dnsLogEntry: DnsLogEntry? = null diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/entities/ConnectionProtocol.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/entities/ConnectionProtocol.kt index e238ae03a..88f525087 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/entities/ConnectionProtocol.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/connection_records/entities/ConnectionProtocol.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.connection_records.entities diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/dns_resolver/DnsInteractor.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/dns_resolver/DnsInteractor.kt index 5371d2af4..e32c458fc 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/dns_resolver/DnsInteractor.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/dns_resolver/DnsInteractor.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.dns_resolver diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/dns_resolver/DnsInteractorImpl.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/dns_resolver/DnsInteractorImpl.kt index ecf5f8edf..e63629aba 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/dns_resolver/DnsInteractorImpl.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/dns_resolver/DnsInteractorImpl.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.dns_resolver diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/dns_resolver/DnsRepository.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/dns_resolver/DnsRepository.kt index e9ea21006..1fd1ec529 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/dns_resolver/DnsRepository.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/dns_resolver/DnsRepository.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
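In the entities, `dport` is added to both `PacketRecord` and `PacketLogEntry` without a default value, so the change is deliberately source-breaking: every construction site has to supply the port, which is what forces the converter hunk above to pass `dport = packetRecord.dport`. A trimmed sketch (DNS-related fields omitted; the value of `UNDEFINED` is an assumption):

```kotlin
const val UNDEFINED = -1   // assumed value of the shared UNDEFINED constant

data class PacketLogEntry(
    val time: Long,
    val uid: Int,
    val saddr: String,
    val daddr: String,
    val dport: Int,               // new: no default, so all call sites must pass it
    val protocol: Int = UNDEFINED,
    var reverseDns: String? = null,
)

// Illustrative construction with made-up values.
val entry = PacketLogEntry(
    time = System.currentTimeMillis(),
    uid = 10234,
    saddr = "10.0.0.2",
    daddr = "93.184.216.34",
    dport = 443,
)
```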
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.dns_resolver diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/dns_rules/DnsRuleType.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/dns_rules/DnsRuleType.kt index 6506011b7..d5676bcae 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/dns_rules/DnsRuleType.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/dns_rules/DnsRuleType.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.dns_rules diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/dns_rules/DnsRulesInteractor.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/dns_rules/DnsRulesInteractor.kt index 166d64421..a2196074a 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/dns_rules/DnsRulesInteractor.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/dns_rules/DnsRulesInteractor.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.dns_rules diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/dns_rules/DnsRulesInteractorImpl.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/dns_rules/DnsRulesInteractorImpl.kt index a8e710209..1d1f64708 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/dns_rules/DnsRulesInteractorImpl.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/dns_rules/DnsRulesInteractorImpl.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.dns_rules diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/dns_rules/DnsRulesMetadata.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/dns_rules/DnsRulesMetadata.kt index cff217bf3..a4f5f267b 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/dns_rules/DnsRulesMetadata.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/dns_rules/DnsRulesMetadata.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.dns_rules diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/dns_rules/DnsRulesRepository.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/dns_rules/DnsRulesRepository.kt index b4396add5..682ed4ba0 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/dns_rules/DnsRulesRepository.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/dns_rules/DnsRulesRepository.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.dns_rules diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/AbstractLogParser.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/AbstractLogParser.kt index fab6f1e4a..7e92ed966 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/AbstractLogParser.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/AbstractLogParser.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.log_reader diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/DNSCryptInteractorInterface.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/DNSCryptInteractorInterface.kt index 23659f7d0..fde9fd7c8 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/DNSCryptInteractorInterface.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/DNSCryptInteractorInterface.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.log_reader diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/ITPDInteractorInterface.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/ITPDInteractorInterface.kt index e86f4b6cd..0d6629d39 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/ITPDInteractorInterface.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/ITPDInteractorInterface.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.log_reader diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/LogDataModel.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/LogDataModel.kt index f927dd817..e33705103 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/LogDataModel.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/LogDataModel.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.log_reader diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/LogReaderFacade.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/LogReaderFacade.kt index 2a8bca705..6ed15e71c 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/LogReaderFacade.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/LogReaderFacade.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.log_reader diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/LogReaderInteractors.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/LogReaderInteractors.kt index bf147c6b1..3fc6619f8 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/LogReaderInteractors.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/LogReaderInteractors.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.log_reader diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/LogReaderLoop.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/LogReaderLoop.kt index 29767be3a..556f31056 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/LogReaderLoop.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/LogReaderLoop.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.log_reader diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/ModulesLogRepository.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/ModulesLogRepository.kt index 00249b426..47a463dab 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/ModulesLogRepository.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/ModulesLogRepository.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.log_reader diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/ScheduledExecutor.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/ScheduledExecutor.kt index 7604b4a5e..155a602a1 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/ScheduledExecutor.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/ScheduledExecutor.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.log_reader diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/TorInteractorInterface.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/TorInteractorInterface.kt index c0d73e61c..2f060cd23 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/TorInteractorInterface.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/TorInteractorInterface.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.log_reader diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/dnscrypt/DNSCryptInteractor.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/dnscrypt/DNSCryptInteractor.kt index 5187af73d..3b6651cf3 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/dnscrypt/DNSCryptInteractor.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/dnscrypt/DNSCryptInteractor.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.log_reader.dnscrypt diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/dnscrypt/DNSCryptLogParser.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/dnscrypt/DNSCryptLogParser.kt index 765b9ab24..2ff0a337e 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/dnscrypt/DNSCryptLogParser.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/dnscrypt/DNSCryptLogParser.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.log_reader.dnscrypt diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/dnscrypt/OnDNSCryptLogUpdatedListener.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/dnscrypt/OnDNSCryptLogUpdatedListener.kt index 716145a11..9e2b76851 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/dnscrypt/OnDNSCryptLogUpdatedListener.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/dnscrypt/OnDNSCryptLogUpdatedListener.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.log_reader.dnscrypt diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/itpd/ITPDHtmlInteractor.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/itpd/ITPDHtmlInteractor.kt index 494ac47ea..558c58b1d 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/itpd/ITPDHtmlInteractor.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/itpd/ITPDHtmlInteractor.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.log_reader.itpd diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/itpd/ITPDHtmlParser.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/itpd/ITPDHtmlParser.kt index 52cf1d093..82212df0f 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/itpd/ITPDHtmlParser.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/itpd/ITPDHtmlParser.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.log_reader.itpd diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/itpd/ITPDInteractor.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/itpd/ITPDInteractor.kt index f1b94a4e8..843a936b5 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/itpd/ITPDInteractor.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/itpd/ITPDInteractor.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.log_reader.itpd diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/itpd/ITPDLogParser.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/itpd/ITPDLogParser.kt index 3650f3332..34c3e45eb 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/itpd/ITPDLogParser.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/itpd/ITPDLogParser.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.log_reader.itpd diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/itpd/OnITPDHtmlUpdatedListener.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/itpd/OnITPDHtmlUpdatedListener.kt index c179abbde..480af9280 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/itpd/OnITPDHtmlUpdatedListener.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/itpd/OnITPDHtmlUpdatedListener.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.log_reader.itpd diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/itpd/OnITPDLogUpdatedListener.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/itpd/OnITPDLogUpdatedListener.kt index 5bd61110c..0afed4e99 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/itpd/OnITPDLogUpdatedListener.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/itpd/OnITPDLogUpdatedListener.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.log_reader.itpd diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/tor/OnTorLogUpdatedListener.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/tor/OnTorLogUpdatedListener.kt index 194605c56..08eb2b260 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/tor/OnTorLogUpdatedListener.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/tor/OnTorLogUpdatedListener.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.log_reader.tor diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/tor/TorInteractor.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/tor/TorInteractor.kt index 80ad4cfb7..843a7002a 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/tor/TorInteractor.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/tor/TorInteractor.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.log_reader.tor diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/tor/TorLogParser.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/tor/TorLogParser.kt index 91059e228..4f50395e9 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/tor/TorLogParser.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/log_reader/tor/TorLogParser.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.log_reader.tor diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/preferences/PreferenceRepository.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/preferences/PreferenceRepository.kt index c7f47d7c3..2600988a6 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/preferences/PreferenceRepository.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/preferences/PreferenceRepository.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.preferences diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/preferences/PreferenceType.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/preferences/PreferenceType.java index 74d251440..747532a86 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/preferences/PreferenceType.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/preferences/PreferenceType.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.preferences; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/resources/ResourceRepository.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/resources/ResourceRepository.kt index c3c81def6..07991d761 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/resources/ResourceRepository.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/resources/ResourceRepository.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.resources diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/tor_ips/TorIpsInteractor.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/tor_ips/TorIpsInteractor.kt index 37f223b82..07c04b299 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/tor_ips/TorIpsInteractor.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/tor_ips/TorIpsInteractor.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.tor_ips diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/tor_ips/TorIpsInteractorImpl.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/tor_ips/TorIpsInteractorImpl.kt index 96a6a7110..be2ee6557 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/tor_ips/TorIpsInteractorImpl.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/domain/tor_ips/TorIpsInteractorImpl.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.domain.tor_ips diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/help/HelpActivity.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/help/HelpActivity.java index a8c3b4a4b..76eb3e41f 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/help/HelpActivity.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/help/HelpActivity.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.help; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/help/HelpActivityReceiver.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/help/HelpActivityReceiver.java index 94aa68a43..cf3ed2901 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/help/HelpActivityReceiver.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/help/HelpActivityReceiver.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.help; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/help/Utils.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/help/Utils.kt index 1ff865112..5f39a6b40 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/help/Utils.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/help/Utils.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.help diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/AssetsExtractCommand.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/AssetsExtractCommand.java index c3690a64d..a27fc2b35 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/AssetsExtractCommand.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/AssetsExtractCommand.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.installer; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/BusybExtractCommand.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/BusybExtractCommand.java index 6458cda38..3efe9075e 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/BusybExtractCommand.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/BusybExtractCommand.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.installer; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/ChmodCommand.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/ChmodCommand.java index a82949402..df0256046 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/ChmodCommand.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/ChmodCommand.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.installer; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/Command.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/Command.java index 4e518ef3a..26982e0ea 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/Command.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/Command.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.installer; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/DNSCryptExtractCommand.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/DNSCryptExtractCommand.java index 0ee5f1f75..952853ffa 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/DNSCryptExtractCommand.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/DNSCryptExtractCommand.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.installer; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/ITPDExtractCommand.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/ITPDExtractCommand.java index 6cab6759e..7a8eecc89 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/ITPDExtractCommand.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/ITPDExtractCommand.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.installer; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/Installer.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/Installer.java index a879b6ddc..63fcc1ec1 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/Installer.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/Installer.java @@ -14,13 +14,14 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.installer; import android.annotation.SuppressLint; import android.app.Activity; +import android.content.Context; import android.content.Intent; import android.content.IntentFilter; import android.content.SharedPreferences; @@ -32,7 +33,9 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashSet; import java.util.List; +import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; @@ -46,6 +49,8 @@ import pan.alexander.tordnscrypt.modules.ModulesStatus; import pan.alexander.tordnscrypt.modules.ModulesVersions; import pan.alexander.tordnscrypt.settings.PathVars; +import pan.alexander.tordnscrypt.settings.tor_apps.ApplicationData; +import pan.alexander.tordnscrypt.utils.apps.InstalledApplicationsManager; import pan.alexander.tordnscrypt.utils.executors.CoroutineExecutor; import pan.alexander.tordnscrypt.utils.root.RootCommands; import pan.alexander.tordnscrypt.utils.filemanager.FileManager; @@ -54,6 +59,7 @@ import static pan.alexander.tordnscrypt.di.SharedPreferencesModule.DEFAULT_PREFERENCES_NAME; import static pan.alexander.tordnscrypt.utils.logger.Logger.loge; import static pan.alexander.tordnscrypt.utils.logger.Logger.logi; +import static pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.CLEARNET_APPS; import static pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.MAIN_ACTIVITY_RECREATE; import static pan.alexander.tordnscrypt.utils.root.RootCommandsMark.INSTALLER_MARK; import static pan.alexander.tordnscrypt.utils.root.RootExecService.COMMAND_RESULT; @@ -198,6 +204,8 @@ public void installModules() { mainActivity.runOnUiThread(installerUIChanger.showDialogAfterInstallation()); + excludeTorSelfContainingAppsFromTor(mainActivity); + } catch (Exception e) { loge("Installation fault", e); @@ -521,4 +529,25 @@ public void onActivityChange(MainActivity mainActivity) { this.mainActivity = mainActivity; installerUIChanger.setMainActivity(mainActivity); } + + private void excludeTorSelfContainingAppsFromTor(Context context) { + try { + List packetsWithOwnTor = Arrays.asList( + context.getResources().getStringArray(R.array.contains_own_tor) + ); + List installedApps = new InstalledApplicationsManager.Builder() + .build() + .getInstalledApps(); + Set uidsContainsOwnTor = new HashSet<>(); + for (ApplicationData app: installedApps) { + if (packetsWithOwnTor.contains(app.getPack())) { + uidsContainsOwnTor.add(String.valueOf(app.getUid())); + } + } + preferenceRepository.get().setStringSetPreference(CLEARNET_APPS, uidsContainsOwnTor); + logi("Installer: exclude apps from Tor OK"); + } catch (Exception e) { + loge("Installer excludeTorSelfContainingAppsFromTor", e); + } + } } diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/InstallerHelper.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/InstallerHelper.java index f2115a9d1..ad0b3052b 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/InstallerHelper.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/InstallerHelper.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
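The one behavioural change in Installer.java is `excludeTorSelfContainingAppsFromTor()`: after the modules are installed, apps that bundle their own Tor instance (the changelog names Tor Browser, OnionShare, Orbot, Briar, Cwtch) are written to the clearnet set so InviZible does not wrap their traffic in a second circuit, avoiding Tor-over-Tor. A hedged Kotlin sketch of the same logic, with simplified stand-ins for the app model and the preference store:

```kotlin
// Sketch of the installer's exclusion step. InstalledApp and the save
// callback are simplified stand-ins for ApplicationData and
// PreferenceRepository.setStringSetPreference(CLEARNET_APPS, ...).
data class InstalledApp(val pack: String, val uid: Int)

fun excludeSelfContainedTorApps(
    packagesWithOwnTor: List<String>,          // from R.array.contains_own_tor
    installedApps: List<InstalledApp>,
    saveClearnetUids: (Set<String>) -> Unit,
) {
    val uids = installedApps
        .filter { it.pack in packagesWithOwnTor } // match by package name
        .map { it.uid.toString() }                // preference stores UIDs as strings
        .toSet()
    saveClearnetUids(uids)                        // persisted under CLEARNET_APPS
}
```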
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.installer; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/InstallerReceiver.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/InstallerReceiver.java index 3fbe2af86..e26e7b1cb 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/InstallerReceiver.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/InstallerReceiver.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.installer; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/InstallerUIChanger.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/InstallerUIChanger.java index 31a72d694..5c0edcc18 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/InstallerUIChanger.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/InstallerUIChanger.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.installer; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/TorExtractCommand.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/TorExtractCommand.java index c08188aaa..ec8310771 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/TorExtractCommand.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/installer/TorExtractCommand.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.installer; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/IptablesConstants.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/IptablesConstants.java index f9639452e..8de4aa849 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/IptablesConstants.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/IptablesConstants.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.iptables; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/IptablesFirewall.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/IptablesFirewall.kt index 537af1dd7..475b6bfa3 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/IptablesFirewall.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/IptablesFirewall.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.iptables @@ -199,8 +199,8 @@ class IptablesFirewall @Inject constructor( when { range.size == 1 -> when { + range.first() == SPECIAL_UID_KERNEL -> "$iptables -A $FILTER_FIREWALL_LAN -m owner ! --uid-owner 0:999999999 -j MARK --set-mark $FIREWALL_RETURN_MARK || true" range.first() >= 0 -> "$iptables -A $FILTER_FIREWALL_LAN -m owner --uid-owner ${range.first()} -j MARK --set-mark $FIREWALL_RETURN_MARK 2> /dev/null || true" - range.first() == SPECIAL_UID_KERNEL -> "$iptables -A $FILTER_FIREWALL_LAN -m owner ! --uid-owner 0:999999999 -j MARK --set-mark $FIREWALL_RETURN_MARK 2> /dev/null || true" else -> "" } diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/IptablesReceiver.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/IptablesReceiver.kt index b93a0d6da..207e3c9e9 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/IptablesReceiver.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/IptablesReceiver.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.iptables diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/IptablesRules.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/IptablesRules.java index 510d7213c..0a71db8c9 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/IptablesRules.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/IptablesRules.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.iptables; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/IptablesRulesSender.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/IptablesRulesSender.java index 0bb421ffa..5aa6b4fce 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/IptablesRulesSender.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/IptablesRulesSender.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.iptables; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/IptablesUtils.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/IptablesUtils.kt index 3606b967c..46b9ae862 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/IptablesUtils.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/IptablesUtils.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
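Two related UID-handling fixes sit here: the `when` in IptablesFirewall.kt now tests the kernel sentinel before the broad `range.first() >= 0` guard (with Kotlin's first-match `when`, specific cases belong ahead of guards that could shadow them), and in the IptablesUtils.kt hunk just below, `groupToRanges` is tightened from `element >= 0` to `element > 0` so UID 0 can no longer be chained onto a run ending at a negative sentinel. A self-contained sketch of that pipeline; the sentinel value is an assumption:

```kotlin
// Sketch of the UID → iptables-rule pipeline these two hunks touch.
const val SPECIAL_UID_KERNEL = -1   // assumed: "no socket owner" pseudo-UID

// Fold a sorted UID set into runs of consecutive values. The guard tightened
// to `element > 0` stops 0 from extending a run that ends at -1, so negative
// sentinels always stay in their own single-element range instead of leaking
// into an --uid-owner A:B span.
fun Set<Int>.groupToRanges(): List<List<Int>> =
    sorted().fold(mutableListOf<MutableList<Int>>()) { ranges, element ->
        val last = ranges.lastOrNull()
        if (element > 0 && last?.lastOrNull() == element - 1) {
            last.add(element)                   // extend the consecutive run
        } else {
            ranges.add(mutableListOf(element))  // start a new run
        }
        ranges
    }

// First-match `when`: the sentinel case is listed before the numeric guard
// so the guard can never shadow it.
fun ruleFor(range: List<Int>): String = when {
    range.size == 1 && range.first() == SPECIAL_UID_KERNEL ->
        "-m owner ! --uid-owner 0:999999999 -j MARK"   // kernel packets have no owner
    range.size == 1 && range.first() >= 0 ->
        "-m owner --uid-owner ${range.first()} -j MARK"
    range.size > 1 ->
        "-m owner --uid-owner ${range.first()}:${range.last()} -j MARK"
    else -> ""
}

fun main() {
    // {-1, 0, 1, 2, 10} groups as [[-1], [0, 1, 2], [10]]
    println(setOf(2, 0, 1, 10, -1).groupToRanges().map(::ruleFor))
}
```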
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.iptables @@ -23,7 +23,7 @@ object IptablesUtils { fun Set.groupToRanges() = sorted().fold(mutableListOf>()) { nonBreakingRanges, element -> val lastRange = nonBreakingRanges.lastOrNull() - if (element >= 0 && lastRange?.lastOrNull() == element - 1) { + if (element > 0 && lastRange?.lastOrNull() == element - 1) { lastRange.add(element) } else { nonBreakingRanges.add(mutableListOf(element)) diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/KillSwitchNotification.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/KillSwitchNotification.kt index dfadac6f6..ef199a922 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/KillSwitchNotification.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/KillSwitchNotification.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.iptables diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/ModulesIptablesRules.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/ModulesIptablesRules.java index d4da7bc2a..1c1d5fc19 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/ModulesIptablesRules.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/ModulesIptablesRules.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.iptables; @@ -364,7 +364,7 @@ public List configureIptables( } String nflogDns = ""; - String nflogPackets = ""; + String nflogPackets; if (showConnectionLogs) { nflogDns = TextUtils.join("; ", Arrays.asList( iptables + "-A " + FILTER_OUTPUT_CORE + " -p udp -s " + LOOPBACK_ADDRESS + " --sport " + pathVars.getDNSCryptPort() + " -m limit --limit 1000/min -j NFLOG --nflog-prefix " + NFLOG_PREFIX + " --nflog-group " + NFLOG_GROUP + " 2> /dev/null || true", @@ -375,7 +375,13 @@ public List configureIptables( nflogPackets = TextUtils.join("; ", Arrays.asList( iptables + "-t mangle -D OUTPUT -p all -m owner ! --uid-owner " + appUID + " -m limit --limit 1000/min -j NFLOG --nflog-prefix " + NFLOG_PREFIX + " --nflog-group " + NFLOG_GROUP + " 2> /dev/null || true", iptables + "-t mangle -D OUTPUT -p all -m limit --limit 1000/min -j NFLOG --nflog-prefix " + NFLOG_PREFIX + " --nflog-group " + NFLOG_GROUP + " 2> /dev/null || true", - iptables + "-t mangle -I OUTPUT -p all -m owner ! --uid-owner " + appUID + " -m limit --limit 1000/min -j NFLOG --nflog-prefix " + NFLOG_PREFIX + " --nflog-group " + NFLOG_GROUP + " 2> /dev/null || true" + //iptables + "-t mangle -I OUTPUT -p all -m owner ! 
--uid-owner " + appUID + " -m limit --limit 1000/min -j NFLOG --nflog-prefix " + NFLOG_PREFIX + " --nflog-group " + NFLOG_GROUP + " 2> /dev/null || true" + iptables + "-t mangle -I OUTPUT -p all -m limit --limit 1000/min -j NFLOG --nflog-prefix " + NFLOG_PREFIX + " --nflog-group " + NFLOG_GROUP + " 2> /dev/null || true" + )); + } else { + nflogPackets = TextUtils.join("; ", Arrays.asList( + iptables + "-t mangle -D OUTPUT -p all -m owner ! --uid-owner " + appUID + " -m limit --limit 1000/min -j NFLOG --nflog-prefix " + NFLOG_PREFIX + " --nflog-group " + NFLOG_GROUP + " 2> /dev/null || true", + iptables + "-t mangle -D OUTPUT -p all -m limit --limit 1000/min -j NFLOG --nflog-prefix " + NFLOG_PREFIX + " --nflog-group " + NFLOG_GROUP + " 2> /dev/null || true" )); } diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/Tethering.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/Tethering.java index c2a9a7af6..00c6ff7d0 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/Tethering.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/iptables/Tethering.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.iptables; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/itpd_fragment/ITPDFragmentPresenter.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/itpd_fragment/ITPDFragmentPresenter.java index 94c2c8328..29fcad396 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/itpd_fragment/ITPDFragmentPresenter.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/itpd_fragment/ITPDFragmentPresenter.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.itpd_fragment; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/itpd_fragment/ITPDFragmentPresenterInterface.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/itpd_fragment/ITPDFragmentPresenterInterface.java index a7b9fb153..d34152f7b 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/itpd_fragment/ITPDFragmentPresenterInterface.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/itpd_fragment/ITPDFragmentPresenterInterface.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.itpd_fragment; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/itpd_fragment/ITPDFragmentReceiver.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/itpd_fragment/ITPDFragmentReceiver.java index c77c9be9f..de7801178 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/itpd_fragment/ITPDFragmentReceiver.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/itpd_fragment/ITPDFragmentReceiver.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.itpd_fragment; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/itpd_fragment/ITPDFragmentView.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/itpd_fragment/ITPDFragmentView.java index f380ff8c7..84213a4ac 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/itpd_fragment/ITPDFragmentView.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/itpd_fragment/ITPDFragmentView.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.itpd_fragment; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/itpd_fragment/ITPDRunFragment.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/itpd_fragment/ITPDRunFragment.java index 732576781..d5890037f 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/itpd_fragment/ITPDRunFragment.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/itpd_fragment/ITPDRunFragment.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.itpd_fragment; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/language/Language.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/language/Language.java index 1a64c6c2a..e1d2cda38 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/language/Language.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/language/Language.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.language; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/language/LanguageList.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/language/LanguageList.java index b22c6f90f..5208807fe 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/language/LanguageList.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/language/LanguageList.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
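Also worth flagging in the ModulesIptablesRules hunk above: `nflogPackets` used to stay empty when connection logs were disabled, so NFLOG rules left over from a previous session survived in the mangle table. The new else branch always emits the delete commands, and the `-D ... 2> /dev/null || true` suffix turns them into safe no-ops when no such rule exists. A hedged sketch of the idea (the prefix and group values here are illustrative, not the app's real NFLOG_PREFIX and NFLOG_GROUP):

```kotlin
// Build idempotent teardown commands: -D fails when the rule is absent,
// so redirect stderr and force a zero exit status.
fun nflogTeardown(iptables: String, appUid: Int): String = listOf(
    "$iptables -t mangle -D OUTPUT -p all -m owner ! --uid-owner $appUid" +
            " -m limit --limit 1000/min -j NFLOG --nflog-prefix PKT --nflog-group 78",
    "$iptables -t mangle -D OUTPUT -p all" +
            " -m limit --limit 1000/min -j NFLOG --nflog-prefix PKT --nflog-group 78"
).joinToString("; ") { "$it 2> /dev/null || true" }
```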
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.language; @@ -32,6 +32,7 @@ public class LanguageList { "QXJhYmljICjYp9mE2LnYsdio2YrYqSk=", "QnVsZ2FyaWFuICjQkdGK0LvQs9Cw0YDRgdC60Lgp", "Q2hpbmVzZSAo5Lit5paHKQ==", + "RHV0Y2ggKE5lZGVybGFuZHNlKQ==", "RW5nbGlzaCAoRW5nbGlzaCk=", "RmlubmlzaCAoU3VvbWkp", "RnJlbmNoIChGcmFuw6dhaXMp", @@ -46,6 +47,7 @@ public class LanguageList { "UG9ydHVndWVzZS1CUiAoUG9ydHVndcOqcy1CUik=", "UnVzc2lhbiAo0KDRg9GB0YHQutC40Lkp", "U3BhbmlzaCAoRXNwYcOxb2wp", + "VGFtaWwgKOCupOCuruCuv+CutOCvjSk=", "VHVya2lzaCAoVMO8cmvDp2Up", "VWtyYWluaWFuICjQo9C60YDQsNGX0L3RgdGM0LrQsCk=" }; @@ -54,6 +56,7 @@ public class LanguageList { "ar", "bg", "zh", + "nl", "en", "fi", "fr", @@ -68,6 +71,7 @@ public class LanguageList { "pt-rBR", "ru", "es", + "ta", "tr", "uk" }; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/language/LanguagePreference.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/language/LanguagePreference.java index 7fc81c1dd..44ddd63e7 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/language/LanguagePreference.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/language/LanguagePreference.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.language; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/main_fragment/MainFragment.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/main_fragment/MainFragment.java index 459097787..28bf53d73 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/main_fragment/MainFragment.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/main_fragment/MainFragment.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.main_fragment; @@ -28,6 +28,7 @@ import android.content.IntentFilter; import android.content.res.Configuration; import android.graphics.drawable.Drawable; +import android.os.Build; import android.os.Bundle; import android.text.Spanned; import android.view.Gravity; @@ -717,7 +718,11 @@ public void setTorProgressBarProgress(int progress) { pbTorMainFragment.setIndeterminate(false); } if (progress >= 0) { - pbTorMainFragment.setProgress(progress); + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.N) { + pbTorMainFragment.setProgress(progress, true); + } else { + pbTorMainFragment.setProgress(progress); + } pbTorMainFragment.setVisibility(View.VISIBLE); divTorMainFragment.setVisibility(View.GONE); } else { diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/main_fragment/ViewPagerAdapter.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/main_fragment/ViewPagerAdapter.java index 2bba1182e..849619ceb 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/main_fragment/ViewPagerAdapter.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/main_fragment/ViewPagerAdapter.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. 
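In LanguageList the display names are Base64-encoded UTF-8 strings, so the two arrays must stay index-aligned: Dutch is inserted before English in both the names array and the codes array ("nl"), Tamil before Turkish ("ta"). A quick way to verify an entry:

```kotlin
import java.util.Base64

// Decode one of the Base64 display names above (UTF-8 text).
fun decodeLanguageName(encoded: String): String =
    String(Base64.getDecoder().decode(encoded), Charsets.UTF_8)

fun main() {
    println(decodeLanguageName("RHV0Y2ggKE5lZGVybGFuZHNlKQ==")) // Dutch (Nederlandse)
}
```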
If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.main_fragment; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ContextUIDUpdater.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ContextUIDUpdater.java index 927e1b217..65a0469ef 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ContextUIDUpdater.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ContextUIDUpdater.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.modules; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesActionSender.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesActionSender.kt index 8f9a91765..6a79afe1d 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesActionSender.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesActionSender.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.modules diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesAux.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesAux.java index 7d495e2f5..427e09a4e 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesAux.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesAux.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.modules; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesKiller.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesKiller.java index b4d3ea47f..17da4cec9 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesKiller.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesKiller.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.modules; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesReceiver.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesReceiver.java index f7f98329c..51dca5b4e 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesReceiver.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesReceiver.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.modules; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesRestarter.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesRestarter.java index 90401b756..d634171f1 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesRestarter.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesRestarter.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.modules; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesRunner.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesRunner.java index 5fb4607c4..2ac5a5a90 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesRunner.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesRunner.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.modules; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesService.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesService.java index faedd0f02..89f173b09 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesService.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesService.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.modules; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesServiceActions.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesServiceActions.java index 965bbc286..344d8f81f 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesServiceActions.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesServiceActions.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.modules; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesServiceNotificationManager.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesServiceNotificationManager.java index a562d8cad..7180a345a 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesServiceNotificationManager.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesServiceNotificationManager.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.modules; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesStarterHelper.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesStarterHelper.java index b482a27d9..1cac66310 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesStarterHelper.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesStarterHelper.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see <http://www.gnu.org/licenses/>. - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.modules; @@ -66,6 +66,8 @@ import static pan.alexander.tordnscrypt.utils.logger.Logger.logi; import static pan.alexander.tordnscrypt.utils.logger.Logger.logw; import static pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.DNSCRYPT_LISTEN_PORT; +import static pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.DNSCRYPT_OUTBOUND_PROXY; +import static pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.DNSCRYPT_OUTBOUND_PROXY_PORT; import static pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.FAKE_SNI; import static pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.FAKE_SNI_HOSTS; import static pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.TOR_DNS_PORT; @@ -565,7 +567,14 @@ private void checkTorPortsForBusyness(List<String> lines) { fixTorProxyPort(lines, TOR_DNS_PORT, dnsPort); } if (socksPort.matches(NUMBER_REGEX) && checker.isPortBusy(socksPort)) { + String savedTorSocksPort = pathVars.getTorSOCKSPort(); + String savedDnsCryptProxyPort = defaultPreferences.get().getString(DNSCRYPT_OUTBOUND_PROXY_PORT, "9050"); fixTorProxyPort(lines, TOR_SOCKS_PORT, socksPort); + String currentTorSocksPort = pathVars.getTorSOCKSPort(); + if (!savedTorSocksPort.equals(currentTorSocksPort) + && savedTorSocksPort.equals(savedDnsCryptProxyPort)) { + fixDnsCryptProxyPort(savedDnsCryptProxyPort, currentTorSocksPort); + } } if (httpTunnelPort.matches(NUMBER_REGEX) && checker.isPortBusy(httpTunnelPort)) { fixTorProxyPort(lines, TOR_HTTP_TUNNEL_PORT, httpTunnelPort); @@ -576,6 +585,24 @@ } + private void fixDnsCryptProxyPort(String savedPort, String port) { + List<String> lines = readDnsCryptConfiguration(); + for (int i = 0; i < lines.size(); i++) { + String line = lines.get(i); + if (line.contains("proxy =")) { + line = line.replace(savedPort, port); + lines.set(i, line); + break; + } + } + defaultPreferences.get().edit().putString(DNSCRYPT_OUTBOUND_PROXY_PORT, port).apply(); + saveDnsCryptConfiguration(lines); + if (modulesStatus.getDnsCryptState() != STOPPED + && defaultPreferences.get().getBoolean(DNSCRYPT_OUTBOUND_PROXY, false)) { + ModulesRestarter.restartDNSCrypt(context); + } + } + private void fixTorProxyPort(List<String> lines, String proxyType, String proxyPort) { PortChecker checker = portChecker.get(); String port = checker.getFreePort(proxyPort); diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesStateLoop.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesStateLoop.java index 8e717426e..ce9e4e73a 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesStateLoop.java +++
b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesStateLoop.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.modules; @@ -420,7 +420,8 @@ private void updateIptablesRules( stopCounter = STOP_COUNTER_DELAY; } else if (operationMode == VPN_MODE) { - if (dnsCryptState == STOPPED && torState == STOPPED + if (vpnServiceEnabled && + dnsCryptState == STOPPED && torState == STOPPED && (firewallState == STOPPED || firewallState == STOPPING)) { ServiceVPNHelper.stop("All modules stopped", modulesService); } else if (vpnServiceEnabled) { @@ -692,7 +693,7 @@ private void setITPDReady(boolean ready) { private void saveFirewallState(ModuleState firewallState) { savedFirewallState = firewallState; if (firewallState == RUNNING) { - ModulesAux.saveFirewallStateRunning(true); + ModulesAux.saveFirewallStateRunning(true); } else if (firewallState == STOPPED) { ModulesAux.saveFirewallStateRunning(false); } diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesStatus.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesStatus.java index 8b80f8885..bf1802382 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesStatus.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesStatus.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.modules; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesStatusBroadcaster.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesStatusBroadcaster.kt index c11b6f9fe..eafac0ac4 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesStatusBroadcaster.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesStatusBroadcaster.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.modules diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesVersions.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesVersions.java index 448a38673..a34227aa0 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesVersions.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ModulesVersions.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
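The ModulesStarterHelper hunk above closes a quiet failure mode: when Tor's SOCKS port turns out to be busy and gets moved to a free port, a DNSCrypt outbound proxy still pointing at the old port would silently break. fixDnsCryptProxyPort rewrites the `proxy =` line in the DNSCrypt config, stores the new port in preferences, and restarts DNSCrypt only when it is running with the outbound proxy enabled. The core rewrite, sketched in Kotlin (assuming the config line looks like `proxy = 'socks5://127.0.0.1:9050'`):

```kotlin
// Swap the old port for the new one in the first "proxy =" line found.
fun followTorSocksPort(lines: MutableList<String>, oldPort: String, newPort: String): Boolean {
    for (i in lines.indices) {
        if (lines[i].contains("proxy =")) {
            lines[i] = lines[i].replace(oldPort, newPort)
            return true // config changed; caller persists and maybe restarts
        }
    }
    return false
}
```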
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.modules; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ProcessStarter.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ProcessStarter.kt index 532a379ed..3074199b8 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ProcessStarter.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/ProcessStarter.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.modules diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/TorRestarterReconnector.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/TorRestarterReconnector.kt index 03a30fd0b..a48c0133f 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/TorRestarterReconnector.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/TorRestarterReconnector.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.modules diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/UsageStatistic.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/UsageStatistic.kt index 76496eba1..b6ba028a8 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/UsageStatistic.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/modules/UsageStatistic.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ @file:JvmName("UsageStatistics") diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/nflog/NflogManager.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/nflog/NflogManager.kt index 61c91704a..2d7e14b4d 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/nflog/NflogManager.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/nflog/NflogManager.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
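In the NflogManager hunk just below, the pid file is no longer read with plain file I/O: it is written by a root-owned nflog process, so the app sandbox may not be allowed to open it, and the patch shells out to `cat` through the project's su wrapper instead. A sketch of that read, using the same `Shell.SU.run(...).stdout` call as the patch with a defensive `firstOrNull`:

```kotlin
import java.io.File

// Read a root-owned pid file via a root shell; direct reads from the app
// sandbox can fail with a permission error. Shell.SU is the su wrapper the
// project already uses elsewhere.
fun readPidFile(path: String): String {
    if (!File(path).isFile) return "" // nothing written yet
    return Shell.SU.run("cat $path").stdout.firstOrNull()?.trim().orEmpty()
}
```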
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.nflog @@ -165,15 +165,20 @@ class NflogManager @Inject constructor( delay(1000) - nflogShell?.waitForIdle() //Waits for nflog to stop + val complete = nflogShell?.waitForIdle() //Waits for nflog to stop - attempts++ + if (complete != false) { + attempts++ + } if (nflogActive && attempts < ATTEMPTS_TO_OPEN_NFLOG) { + closeNflogShell() + stopNflogHandlerThread() loge("Attempt ${attempts + 1} to restart Nflog") } }.onFailure { + attempts++ loge("NflogManager openNflogShell", it) } } while (nflogActive && attempts < ATTEMPTS_TO_OPEN_NFLOG) @@ -229,7 +234,6 @@ class NflogManager @Inject constructor( loge("NflogManager startNfLogHandlerThread", e) } finally { if (nflogShell?.isRunning != true || nflogShell?.isIdle != false) { - nflogShell = null handlerThread?.quitSafely() } } @@ -242,7 +246,7 @@ class NflogManager @Inject constructor( private fun getNflogStartCommand(): String = with(pathVars.get()) { return "$nflogPath " + - "-ouid $appUid " + + //"-ouid $appUid " + "-group $NFLOG_GROUP " + "-dport $dnsCryptPort " + "-tport $torDNSPort " + @@ -327,9 +331,10 @@ class NflogManager @Inject constructor( } private fun readNflogPidFile(): String = try { - File(getPidFilePath()).let { file -> + val filePath = getPidFilePath() + File(filePath).let { file -> if (file.isFile) { - file.useLines { it.first() } + Shell.SU.run("cat $filePath").stdout.first().trim() } else { loge("NflogManager was unable to read pid. The file does not exist.") "" diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/nflog/NflogParser.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/nflog/NflogParser.kt index 75399ee4d..88811f297 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/nflog/NflogParser.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/nflog/NflogParser.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.nflog @@ -35,7 +35,7 @@ class NflogParser @Inject constructor( ) { private val packetPattern = - Pattern.compile("PKT TIME:(\\d+?) UID:(-?\\d+?) ([^ ]+?) SIP:([^ ]*) SPT:(\\d+?) DIP:([^ ]*) DPT:(\\d+?)") + Pattern.compile("PKT TIME:(\\d+?) UID:(-?\\d+?) ([^ ]+?) SIP:([^ ]*) SPT:(\\d+?) DIP:([^ ]*) DPT:(\\d+)") private val dnsPattern = Pattern.compile("DNS TIME:(\\d+?) QNAME:([^ ]*) ANAME:([^ ]*) CNAME:([^ ]*) HINFO:(.*?) RCODE:(\\d+?) 
IP:([^ ]*)") @@ -71,11 +71,12 @@ class NflogParser @Inject constructor( return null } - val protocolInt = when(protocol) { + val protocolInt = when (protocol) { "TCP" -> 6 "UDP" -> 17 "ICMPv4" -> 1 "ICMPv6" -> 58 + "IGMP" -> 2 else -> UNDEFINED } @@ -84,9 +85,12 @@ class NflogParser @Inject constructor( uid = uid.toInt(), saddr = saddr, daddr = daddr, + dport = if ((uid == -1L || uid == 0L || uid == 1020L) && sport < dport) sport else dport, protocol = protocolInt, allowed = true ) + } else { + loge("NflogParser failed to parse line $line") } return null diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/nflog/NflogSessionsHolder.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/nflog/NflogSessionsHolder.kt index 17a9b82e9..8c590a284 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/nflog/NflogSessionsHolder.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/nflog/NflogSessionsHolder.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.nflog diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/patches/AlterConfig.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/patches/AlterConfig.kt index 4faa76039..4b2fe8092 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/patches/AlterConfig.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/patches/AlterConfig.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.patches diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/patches/ConfigUtil.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/patches/ConfigUtil.kt index bf0c7858e..0966e72ee 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/patches/ConfigUtil.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/patches/ConfigUtil.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.patches diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/patches/Patch.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/patches/Patch.kt index b389bcfbb..7f1f5b724 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/patches/Patch.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/patches/Patch.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
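Two details in the NflogParser hunk above deserve a note. First, the DPT group changes from the lazy `(\d+?)` to the greedy `(\d+)`: at the very end of the pattern a lazy quantifier is satisfied by a single digit, so multi-digit destination ports were being truncated to their first digit. Second, IGMP joins the protocol map; the values are plain IANA protocol numbers:

```kotlin
// IANA protocol numbers used when classifying parsed PKT lines.
const val UNDEFINED = -1

fun protocolNumber(name: String): Int = when (name) {
    "TCP" -> 6
    "UDP" -> 17
    "ICMPv4" -> 1
    "IGMP" -> 2
    "ICMPv6" -> 58
    else -> UNDEFINED
}
```

The new `dport` expression is apparently a heuristic for system traffic: for UIDs -1, 0 and 1020 the logged packet may describe the reply direction, so the smaller of the two ports, typically the well-known service port, is shown as the destination.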
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.patches @@ -25,10 +25,12 @@ import androidx.annotation.WorkerThread import androidx.preference.PreferenceManager import pan.alexander.tordnscrypt.App import pan.alexander.tordnscrypt.BuildConfig +import pan.alexander.tordnscrypt.R import pan.alexander.tordnscrypt.settings.PathVars import pan.alexander.tordnscrypt.utils.Constants.QUAD_DNS_41 import pan.alexander.tordnscrypt.utils.logger.Logger.loge import pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.DNSCRYPT_BOOTSTRAP_RESOLVERS +import pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.STUN_SERVERS import java.util.concurrent.atomic.AtomicBoolean private const val SAVED_VERSION_CODE = "SAVED_VERSION_CODE" @@ -75,6 +77,7 @@ class Patch(private val context: Context, private val pathVars: PathVars) { addTorDormantOption() fixTorIPv6VirtualAddresses() addDNSCryptOdohServers() + updateStunServers() if (dnsCryptConfigPatches.isNotEmpty()) { configUtil.patchDNSCryptConfig(dnsCryptConfigPatches) @@ -301,4 +304,13 @@ class Patch(private val context: Context, private val pathVars: PathVars) { ) } + private fun updateStunServers() { + val defaultPreferences = PreferenceManager.getDefaultSharedPreferences(context) + val builtinServers = context.resources.getStringArray(R.array.tor_snowflake_stun_servers).joinToString(",") + val savedServers = defaultPreferences.getString(STUN_SERVERS, builtinServers) + if (savedServers?.startsWith("stun.l.google.com:19302") != false) { + defaultPreferences.edit().putString(STUN_SERVERS, builtinServers).apply() + } + } + } diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/proxy/ProxyFragment.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/proxy/ProxyFragment.kt index 7d1702ef6..77a3d897d 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/proxy/ProxyFragment.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/proxy/ProxyFragment.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.proxy @@ -36,12 +36,16 @@ import android.widget.Toast import androidx.core.content.ContextCompat import androidx.core.widget.NestedScrollView import androidx.fragment.app.Fragment +import androidx.lifecycle.lifecycleScope import androidx.preference.PreferenceManager import kotlinx.coroutines.Job +import kotlinx.coroutines.delay +import kotlinx.coroutines.launch import pan.alexander.tordnscrypt.App import pan.alexander.tordnscrypt.R import pan.alexander.tordnscrypt.databinding.FragmentProxyBinding import pan.alexander.tordnscrypt.domain.preferences.PreferenceRepository +import pan.alexander.tordnscrypt.proxy.ProxyHelper.Companion.CHECK_CONNECTION_TIMEOUT_MSEC import pan.alexander.tordnscrypt.settings.SettingsActivity import pan.alexander.tordnscrypt.utils.Constants.DEFAULT_PROXY_PORT import pan.alexander.tordnscrypt.utils.Constants.LOOPBACK_ADDRESS @@ -87,6 +91,7 @@ class ProxyFragment : Fragment(), View.OnClickListener, TextWatcher { private var etBackground: Drawable? = null private var task: Job? = null + private var progressJob: Job? = null override fun onCreate(savedInstanceState: Bundle?) 
{ App.instance.daggerComponent.inject(this) @@ -131,25 +136,31 @@ class ProxyFragment : Fragment(), View.OnClickListener, TextWatcher { addTextChangedListener(this@ProxyFragment) } + val nonTorProxified = sharedPreferences?.getBoolean(USE_PROXY, false) ?: false val dnsCryptProxified = sharedPreferences?.getBoolean(DNSCRYPT_OUTBOUND_PROXY, false) ?: false val torProxified = sharedPreferences?.getBoolean(TOR_OUTBOUND_PROXY, false) ?: false val itpdProxified = sharedPreferences?.getBoolean(I2PD_OUTBOUND_PROXY, false) ?: false + saveToSharedPreferences(PROXIFY_DNSCRYPT, dnsCryptProxified) + saveToSharedPreferences(PROXIFY_TOR, torProxified) + saveToSharedPreferences(PROXIFY_I2PD, itpdProxified) + + (binding.chbProxyNonTor as CompoundButton).apply { + isChecked = nonTorProxified + } val passAndNameIsEmpty = binding.etProxyPass.text.toString().trim().isEmpty() && binding.etProxyUserName.text.toString().trim().isEmpty() (binding.chbProxyDNSCrypt as CompoundButton).apply { - isEnabled = passAndNameIsEmpty - isChecked = - getBoolFromSharedPreferences(PROXIFY_DNSCRYPT) && dnsCryptProxified && passAndNameIsEmpty + isEnabled = passAndNameIsEmpty || dnsCryptProxified + isChecked = dnsCryptProxified } (binding.chbProxyTor as CompoundButton).apply { - isChecked = getBoolFromSharedPreferences(PROXIFY_TOR) && torProxified + isChecked = torProxified } (binding.chbProxyITPD as CompoundButton).apply { - isEnabled = passAndNameIsEmpty - isChecked = - getBoolFromSharedPreferences(PROXIFY_I2PD) && itpdProxified && passAndNameIsEmpty + isEnabled = passAndNameIsEmpty || itpdProxified + isChecked = itpdProxified } etBackground = binding.etProxyServer.background @@ -166,11 +177,15 @@ class ProxyFragment : Fragment(), View.OnClickListener, TextWatcher { var serverOrPortChanged = false + val activateNonTorProxy = binding.chbProxyNonTor.isChecked val activateDNSCryptProxy = binding.chbProxyDNSCrypt.isEnabled && binding.chbProxyDNSCrypt.isChecked val activateTorProxy = binding.chbProxyTor.isEnabled && binding.chbProxyTor.isChecked val activateITPDProxy = binding.chbProxyITPD.isEnabled && binding.chbProxyITPD.isChecked + if (getBoolFromSharedPreferences(USE_PROXY) != activateNonTorProxy) { + settingsChanged = true + } if (getBoolFromSharedPreferences(PROXIFY_DNSCRYPT) != activateDNSCryptProxy) { saveToSharedPreferences(PROXIFY_DNSCRYPT, activateDNSCryptProxy) settingsChanged = true @@ -185,13 +200,17 @@ class ProxyFragment : Fragment(), View.OnClickListener, TextWatcher { } val proxyServer = binding.etProxyServer.text.toString().trim().let { - it.ifEmpty { + if (it.isEmpty() || !it.matches(IP_REGEX)) { LOOPBACK_ADDRESS + } else { + it } } val proxyPort = binding.etProxyPort.text.toString().trim().let { - it.ifEmpty { + if (it.isEmpty() || !it.matches(PORT_REGEX) || it.toLong() > MAX_PORT_NUMBER) { DEFAULT_PROXY_PORT + } else { + it } } if (proxyServer != sharedPreferences?.getString(PROXY_ADDRESS, LOOPBACK_ADDRESS) @@ -203,8 +222,8 @@ class ProxyFragment : Fragment(), View.OnClickListener, TextWatcher { saveToSharedPreferences(PROXY_ADDRESS, proxyServer) saveToSharedPreferences(PROXY_PORT, proxyPort) - val proxyUserName = binding.etProxyUserName.text.toString().trim() - val proxyPass = binding.etProxyPass.text.toString().trim() + val proxyUserName = binding.etProxyUserName.text.toString().trim().take(127) + val proxyPass = binding.etProxyPass.text.toString().trim().take(127) if (getTextFromSharedPreferences(PROXY_USER) != proxyUserName) { saveToSharedPreferences(PROXY_USER, proxyUserName) serverOrPortChanged = true @@ 
-220,15 +239,15 @@ class ProxyFragment : Fragment(), View.OnClickListener, TextWatcher { return } - val swUseProxy = sharedPreferences?.getBoolean(USE_PROXY, false) ?: false val setBypassProxy = preferenceRepository.get().getStringSetPreference(CLEARNET_APPS_FOR_PROXY) - if (swUseProxy && (setBypassProxy.isNotEmpty() || proxyServer != LOOPBACK_ADDRESS)) { + if (setBypassProxy.isNotEmpty() || proxyServer != LOOPBACK_ADDRESS) { proxyHelper.manageProxy( proxyServer, proxyPort, serverOrPortChanged, + activateNonTorProxy, activateDNSCryptProxy, activateTorProxy, activateITPDProxy @@ -238,6 +257,7 @@ class ProxyFragment : Fragment(), View.OnClickListener, TextWatcher { proxyServer, proxyPort, serverOrPortChanged = false, + enableNonTorProxy = false, enableDNSCryptProxy = false, enableTorProxy = false, enableItpdProxy = false @@ -285,6 +305,8 @@ class ProxyFragment : Fragment(), View.OnClickListener, TextWatcher { val context = activity as Context + startProgress() + etBackground?.let { binding.etProxyServer.background = it binding.etProxyPort.background = it @@ -333,38 +355,13 @@ class ProxyFragment : Fragment(), View.OnClickListener, TextWatcher { task = executor.submit("ProxyFragment checkProxy") { try { val result = proxyHelper.checkProxyConnectivity(server, port.toInt(), user, pass) - - if (_binding != null) { + handler.get().post { if (result.matches(Regex("\\d+"))) { - handler.get().post { - binding.tvProxyHint.apply { - text = String.format( - getString(R.string.proxy_successful_connection), - result - ) - setTextColor( - ContextCompat.getColor( - context, - R.color.textModuleStatusColorRunning - ) - ) - binding.scrollProxy.scrollToBottom() - } - } + setConnectionSuccess(result) + hideProgressBar() } else { - handler.get().post { - binding.tvProxyHint.apply { - text = - String.format(getString(R.string.proxy_no_connection), result) - setTextColor( - ContextCompat.getColor( - context, - R.color.textModuleStatusColorAlert - ) - ) - binding.scrollProxy.scrollToBottom() - } - } + setConnectionFailed(result) + hideProgressBar() } } } catch (e: Exception) { @@ -374,6 +371,65 @@ class ProxyFragment : Fragment(), View.OnClickListener, TextWatcher { } } + private fun startProgress() { + binding.tvProxyHint.text = "" + binding.pbSocksProxy.visibility = View.VISIBLE + binding.scrollProxy.scrollToBottom() + progressJob?.cancel() + binding.pbSocksProxy.progress = 0 + val animationDelay = 100 + progressJob = lifecycleScope.launch { + for (i in 0..CHECK_CONNECTION_TIMEOUT_MSEC / animationDelay) { + binding.pbSocksProxy.setProgressCompat( + (i * 100) / (CHECK_CONNECTION_TIMEOUT_MSEC / animationDelay), + true + ) + delay(animationDelay.toLong()) + } + } + } + + private fun setConnectionSuccess(result: String) { + _binding ?: return + binding.tvProxyHint.apply { + text = String.format( + getString(R.string.proxy_successful_connection), + result + ) + setTextColor( + ContextCompat.getColor( + context, + R.color.textModuleStatusColorRunning + ) + ) + binding.scrollProxy.scrollToBottom() + } + } + + private fun setConnectionFailed(result: String) { + _binding ?: return + binding.tvProxyHint.apply { + text = + String.format(getString(R.string.proxy_no_connection), result) + setTextColor( + ContextCompat.getColor( + context, + R.color.textModuleStatusColorAlert + ) + ) + binding.scrollProxy.scrollToBottom() + } + } + + private fun hideProgressBar() { + progressJob?.cancel() + lifecycleScope.launch { + binding.pbSocksProxy.progress = 100 + delay(250) + binding.pbSocksProxy.visibility = View.GONE + } + } + 
private fun saveToSharedPreferences(name: String, value: Any?) { val editor = sharedPreferences?.edit() @@ -393,7 +449,7 @@ class ProxyFragment : Fragment(), View.OnClickListener, TextWatcher { return sharedPreferences?.getBoolean(value, false) ?: false } - private fun NestedScrollView.scrollToBottom() { + private fun NestedScrollView.scrollToBottom() = post { val lastChild = getChildAt(childCount - 1) val bottom = lastChild.bottom + paddingBottom val delta = bottom - (scrollY + height) diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/proxy/ProxyHelper.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/proxy/ProxyHelper.kt index ebf6c4ec6..5e07b968b 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/proxy/ProxyHelper.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/proxy/ProxyHelper.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.proxy @@ -33,15 +33,12 @@ import pan.alexander.tordnscrypt.utils.connectionchecker.ProxyAuthManager.setDef import pan.alexander.tordnscrypt.utils.enums.ModuleState import pan.alexander.tordnscrypt.utils.executors.CoroutineExecutor import pan.alexander.tordnscrypt.utils.filemanager.FileManager -import pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys import pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.DNSCRYPT_OUTBOUND_PROXY import pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.I2PD_OUTBOUND_PROXY -import pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.PROXIFY_DNSCRYPT -import pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.PROXIFY_I2PD -import pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.PROXIFY_TOR import pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.PROXY_PASS import pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.PROXY_USER import pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.TOR_OUTBOUND_PROXY +import pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.USE_PROXY import java.net.InetAddress import java.net.InetSocketAddress import java.net.Proxy @@ -50,9 +47,6 @@ import java.net.SocketAddress import javax.inject.Inject import javax.inject.Named - -private const val CHECK_CONNECTION_TIMEOUT_MSEC = 500 - class ProxyHelper @Inject constructor( private val context: Context, private val pathVars: PathVars, @@ -60,44 +54,11 @@ class ProxyHelper @Inject constructor( @Named(DEFAULT_PREFERENCES_NAME) private val defaultPreferences: SharedPreferences ) { - fun enableProxy() { - val proxifyDnsCrypt = defaultPreferences.getBoolean(PROXIFY_DNSCRYPT, false) - val proxifyTor = defaultPreferences.getBoolean(PROXIFY_TOR, false) - val proxifyItpd = defaultPreferences.getBoolean(PROXIFY_I2PD, false) - - val server = - defaultPreferences.getString( - PreferenceKeys.PROXY_ADDRESS, LOOPBACK_ADDRESS - ) ?: LOOPBACK_ADDRESS - val port = defaultPreferences.getString( - PreferenceKeys.PROXY_PORT, DEFAULT_PROXY_PORT - ) ?: DEFAULT_PROXY_PORT - - manageProxy(server, port, false, proxifyDnsCrypt, proxifyTor, proxifyItpd) - } - - fun disableProxy() { - val proxyServer = - defaultPreferences.getString( - PreferenceKeys.PROXY_ADDRESS, LOOPBACK_ADDRESS - ) ?: LOOPBACK_ADDRESS - val proxyPort = defaultPreferences.getString( - PreferenceKeys.PROXY_PORT, DEFAULT_PROXY_PORT - ) ?: 
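The new startProgress/hideProgressBar pair above gives the Socks5 connectivity check visible feedback: a lifecycleScope coroutine steps a determinate bar every 100 ms across the whole CHECK_CONNECTION_TIMEOUT_MSEC window, and whichever result arrives first cancels the job and hides the bar. A stripped-down sketch with a plain ProgressBar:

```kotlin
import android.widget.ProgressBar
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Job
import kotlinx.coroutines.delay
import kotlinx.coroutines.launch

// Animate 0..100% over timeoutMs in ~100 ms steps; cancel the returned Job
// as soon as the real result arrives.
fun animateProgress(scope: CoroutineScope, bar: ProgressBar, timeoutMs: Int): Job {
    val steps = (timeoutMs / 100).coerceAtLeast(1)
    return scope.launch {
        for (i in 0..steps) {
            bar.progress = (i * 100) / steps
            delay(100L)
        }
    }
}
```

Cancelling the previous job before starting a new one, as the patch does with progressJob?.cancel(), keeps two animations from fighting over the same bar.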
DEFAULT_PROXY_PORT - - manageProxy( - proxyServer, proxyPort, - serverOrPortChanged = false, - enableDNSCryptProxy = false, - enableTorProxy = false, - enableItpdProxy = false - ) - } - fun manageProxy( server: String, port: String, serverOrPortChanged: Boolean, + enableNonTorProxy: Boolean, enableDNSCryptProxy: Boolean, enableTorProxy: Boolean, enableItpdProxy: Boolean @@ -105,6 +66,7 @@ class ProxyHelper @Inject constructor( val modulesStatus = ModulesStatus.getInstance() + val nonTorProxified = defaultPreferences.getBoolean(USE_PROXY, false) val dnsCryptProxified = defaultPreferences.getBoolean(DNSCRYPT_OUTBOUND_PROXY, false) val torProxified = defaultPreferences.getBoolean(TOR_OUTBOUND_PROXY, false) val itpdProxified = defaultPreferences.getBoolean(I2PD_OUTBOUND_PROXY, false) @@ -116,31 +78,41 @@ class ProxyHelper @Inject constructor( } executor.submit("ProxyHelper manageProxy") { - if ((enableDNSCryptProxy xor dnsCryptProxified) || serverOrPortChanged) { + val dnsCryptSettingChanged = enableDNSCryptProxy xor dnsCryptProxified + if (dnsCryptSettingChanged || serverOrPortChanged) { manageDNSCryptProxy(pathVars.dnscryptConfPath, proxyAddr, enableDNSCryptProxy) defaultPreferences.edit().putBoolean(DNSCRYPT_OUTBOUND_PROXY, enableDNSCryptProxy) .apply() - if (modulesStatus.dnsCryptState == ModuleState.RUNNING) { + if (modulesStatus.dnsCryptState == ModuleState.RUNNING && (enableDNSCryptProxy || dnsCryptSettingChanged)) { ModulesRestarter.restartDNSCrypt(context) } } - if ((enableTorProxy xor torProxified) || serverOrPortChanged) { + val torSettingChanged = enableTorProxy xor torProxified + if (torSettingChanged || serverOrPortChanged) { mangeTorProxy(pathVars.torConfPath, proxyAddr, enableTorProxy) defaultPreferences.edit().putBoolean(TOR_OUTBOUND_PROXY, enableTorProxy).apply() - if (modulesStatus.torState == ModuleState.RUNNING) { + if (modulesStatus.torState == ModuleState.RUNNING && (enableTorProxy || torSettingChanged)) { ModulesRestarter.restartTor(context) } } - if ((enableItpdProxy xor itpdProxified) || serverOrPortChanged) { + val itpdSettingChanged = enableItpdProxy xor itpdProxified + if (itpdSettingChanged || serverOrPortChanged) { manageITPDProxy(pathVars.itpdConfPath, proxyAddr, enableItpdProxy) defaultPreferences.edit().putBoolean(I2PD_OUTBOUND_PROXY, enableItpdProxy).apply() - if (modulesStatus.itpdState == ModuleState.RUNNING) { + if (modulesStatus.itpdState == ModuleState.RUNNING && (enableItpdProxy || itpdSettingChanged)) { ModulesRestarter.restartITPD(context) } } - modulesStatus.setIptablesRulesUpdateRequested(context, true) + val nonTorProxySettingsChanged = enableNonTorProxy xor nonTorProxified + if (dnsCryptSettingChanged || torSettingChanged || itpdSettingChanged + || nonTorProxySettingsChanged || serverOrPortChanged + ) { + defaultPreferences.edit().putBoolean(USE_PROXY, enableNonTorProxy).apply() + modulesStatus.setIptablesRulesUpdateRequested(context, true) + } + } } @@ -165,7 +137,7 @@ class ProxyHelper @Inject constructor( Socket(proxy).use { setDefaultAuth(proxyUser, proxyPass) it.connect(sockaddr, CHECK_CONNECTION_TIMEOUT_MSEC) - it.soTimeout = 1 + it.soTimeout = CHECK_CONNECTION_TIMEOUT_MSEC if (!it.isConnected) { throw IllegalStateException("unable to connect to $dnsCryptFallbackRes") @@ -286,4 +258,8 @@ class ProxyHelper @Inject constructor( } FileManager.writeTextFileSynchronous(context, itpdConfPath, itpdConf) } + + companion object { + const val CHECK_CONNECTION_TIMEOUT_MSEC = 5000 + } } diff --git 
a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/ConfigEditorFragment.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/ConfigEditorFragment.java index 5e82b483a..61c35bd6c 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/ConfigEditorFragment.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/ConfigEditorFragment.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/OnBackPressListener.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/OnBackPressListener.kt index 87d036818..8573c254b 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/OnBackPressListener.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/OnBackPressListener.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/PathVars.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/PathVars.java index 184b3d0fe..667ad2c33 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/PathVars.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/PathVars.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/PreferencesCommonFragment.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/PreferencesCommonFragment.java index 7b9298b69..ffae04a48 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/PreferencesCommonFragment.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/PreferencesCommonFragment.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
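Two behavioural fixes sit in the ProxyHelper hunk above. The connection check timeout grows from 500 ms to 5 s, and soTimeout now matches it instead of being 1 ms, so a slow but working proxy no longer fails the test. And each module is restarted only when its own proxy flag actually changed, or when it stays enabled while the proxy address changed, rather than on every call. The restart decision, sketched:

```kotlin
// xor is true only when the requested flag differs from the saved one.
fun needRestart(
    running: Boolean,
    enabled: Boolean,
    saved: Boolean,
    serverOrPortChanged: Boolean
): Boolean {
    val settingChanged = enabled xor saved
    return running && (enabled || settingChanged) && (settingChanged || serverOrPortChanged)
}
```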
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings; @@ -29,14 +29,12 @@ import android.os.Bundle; import androidx.annotation.NonNull; -import androidx.appcompat.widget.SwitchCompat; import androidx.fragment.app.DialogFragment; import androidx.fragment.app.FragmentManager; import androidx.preference.Preference; import androidx.preference.PreferenceCategory; import androidx.preference.PreferenceFragmentCompat; import androidx.preference.PreferenceScreen; -import androidx.preference.SwitchPreferenceCompat; import android.os.Handler; import android.view.LayoutInflater; @@ -45,7 +43,6 @@ import java.util.ArrayList; import java.util.List; -import java.util.Set; import dagger.Lazy; import pan.alexander.tordnscrypt.App; @@ -66,12 +63,10 @@ import pan.alexander.tordnscrypt.utils.enums.FileOperationsVariants; import pan.alexander.tordnscrypt.utils.filemanager.FileManager; import pan.alexander.tordnscrypt.utils.filemanager.OnTextFileOperationsCompleteListener; -import pan.alexander.tordnscrypt.views.SwitchPlusClickPreference; import pan.alexander.tordnscrypt.vpn.service.ServiceVPNHelper; import static pan.alexander.tordnscrypt.TopFragment.TOP_BROADCAST; import static pan.alexander.tordnscrypt.di.SharedPreferencesModule.DEFAULT_PREFERENCES_NAME; -import static pan.alexander.tordnscrypt.proxy.ProxyFragmentKt.CLEARNET_APPS_FOR_PROXY; import static pan.alexander.tordnscrypt.settings.tor_preferences.PreferencesTorFragment.ISOLATE_DEST_ADDRESS; import static pan.alexander.tordnscrypt.settings.tor_preferences.PreferencesTorFragment.ISOLATE_DEST_PORT; import static pan.alexander.tordnscrypt.utils.Constants.LOOPBACK_ADDRESS; @@ -90,7 +85,6 @@ import static pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.KILL_SWITCH; import static pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.MAIN_ACTIVITY_RECREATE; import static pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.MULTI_USER_SUPPORT; -import static pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.PROXY_ADDRESS; import static pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.REMOTE_CONTROL; import static pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.RUN_MODULES_WITH_ROOT; import static pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.TOR_TETHERING; @@ -108,7 +102,7 @@ public class PreferencesCommonFragment extends PreferenceFragmentCompat implements Preference.OnPreferenceChangeListener, Preference.OnPreferenceClickListener, - SwitchPlusClickPreference.SwitchPlusClickListener, OnTextFileOperationsCompleteListener { + OnTextFileOperationsCompleteListener { @Inject public Lazy<PreferenceRepository> preferenceRepository; @@ -201,10 +195,10 @@ public View onCreateView(@NonNull LayoutInflater inflater, ViewGroup container, && !modulesStatus.isUseModulesWithRoot(); PreferenceScreen preferenceScreen = findPreference("pref_common"); PreferenceCategory proxySettingsCategory = findPreference("categoryCommonProxy"); - SwitchPlusClickPreference swUseProxy = findPreference(USE_PROXY); + Preference swUseProxy = findPreference(USE_PROXY); if (preferenceScreen != null && proxySettingsCategory != null) { if ((modulesStatus.getMode() == VPN_MODE || fixTTL) && swUseProxy != null) { - swUseProxy.setSwitchClickListener(this); + swUseProxy.setOnPreferenceClickListener(this); } else { preferenceScreen.removePreference(proxySettingsCategory); } @@ -303,19 +297,6 @@ public void onResume() { itpdConfPath =
pathVars.get().getItpdConfPath(); itpdTunnelsPath = pathVars.get().getItpdTunnelsPath(); - SharedPreferences sharedPreferences = defaultPreferences.get(); - boolean swUseProxy = sharedPreferences.getBoolean(USE_PROXY, false); - String proxyServer = sharedPreferences.getString(PROXY_ADDRESS, ""); - Set<String> setBypassProxy = preferenceRepository.get().getStringSetPreference(CLEARNET_APPS_FOR_PROXY); - if (swUseProxy - && setBypassProxy.isEmpty() - && proxyServer.equals(LOOPBACK_ADDRESS)) { - Preference swUseProxyPreference = findPreference(USE_PROXY); - if (swUseProxyPreference != null) { - ((SwitchPreferenceCompat) swUseProxyPreference).setChecked(false); - } - } - executor.submit("PreferencesCommonFragment verifier", () -> { try { Verifier verifier = verifierLazy.get(); @@ -500,35 +481,10 @@ public boolean onPreferenceClick(@NonNull Preference preference) { } catch (Exception e) { loge("PreferencesCommonFragment ALWAYS_ON_VPN", e); } - } - return false; - } - - - @Override - public void onCheckedChanged(SwitchCompat buttonView, boolean isChecked) { - - Context context = getActivity(); - - if (context == null) { - return; - } - - SharedPreferences sharedPreferences = defaultPreferences.get(); - String proxyServer = sharedPreferences.getString(PROXY_ADDRESS, LOOPBACK_ADDRESS); - Set<String> setBypassProxy = preferenceRepository.get().getStringSetPreference(CLEARNET_APPS_FOR_PROXY); - if (setBypassProxy.isEmpty() && proxyServer.equals(LOOPBACK_ADDRESS)) { + } else if (USE_PROXY.equals(preference.getKey())) { openProxySettings(); - } else if (isChecked) { - enableProxy(); - } else { - disableProxy(); } - } - - @Override - public void onClick(View view) { - openProxySettings(); + return false; } private void openProxySettings() { @@ -543,14 +499,6 @@ private void openProxySettings() { context.startActivity(intent); } - private void enableProxy() { - proxyHelper.get().enableProxy(); - } - - private void disableProxy() { - proxyHelper.get().disableProxy(); - } - private void activityCurrentRecreate() { Activity activity = getActivity(); diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/PreferencesFastFragment.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/PreferencesFastFragment.java index 3879e2496..4e21430d8 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/PreferencesFastFragment.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/PreferencesFastFragment.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see <http://www.gnu.org/licenses/>. - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/SettingsActivity.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/SettingsActivity.java index 2453fd02b..a19cff19b 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/SettingsActivity.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/SettingsActivity.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see <http://www.gnu.org/licenses/>.
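The PreferencesCommonFragment diff above drops the custom SwitchPlusClickPreference plumbing, together with the enableProxy/disableProxy round trip, in favour of a plain Preference: a click simply opens the proxy settings screen, where the proxy is actually switched on and off. A Kotlin analogue of the new handler (the key string here is illustrative; the app uses its USE_PROXY constant):

```kotlin
import androidx.preference.Preference

// Open the proxy settings screen when the proxy preference is clicked.
class UseProxyClickListener(
    private val openProxySettings: () -> Unit
) : Preference.OnPreferenceClickListener {
    override fun onPreferenceClick(preference: Preference): Boolean {
        if (preference.key == "swUseProxy") {
            openProxySettings()
        }
        return false
    }
}
```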
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/SettingsParser.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/SettingsParser.java index da63b9dcd..4b611cddf 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/SettingsParser.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/SettingsParser.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/ShowLogFragment.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/ShowLogFragment.java index 6ca10028a..dca767e50 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/ShowLogFragment.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/ShowLogFragment.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_relays/DnsRelay.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_relays/DnsRelay.kt index b9cd43b8b..859f30e90 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_relays/DnsRelay.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_relays/DnsRelay.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.dnscrypt_relays diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_relays/DnsRelayItem.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_relays/DnsRelayItem.java index e5239705d..bf9f9acb3 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_relays/DnsRelayItem.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_relays/DnsRelayItem.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.dnscrypt_relays; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_relays/DnsRelayViewModel.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_relays/DnsRelayViewModel.kt index ebc2976d4..6cfc59cda 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_relays/DnsRelayViewModel.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_relays/DnsRelayViewModel.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.dnscrypt_relays diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_relays/DnsRelaysAdapter.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_relays/DnsRelaysAdapter.java index 61218610d..6e0d1d2e1 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_relays/DnsRelaysAdapter.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_relays/DnsRelaysAdapter.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.dnscrypt_relays; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_relays/DnsServerRelay.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_relays/DnsServerRelay.java index 97f8c658d..27b655024 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_relays/DnsServerRelay.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_relays/DnsServerRelay.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.dnscrypt_relays; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_relays/PreferencesDNSCryptRelays.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_relays/PreferencesDNSCryptRelays.java index 527691d29..988f9e881 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_relays/PreferencesDNSCryptRelays.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_relays/PreferencesDNSCryptRelays.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.dnscrypt_relays; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_relays/RelayConfigurationResult.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_relays/RelayConfigurationResult.kt index 64db0e930..0c0fa1994 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_relays/RelayConfigurationResult.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_relays/RelayConfigurationResult.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.dnscrypt_relays diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/AddRemoteRulesUrlDialog.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/AddRemoteRulesUrlDialog.kt index d7af4f985..200d58877 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/AddRemoteRulesUrlDialog.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/AddRemoteRulesUrlDialog.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.dnscrypt_rules diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/DnsRulesFragment.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/DnsRulesFragment.kt index 2c57447bc..91bb885e3 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/DnsRulesFragment.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/DnsRulesFragment.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.dnscrypt_rules @@ -360,6 +360,7 @@ class DnsRulesFragment : Fragment(), DnsRulesReceiver.Callback, super.onDestroyView() unregisterReceiver() + _binding?.rvDnsRules?.adapter = null _binding = null rulesAdapter = null } diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/DnsRulesViewModel.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/DnsRulesViewModel.kt index 8ce123eb6..3b4186de2 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/DnsRulesViewModel.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/DnsRulesViewModel.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
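
The one functional change in DnsRulesFragment above is in onDestroyView(): the RecyclerView adapter is detached before the view binding is cleared. This matters because RecyclerView registers an AdapterDataObserver on its adapter; leaving the adapter set lets it keep the destroyed view hierarchy reachable. An illustrative Kotlin reduction of the pattern (the fragment shell and field names are stand-ins for the diff's _binding/rvDnsRules/rulesAdapter):

```kotlin
import androidx.fragment.app.Fragment
import androidx.recyclerview.widget.RecyclerView

// Stand-in fragment demonstrating the teardown order from the hunk above.
class RulesFragmentSketch : Fragment() {

    private var rulesList: RecyclerView? = null           // from the (destroyed) view
    private var rulesAdapter: RecyclerView.Adapter<*>? = null

    override fun onDestroyView() {
        super.onDestroyView()
        // 1. Detach the adapter so the RecyclerView unregisters its
        //    AdapterDataObserver and stops referencing the adapter.
        rulesList?.adapter = null
        // 2. Only then drop the view and adapter references themselves.
        rulesList = null
        rulesAdapter = null
    }
}
```
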
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.dnscrypt_rules diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/existing/RemixExistingDnsRulesWorker.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/existing/RemixExistingDnsRulesWorker.kt index 08bbea506..940e0a63b 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/existing/RemixExistingDnsRulesWorker.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/existing/RemixExistingDnsRulesWorker.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.dnscrypt_rules.existing diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/existing/RemixExistingRulesWorkManager.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/existing/RemixExistingRulesWorkManager.kt index 4ef6f3afb..6e7dc9773 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/existing/RemixExistingRulesWorkManager.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/existing/RemixExistingRulesWorkManager.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.dnscrypt_rules.existing diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/local/DnsRulesUpdateProgress.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/local/DnsRulesUpdateProgress.kt index af306e92b..740c26763 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/local/DnsRulesUpdateProgress.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/local/DnsRulesUpdateProgress.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.dnscrypt_rules.local diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/local/ImportRulesManager.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/local/ImportRulesManager.kt index bb7663107..08e03e467 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/local/ImportRulesManager.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/local/ImportRulesManager.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.dnscrypt_rules.local diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/local/UpdateLocalDnsRulesWorker.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/local/UpdateLocalDnsRulesWorker.kt index 6ee0a1b31..1d1decf1b 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/local/UpdateLocalDnsRulesWorker.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/local/UpdateLocalDnsRulesWorker.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.dnscrypt_rules.local diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/local/UpdateLocalRulesWorkManager.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/local/UpdateLocalRulesWorkManager.kt index e13bbe000..b7e9eda60 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/local/UpdateLocalRulesWorkManager.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/local/UpdateLocalRulesWorkManager.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.dnscrypt_rules.local diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/receiver/DnsRulesReceiver.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/receiver/DnsRulesReceiver.kt index ed35e87b1..5cbf725d7 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/receiver/DnsRulesReceiver.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/receiver/DnsRulesReceiver.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.dnscrypt_rules.receiver diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/recycler/DnsRuleRecycleItem.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/recycler/DnsRuleRecycleItem.kt index c7bd3544f..3df0db9a9 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/recycler/DnsRuleRecycleItem.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/recycler/DnsRuleRecycleItem.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.dnscrypt_rules.recycler diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/recycler/DnsRulesRecyclerAdapter.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/recycler/DnsRulesRecyclerAdapter.kt index 09166d4da..ecc7f4de6 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/recycler/DnsRulesRecyclerAdapter.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/recycler/DnsRulesRecyclerAdapter.kt @@ -14,21 +14,27 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.dnscrypt_rules.recycler import android.annotation.SuppressLint +import android.content.Context +import android.os.Handler +import android.os.Looper import android.text.Editable import android.text.TextWatcher import android.view.LayoutInflater import android.view.View import android.view.View.GONE +import android.view.View.OnFocusChangeListener import android.view.View.VISIBLE import android.view.ViewGroup +import android.view.inputmethod.InputMethodManager import android.widget.TextView import androidx.core.content.ContextCompat +import androidx.core.os.postDelayed import androidx.recyclerview.widget.RecyclerView import androidx.recyclerview.widget.RecyclerView.NO_POSITION import pan.alexander.tordnscrypt.R @@ -63,6 +69,9 @@ class DnsRulesRecyclerAdapter( var rulesType: DnsRuleType? = null private val rules: MutableList = mutableListOf() + private val handler by lazy { Handler(Looper.getMainLooper()) } + private var recyclerView: RecyclerView? 
= null + override fun onCreateViewHolder(parent: ViewGroup, viewType: Int): RecyclerView.ViewHolder { return try { when (viewType) { @@ -118,6 +127,17 @@ class DnsRulesRecyclerAdapter( } } + override fun onAttachedToRecyclerView(recyclerView: RecyclerView) { + super.onAttachedToRecyclerView(recyclerView) + this.recyclerView = recyclerView + } + + override fun onDetachedFromRecyclerView(recyclerView: RecyclerView) { + super.onDetachedFromRecyclerView(recyclerView) + handler.removeCallbacksAndMessages(null) + this.recyclerView = null + } + @SuppressLint("NotifyDataSetChanged") fun updateRules(rules: List) { this.rules.apply { @@ -298,11 +318,27 @@ class DnsRulesRecyclerAdapter( return } - editRule(position, s.toString()) + if (recyclerView?.isComputingLayout == true) { + handler.postDelayed(50L) { + editRule(position, s.toString()) + } + } else { + editRule(position, s.toString()) + } } } + val onFocusChangeListener = OnFocusChangeListener { v, hasFocus -> + val imm = + itemView.context.getSystemService(Context.INPUT_METHOD_SERVICE) as InputMethodManager + if (hasFocus) { + imm.showSoftInput(v, 0) + } else { + imm.hideSoftInputFromWindow(v.windowToken, 0) + } + } + override fun bind(position: Int) { when (val rule = rules[position]) { is DnsRuleRecycleItem.DnsSingleRule -> { @@ -310,6 +346,7 @@ class DnsRulesRecyclerAdapter( etRule.setText(rule.rule, TextView.BufferType.EDITABLE) etRule.isEnabled = rule.active etRule.addTextChangedListener(watcher) + etRule.onFocusChangeListener = onFocusChangeListener swRuleActive.isChecked = rule.active swRuleActive.setOnClickListener(this@DnsSingleRuleViewHolder) if (rule.protected) { @@ -415,7 +452,7 @@ class DnsRulesRecyclerAdapter( DnsRuleType.IP_BLACKLIST -> prepareIPv6IfAny(text) else -> text } - if (textPrepared.matches(getRuleRegex())) { + if (textPrepared.matches(getRuleRegex()) && rule.rule != textPrepared) { rule.rule = textPrepared if (textPrepared != text) { notifyItemChanged(position) diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/remote/DnsRulesDownloadProgress.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/remote/DnsRulesDownloadProgress.kt index 602f1eeb6..e1278af26 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/remote/DnsRulesDownloadProgress.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/remote/DnsRulesDownloadProgress.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.dnscrypt_rules.remote diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/remote/DownloadRemoteRulesManager.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/remote/DownloadRemoteRulesManager.kt index 48a7ef861..49388d793 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/remote/DownloadRemoteRulesManager.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/remote/DownloadRemoteRulesManager.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
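
The DnsRulesRecyclerAdapter hunks above do three things: the adapter now remembers the RecyclerView it is attached to; a rule edit arriving from a TextWatcher while the list isComputingLayout is re-posted through a main-thread Handler (50 ms, as in the diff), since RecyclerView throws IllegalStateException if items are mutated during layout; and an OnFocusChangeListener shows or hides the soft keyboard as rule fields gain and lose focus. The final hunk additionally skips the update when the prepared text equals the stored rule, avoiding redundant notifyItemChanged() churn. A compact sketch of the deferral part — the adapter class is a stand-in, only the pattern mirrors the diff:

```kotlin
import android.os.Handler
import android.os.Looper
import androidx.recyclerview.widget.RecyclerView

// Stand-in base adapter showing the "defer edits while computing layout" pattern.
abstract class DeferringAdapterSketch<VH : RecyclerView.ViewHolder> :
    RecyclerView.Adapter<VH>() {

    private val handler = Handler(Looper.getMainLooper())
    private var recyclerView: RecyclerView? = null

    override fun onAttachedToRecyclerView(recyclerView: RecyclerView) {
        super.onAttachedToRecyclerView(recyclerView)
        this.recyclerView = recyclerView
    }

    override fun onDetachedFromRecyclerView(recyclerView: RecyclerView) {
        super.onDetachedFromRecyclerView(recyclerView)
        handler.removeCallbacksAndMessages(null) // drop pending edits with the view
        this.recyclerView = null
    }

    // RecyclerView forbids adapter mutations while it is computing layout,
    // so re-post the action instead of running it inline.
    protected fun runWhenLayoutIdle(action: () -> Unit) {
        if (recyclerView?.isComputingLayout == true) {
            handler.postDelayed({ action() }, 50L)
        } else {
            action()
        }
    }
}
```

Clearing the handler in onDetachedFromRecyclerView() is the companion fix: a deferred edit must not fire against a list that has already been torn down.
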
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.dnscrypt_rules.remote diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/remote/UpdateRemoteDnsRulesWorker.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/remote/UpdateRemoteDnsRulesWorker.kt index 854b21a15..4cc8ce246 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/remote/UpdateRemoteDnsRulesWorker.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/remote/UpdateRemoteDnsRulesWorker.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.dnscrypt_rules.remote diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/remote/UpdateRemoteRulesWorkManager.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/remote/UpdateRemoteRulesWorkManager.kt index 0bd135aee..980c801cf 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/remote/UpdateRemoteRulesWorkManager.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_rules/remote/UpdateRemoteRulesWorkManager.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.dnscrypt_rules.remote diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_servers/DnsCryptConfigurationResult.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_servers/DnsCryptConfigurationResult.kt index 847372143..2843fe458 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_servers/DnsCryptConfigurationResult.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_servers/DnsCryptConfigurationResult.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.dnscrypt_servers diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_servers/DnsCryptResolver.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_servers/DnsCryptResolver.kt index e8d159b49..b13d05d11 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_servers/DnsCryptResolver.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_servers/DnsCryptResolver.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.dnscrypt_servers diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_servers/DnsServerFeatures.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_servers/DnsServerFeatures.kt index d7dab5b22..a2b967a62 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_servers/DnsServerFeatures.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_servers/DnsServerFeatures.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.dnscrypt_servers diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_servers/DnsServerItem.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_servers/DnsServerItem.java index 48ab9c45f..eb24e39e0 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_servers/DnsServerItem.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_servers/DnsServerItem.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.dnscrypt_servers; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_servers/DnsServerViewModel.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_servers/DnsServerViewModel.kt index a368dca55..f53959983 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_servers/DnsServerViewModel.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_servers/DnsServerViewModel.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.dnscrypt_servers diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_servers/DnsServersAdapter.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_servers/DnsServersAdapter.java index a4a4f1b60..c45212ea2 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_servers/DnsServersAdapter.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_servers/DnsServersAdapter.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.dnscrypt_servers; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_servers/PreferencesDNSCryptServers.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_servers/PreferencesDNSCryptServers.java index 730e67b0b..7710c01e7 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_servers/PreferencesDNSCryptServers.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_servers/PreferencesDNSCryptServers.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.dnscrypt_servers; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_settings/PreferencesDNSFragment.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_settings/PreferencesDNSFragment.java index 3d3ba94c4..c5e4b28a8 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_settings/PreferencesDNSFragment.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_settings/PreferencesDNSFragment.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.dnscrypt_settings; @@ -77,6 +77,7 @@ import static pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.DNSCRYPT_LISTEN_PORT; import static pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.DNSCRYPT_NETPROBE_ADDRESS; import static pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.DNSCRYPT_OUTBOUND_PROXY; +import static pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.DNSCRYPT_OUTBOUND_PROXY_PORT; import static pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.DNSCRYPT_RELAYS_REFRESH_DELAY; import static pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.DNSCRYPT_RULES_REFRESH_DELAY; import static pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.DNSCRYPT_SERVERS_REFRESH_DELAY; @@ -139,7 +140,7 @@ public void onCreate(Bundle savedInstanceState) { preferences.add(findPreference("ipv6_servers")); preferences.add(findPreference("force_tcp")); preferences.add(findPreference(DNSCRYPT_OUTBOUND_PROXY)); - preferences.add(findPreference("proxy_port")); + preferences.add(findPreference(DNSCRYPT_OUTBOUND_PROXY_PORT)); preferences.add(findPreference(DNSCRYPT_BOOTSTRAP_RESOLVERS)); preferences.add(findPreference(IGNORE_SYSTEM_DNS)); preferences.add(findPreference(HTTP3_QUIC)); @@ -358,7 +359,7 @@ public boolean onPreferenceChange(@NonNull Preference preference, Object newValu VpnBuilder.vpnDnsSet.clear(); } return true; - } else if (Objects.equals(preference.getKey(), "proxy_port")) { + } else if (Objects.equals(preference.getKey(), DNSCRYPT_OUTBOUND_PROXY_PORT)) { boolean useModulesWithRoot = ModulesStatus.getInstance().getMode() == ROOT_MODE && ModulesStatus.getInstance().isUseModulesWithRoot(); if (!newValue.toString().matches("\\d+") || 
Long.parseLong(newValue.toString()) > MAX_PORT_NUMBER diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_settings/RulesEraser.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_settings/RulesEraser.kt index 3df596264..7f43ca437 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_settings/RulesEraser.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/dnscrypt_settings/RulesEraser.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.dnscrypt_settings diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/FirewallAppModel.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/FirewallAppModel.kt index 7e2b44858..3ee3e202b 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/FirewallAppModel.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/FirewallAppModel.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.firewall diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/FirewallFragment.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/FirewallFragment.kt index e72cada79..985e56bff 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/FirewallFragment.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/FirewallFragment.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.firewall diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/FirewallNotification.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/FirewallNotification.kt index 08abb510d..62c85e4dc 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/FirewallNotification.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/FirewallNotification.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
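
In PreferencesDNSFragment above, the raw "proxy_port" string is replaced by the DNSCRYPT_OUTBOUND_PROXY_PORT constant at both call sites (the preference list and onPreferenceChange()), so the key can no longer drift between them, and the change handler rejects values that are non-numeric or above MAX_PORT_NUMBER. A hedged Kotlin sketch of an equivalent check; the constant's value of 65535 is the standard TCP/UDP maximum, assumed here rather than read from the project:

```kotlin
// Assumed value: the project's MAX_PORT_NUMBER constant is not shown in this diff.
const val MAX_PORT_NUMBER = 65535L

// Mirrors the diff's check: digits only, parsed wide enough not to overflow Int,
// and capped at the maximum port number.
fun isValidPort(newValue: String): Boolean {
    if (!newValue.matches(Regex("\\d+"))) return false
    val port = newValue.toLongOrNull() ?: return false // null for absurdly long input
    return port <= MAX_PORT_NUMBER
}

fun main() {
    println(isValidPort("4034"))  // true
    println(isValidPort("70000")) // false: above 65535
    println(isValidPort("10q"))   // false: not a number
}
```

Parsing as Long before comparing is the detail worth copying: an Int parse of a long digit string would throw before the range check ever ran.
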
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.firewall @@ -41,6 +41,7 @@ import pan.alexander.tordnscrypt.App import pan.alexander.tordnscrypt.R import pan.alexander.tordnscrypt.settings.SettingsActivity import pan.alexander.tordnscrypt.modules.ModulesStatus +import pan.alexander.tordnscrypt.proxy.CLEARNET_APPS_FOR_PROXY import pan.alexander.tordnscrypt.utils.Utils.areNotificationsNotAllowed import pan.alexander.tordnscrypt.utils.logger.Logger.loge import pan.alexander.tordnscrypt.utils.logger.Logger.logi @@ -220,15 +221,18 @@ class FirewallNotification : BroadcastReceiver() { } logi("FirewallNotification package added UID $uid") + + excludeTorSelfContainingAppFromTorIfNeeded(context, packages[0], uid) } - private fun packageRemoved(context: Context?, intent: Intent) { + private fun packageRemoved(context: Context, intent: Intent) { logi("FirewallNotification packageRemoved received intent $intent") val uid = intent.getIntExtra(Intent.EXTRA_UID, 0) if (intent.getBooleanExtra(Intent.EXTRA_DATA_REMOVED, false)) { removeFirewallRule(context, uid) + removeTorRulesForUid(uid) } logi("FirewallNotification package removed UID $uid") @@ -272,6 +276,22 @@ class FirewallNotification : BroadcastReceiver() { } } + private fun excludeTorSelfContainingAppFromTorIfNeeded( + context: Context, + pack: String, + uid: Int + ) { + val packetsWithOwnTor = context.resources.getStringArray(R.array.contains_own_tor) + if (packetsWithOwnTor.contains(pack)) { + preferenceRepository.get().apply { + setStringSetPreference( + CLEARNET_APPS, + getStringSetPreference(CLEARNET_APPS) + uid.toString() + ) + } + } + } + private fun removeFirewallRule(context: Context?, uid: Int) { if (uid > 0) { @@ -310,6 +330,20 @@ class FirewallNotification : BroadcastReceiver() { } } + private fun removeTorRulesForUid(uid: Int) { + val preferences = preferenceRepository.get() + val torAppPreferenceKeys = listOf(UNLOCK_APPS, CLEARNET_APPS, CLEARNET_APPS_FOR_PROXY) + for (preferenceKey in torAppPreferenceKeys) { + val uids = preferences.getStringSetPreference(preferenceKey) + if (uids.contains(uid.toString())) { + preferences.setStringSetPreference( + preferenceKey, + uids.apply { remove(uid.toString()) } + ) + } + } + } + private fun closeNotification(notificationManager: NotificationManager?, notificationId: Int) { if (notificationId > 0) { notificationManager?.cancel(notificationId) diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/FirewallPreferencesFragment.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/FirewallPreferencesFragment.kt index ce91375e0..0c85b8a1c 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/FirewallPreferencesFragment.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/FirewallPreferencesFragment.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
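
The FirewallNotification hunks above react to package events in two new ways: when an installed package's name appears in the contains_own_tor resource array, its UID is appended to CLEARNET_APPS so the app bypasses InviZible's Tor (routing an app's built-in Tor through another Tor instance breaks it), and when a package is fully removed, its UID is purged from UNLOCK_APPS, CLEARNET_APPS and CLEARNET_APPS_FOR_PROXY. An illustrative sketch of the purge; PreferenceStore and the key string values are stand-ins for the project's PreferenceRepository and constants:

```kotlin
// Stand-in for the project's PreferenceRepository string-set API.
interface PreferenceStore {
    fun getStringSet(key: String): Set<String>
    fun setStringSet(key: String, value: Set<String>)
}

// Hypothetical string values; the project uses the UNLOCK_APPS, CLEARNET_APPS
// and CLEARNET_APPS_FOR_PROXY constants named in the diff.
val TOR_APP_KEYS = listOf("unlockApps", "clearnetApps", "clearnetAppsForProxy")

fun removeUidFromTorRules(prefs: PreferenceStore, uid: Int) {
    val uidStr = uid.toString()
    for (key in TOR_APP_KEYS) {
        val uids = prefs.getStringSet(key)
        if (uidStr in uids) {
            // Write back a fresh set rather than mutating the returned one:
            // sets handed out by SharedPreferences must be treated as immutable.
            prefs.setStringSet(key, uids - uidStr)
        }
    }
}
```

The copy-before-write in the sketch is a deliberate precaution, not a claim about the hunk (which mutates the returned set via apply { remove(...) }); whether that is safe depends on how the repository implements getStringSetPreference().
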
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.firewall diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/FirewallState.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/FirewallState.kt index 571381547..f73d8e85b 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/FirewallState.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/FirewallState.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.firewall diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/FirewallUtils.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/FirewallUtils.kt index 7ee002ac1..6e03a62d3 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/FirewallUtils.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/FirewallUtils.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.firewall diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/FirewallViewModel.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/FirewallViewModel.kt index db7af7af8..93f03cf7c 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/FirewallViewModel.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/FirewallViewModel.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.firewall diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/SaveFirewallChangesDialog.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/SaveFirewallChangesDialog.kt index f15f861bb..539a19c20 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/SaveFirewallChangesDialog.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/SaveFirewallChangesDialog.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.firewall diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/adapter/FirewallAdapter.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/adapter/FirewallAdapter.kt index 3160a0d26..14f1e24df 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/adapter/FirewallAdapter.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/adapter/FirewallAdapter.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.firewall.adapter diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/adapter/FirewallAdapterRecyclerItemDiffCallback.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/adapter/FirewallAdapterRecyclerItemDiffCallback.kt index 29393db11..d8ac55cbf 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/adapter/FirewallAdapterRecyclerItemDiffCallback.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/firewall/adapter/FirewallAdapterRecyclerItemDiffCallback.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.firewall.adapter diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/itpd_settings/ITPDSubscriptionsFragment.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/itpd_settings/ITPDSubscriptionsFragment.java index 95bdc475c..d927d0f99 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/itpd_settings/ITPDSubscriptionsFragment.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/itpd_settings/ITPDSubscriptionsFragment.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.itpd_settings; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/itpd_settings/ItpdSubscriptionRecycleItem.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/itpd_settings/ItpdSubscriptionRecycleItem.java index 53d6f4b7f..dc5e9bda8 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/itpd_settings/ItpdSubscriptionRecycleItem.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/itpd_settings/ItpdSubscriptionRecycleItem.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.itpd_settings; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/itpd_settings/ItpdSubscriptionsViewModel.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/itpd_settings/ItpdSubscriptionsViewModel.kt index 67e2eeebe..8e6d7f5ea 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/itpd_settings/ItpdSubscriptionsViewModel.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/itpd_settings/ItpdSubscriptionsViewModel.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.itpd_settings diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/itpd_settings/PreferencesITPDFragment.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/itpd_settings/PreferencesITPDFragment.java index 85ebe6fa0..b3734fc92 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/itpd_settings/PreferencesITPDFragment.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/itpd_settings/PreferencesITPDFragment.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.itpd_settings; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_apps/ApplicationData.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_apps/ApplicationData.kt index bf9c74656..9293098fa 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_apps/ApplicationData.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_apps/ApplicationData.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.tor_apps diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_apps/TorAppData.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_apps/TorAppData.kt index 529579f4e..33aefc152 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_apps/TorAppData.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_apps/TorAppData.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.tor_apps diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_apps/TorAppsAdapter.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_apps/TorAppsAdapter.java index 85707b7ff..441e5a9b7 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_apps/TorAppsAdapter.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_apps/TorAppsAdapter.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.tor_apps; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_apps/UnlockTorAppsFragment.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_apps/UnlockTorAppsFragment.java index 79fd86b3e..6e42b3a5c 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_apps/UnlockTorAppsFragment.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_apps/UnlockTorAppsFragment.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.tor_apps; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/BridgeAdapter.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/BridgeAdapter.java index 73d928ca7..1a1d3dbb9 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/BridgeAdapter.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/BridgeAdapter.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.tor_bridges; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/BridgePingComparator.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/BridgePingComparator.java index 4bde1f1c2..9f50a8b1e 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/BridgePingComparator.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/BridgePingComparator.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.tor_bridges; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/BridgePingHelper.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/BridgePingHelper.kt index 7d509e848..cc0405c82 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/BridgePingHelper.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/BridgePingHelper.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.tor_bridges diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/DialogsFlowState.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/DialogsFlowState.kt index e9b6819c0..7e9256eaa 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/DialogsFlowState.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/DialogsFlowState.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.tor_bridges diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/GetNewBridgesCallbacks.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/GetNewBridgesCallbacks.kt index 18cb93080..1710280fc 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/GetNewBridgesCallbacks.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/GetNewBridgesCallbacks.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.tor_bridges diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/ObfsBridge.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/ObfsBridge.java index e0da41e7a..0ca8eeb1d 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/ObfsBridge.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/ObfsBridge.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.tor_bridges; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/PreferencesBridges.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/PreferencesBridges.java index 1090de3c5..d87159e49 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/PreferencesBridges.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/PreferencesBridges.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.tor_bridges; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/PreferencesTorBridges.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/PreferencesTorBridges.java index 0226a7790..527c1f0c4 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/PreferencesTorBridges.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/PreferencesTorBridges.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.tor_bridges; @@ -767,16 +767,16 @@ private void addBridges(final List persistList) { pattern = Pattern.compile("^scramblesuit +" + bridgeBase + "( +password=\\w+)?"); } else if (inputLinesStr.contains(meek_lite.toString())) { inputBridgesType = meek_lite.toString(); - pattern = Pattern.compile("^meek_lite +" + bridgeBase + " +url=https://[\\w./-]+ +front=[\\w./-]+( +utls=\\w+)?"); + pattern = Pattern.compile("^meek_lite +" + bridgeBase + " +url=https://[\\w.+/-]+ +front=[\\w./-]+( +utls=\\w+)?"); } else if (inputLinesStr.contains(snowflake.toString())) { inputBridgesType = snowflake.toString(); - pattern = Pattern.compile("^snowflake +" + bridgeBase + "(?: +fingerprint=\\w+)?(?: +url=https://[\\w./-]+)?(?: +ampcache=https://[\\w./-]+)?(?: +front=[\\w./-]+)?(?: +ice=(?:stun:[\\w./-]+?:\\d+,?)+)?(?: +utls-imitate=\\w+)?(?: +sqsqueue=https://[\\w./-]+)?(?: +sqscreds=[-A-Za-z0-9+/=]+)?"); + pattern = Pattern.compile("^snowflake +" + bridgeBase + "(?: +fingerprint=\\w+)?(?: +url=https://[\\w.+/-]+)?(?: +ampcache=https://[\\w.+/-]+)?(?: +front=[\\w./-]+)?(?: +ice=(?:stun:[\\w./-]+?:\\d+,?)+)?(?: +utls-imitate=\\w+)?(?: +sqsqueue=https://[\\w.+/-]+)?(?: +sqscreds=[-A-Za-z0-9+/=]+)?"); } else if (inputLinesStr.contains(conjure.toString())) { inputBridgesType = conjure.toString(); pattern = Pattern.compile("^conjure +" + bridgeBase + ".*"); } else if (inputLinesStr.contains(webtunnel.toString())) { inputBridgesType = webtunnel.toString(); - pattern = Pattern.compile("^webtunnel +" + bridgeBase + " +url=http(s)?://[\\w./-]+(?: ver=[0-9.]+)?"); + pattern = Pattern.compile("^webtunnel +" + bridgeBase + " +url=http(s)?://[\\w.+/-]+(?: ver=[0-9.]+)?"); } else { pattern = Pattern.compile(bridgeBase); } diff --git 
a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/PreferencesTorBridgesViewModel.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/PreferencesTorBridgesViewModel.kt index 05dacd0eb..569cbaa9e 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/PreferencesTorBridgesViewModel.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/PreferencesTorBridgesViewModel.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.tor_bridges diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/SnowflakeConfigurator.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/SnowflakeConfigurator.java index 672131cb1..5c4a01d10 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/SnowflakeConfigurator.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_bridges/SnowflakeConfigurator.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.tor_bridges; @@ -131,7 +131,7 @@ private String getFront(int rendezvousType) { if (rendezvous == AMP_CACHE) { return "www.google.com,cdn.ampproject.org"; } else if (rendezvous == CDN77) { - return "docs.plesk.com,www.phpmyadmin.net,app.datapacket.com"; + return "docs.plesk.com,maxst.icons8.com,app.datapacket.com"; } else if (rendezvous == AZURE) { return "ajax.aspnetcdn.com"; } else { diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_countries/Countries.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_countries/Countries.java index 481693eba..e84747076 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_countries/Countries.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_countries/Countries.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.tor_countries; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_countries/CountrySelectFragment.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_countries/CountrySelectFragment.java index f9ff046c1..35ad7dde9 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_countries/CountrySelectFragment.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_countries/CountrySelectFragment.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
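
Two substantive changes sit in the Tor-bridge hunks above. First, the bridge-line patterns for meek_lite, snowflake and webtunnel now include + in the character class for url=, ampcache= and sqsqueue= values, since webtunnel paths and AMP cache or SQS queue URLs can carry base64-style + characters that the old [\w./-]+ class rejected, causing valid pasted bridges to be dropped. Second, SnowflakeConfigurator rotates the CDN77 front from www.phpmyadmin.net to maxst.icons8.com; front domains are routinely swapped once a front stops working or gets blocked. A quick check of the widened webtunnel pattern; bridgeBase is simplified here (the real sub-pattern matches address, port and fingerprint more strictly):

```kotlin
fun main() {
    // Simplified stand-in for the diff's bridgeBase sub-pattern.
    val bridgeBase = """\d{1,3}(?:\.\d{1,3}){3}:\d+ +\w+"""
    // The url character class now contains '+', exactly as in the updated pattern.
    val webtunnel =
        Regex("""^webtunnel +$bridgeBase +url=http(s)?://[\w.+/-]+(?: ver=[0-9.]+)?""")

    val line =
        "webtunnel 192.0.2.3:443 4EF0ABCD url=https://example.com/Zm9v+YmFy ver=1.0"
    println(webtunnel.matches(line)) // true: '+' in the path no longer rejects it
}
```
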
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.tor_countries; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_countries/SelectedCountries.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_countries/SelectedCountries.java index 7a18a9416..638ef003a 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_countries/SelectedCountries.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_countries/SelectedCountries.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.tor_countries; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_ips/DialogAddDomainIp.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_ips/DialogAddDomainIp.java index afbfa1c7a..69d143853 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_ips/DialogAddDomainIp.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_ips/DialogAddDomainIp.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.tor_ips; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_ips/DialogDomainIp.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_ips/DialogDomainIp.java index 96d688a72..45d07c450 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_ips/DialogDomainIp.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_ips/DialogDomainIp.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.tor_ips; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_ips/DialogEditDomainIp.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_ips/DialogEditDomainIp.java index 83413a8cc..ed1338bb5 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_ips/DialogEditDomainIp.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_ips/DialogEditDomainIp.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.tor_ips; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_ips/DomainIpAdapter.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_ips/DomainIpAdapter.java index 86eb51657..e41528b78 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_ips/DomainIpAdapter.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_ips/DomainIpAdapter.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.tor_ips; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_ips/DomainIpDiffUtilItemCallback.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_ips/DomainIpDiffUtilItemCallback.kt index bfa9b971a..d899a057e 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_ips/DomainIpDiffUtilItemCallback.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_ips/DomainIpDiffUtilItemCallback.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.tor_ips diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_ips/DomainIpEntity.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_ips/DomainIpEntity.kt index 1422b9227..a7087901f 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_ips/DomainIpEntity.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_ips/DomainIpEntity.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.tor_ips diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_ips/UnlockTorIpsFragment.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_ips/UnlockTorIpsFragment.java index a87a8b921..94d1d3114 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_ips/UnlockTorIpsFragment.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_ips/UnlockTorIpsFragment.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.tor_ips; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_ips/UnlockTorIpsViewModel.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_ips/UnlockTorIpsViewModel.kt index f34708f00..18d0ab267 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_ips/UnlockTorIpsViewModel.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_ips/UnlockTorIpsViewModel.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.tor_ips diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_preferences/ModifyForwardingRules.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_preferences/ModifyForwardingRules.kt index a26a134da..c293c8c85 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_preferences/ModifyForwardingRules.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_preferences/ModifyForwardingRules.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.tor_preferences diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_preferences/PreferencesTorFragment.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_preferences/PreferencesTorFragment.java index b53b692c4..528741e39 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_preferences/PreferencesTorFragment.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/settings/tor_preferences/PreferencesTorFragment.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.settings.tor_preferences; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tiles/BaseTileService.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tiles/BaseTileService.kt index 870ed4ca5..a2f0715c2 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tiles/BaseTileService.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tiles/BaseTileService.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.tiles diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tiles/ChangeTorIpTileManager.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tiles/ChangeTorIpTileManager.kt index ae881ef16..45af75802 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tiles/ChangeTorIpTileManager.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tiles/ChangeTorIpTileManager.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.tiles diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tiles/ChangeTorIpTileService.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tiles/ChangeTorIpTileService.kt index 5bd8e278b..15e2326e8 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tiles/ChangeTorIpTileService.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tiles/ChangeTorIpTileService.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.tiles diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tiles/DNSCryptTileService.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tiles/DNSCryptTileService.kt index b7db15816..c01c767d7 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tiles/DNSCryptTileService.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tiles/DNSCryptTileService.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.tiles diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tiles/ITPDTileService.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tiles/ITPDTileService.kt index 1a37f4976..23c5237b6 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tiles/ITPDTileService.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tiles/ITPDTileService.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.tiles diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tiles/ModulesControlTileManager.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tiles/ModulesControlTileManager.kt index 18276ebd4..54326340d 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tiles/ModulesControlTileManager.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tiles/ModulesControlTileManager.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.tiles diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tiles/TilesLimiter.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tiles/TilesLimiter.kt index c6ba1e6aa..5fe3c2a4b 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tiles/TilesLimiter.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tiles/TilesLimiter.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.tiles diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tiles/TorTileService.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tiles/TorTileService.kt index 22ac581dc..a90af8fe0 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tiles/TorTileService.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tiles/TorTileService.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.tiles diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tor_fragment/TorFragmentPresenter.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tor_fragment/TorFragmentPresenter.java index d7a9503f4..38c7e5dbb 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tor_fragment/TorFragmentPresenter.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tor_fragment/TorFragmentPresenter.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.tor_fragment; @@ -57,6 +57,7 @@ import pan.alexander.tordnscrypt.vpn.service.ServiceVPNHelper; import static pan.alexander.tordnscrypt.TopFragment.TOP_BROADCAST; +import static pan.alexander.tordnscrypt.di.SharedPreferencesModule.DEFAULT_PREFERENCES_NAME; import static pan.alexander.tordnscrypt.utils.logger.Logger.loge; import static pan.alexander.tordnscrypt.utils.logger.Logger.logi; import static pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.IGNORE_SYSTEM_DNS; @@ -68,8 +69,10 @@ import static pan.alexander.tordnscrypt.utils.enums.ModuleState.STOPPING; import static pan.alexander.tordnscrypt.utils.enums.ModuleState.UNDEFINED; import static pan.alexander.tordnscrypt.utils.enums.OperationMode.ROOT_MODE; +import static pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.TOR_OUTBOUND_PROXY; import javax.inject.Inject; +import javax.inject.Named; public class TorFragmentPresenter implements TorFragmentPresenterInterface, OnTorLogUpdatedListener, OnInternetConnectionCheckedListener { @@ -77,6 +80,9 @@ public class TorFragmentPresenter implements TorFragmentPresenterInterface, @Inject public Lazy checkConnectionInteractor; @Inject + @Named(DEFAULT_PREFERENCES_NAME) + public Lazy defaultPreferences; + @Inject public Lazy preferenceRepository; @Inject public Lazy torInteractor; @@ -460,14 +466,29 @@ private void torStartingWithError(LogDataModel logData) { loge("Problem bootstrapping Tor: " + logData.getLines()); - NotificationHelper notificationHelper; - notificationHelper = NotificationHelper.setHelperMessage( - context, context.getString(R.string.helper_tor_use_bridges), "helper_tor_use_bridges"); + if (defaultPreferences.get().getBoolean(TOR_OUTBOUND_PROXY, false)) { + showCheckProxyMessage(fragmentManager); + } else { + showUseBridgesMessage(fragmentManager); + } + + setFixedErrorState(true); + } + + private void showCheckProxyMessage(FragmentManager fragmentManager) { + NotificationHelper notificationHelper = NotificationHelper.setHelperMessage( + context, context.getString(R.string.helper_tor_check_proxy), "helper_tor_check_proxy"); if (notificationHelper != null) { notificationHelper.show(fragmentManager, NotificationHelper.TAG_HELPER); } + } - setFixedErrorState(true); + private void showUseBridgesMessage(FragmentManager fragmentManager) { + NotificationHelper notificationHelper = NotificationHelper.setHelperMessage( + context, context.getString(R.string.helper_tor_use_bridges), "helper_tor_use_bridges"); + if (notificationHelper != null) { + notificationHelper.show(fragmentManager, NotificationHelper.TAG_HELPER); + } } private void checkInternetAvailable() { diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tor_fragment/TorFragmentPresenterInterface.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tor_fragment/TorFragmentPresenterInterface.java index f39ec0408..480f7c420 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tor_fragment/TorFragmentPresenterInterface.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tor_fragment/TorFragmentPresenterInterface.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
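The TorFragmentPresenter hunk above makes torStartingWithError() distinguish two failure hints: when the user has enabled Tor's outbound proxy (TOR_OUTBOUND_PROXY in the injected default SharedPreferences), a "check your proxy" helper is shown instead of the generic "use bridges" suggestion, since a dead proxy is the more likely cause of a failed bootstrap. A condensed sketch of the decision; the real code shows NotificationHelper dialogs rather than returning a string key, and the key's literal value is a placeholder here:

```java
import android.content.SharedPreferences;

final class BootstrapErrorHint {
    // Hypothetical literal; the actual value lives in PreferenceKeys.TOR_OUTBOUND_PROXY.
    static final String TOR_OUTBOUND_PROXY = "Enable output Socks5 proxy";

    static String pickHelperMessage(SharedPreferences defaultPreferences) {
        if (defaultPreferences.getBoolean(TOR_OUTBOUND_PROXY, false)) {
            return "helper_tor_check_proxy";  // proxy enabled -> suspect the proxy first
        } else {
            return "helper_tor_use_bridges";  // otherwise suggest bridges, as before
        }
    }
}
```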
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.tor_fragment; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tor_fragment/TorFragmentReceiver.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tor_fragment/TorFragmentReceiver.java index fd430e8c7..90f106c21 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tor_fragment/TorFragmentReceiver.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tor_fragment/TorFragmentReceiver.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.tor_fragment; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tor_fragment/TorFragmentView.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tor_fragment/TorFragmentView.java index 437397760..251a03073 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tor_fragment/TorFragmentView.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tor_fragment/TorFragmentView.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.tor_fragment; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tor_fragment/TorRunFragment.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tor_fragment/TorRunFragment.java index 672d8d789..ea3f02f4f 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tor_fragment/TorRunFragment.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/tor_fragment/TorRunFragment.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.tor_fragment; @@ -24,6 +24,7 @@ import android.content.BroadcastReceiver; import android.content.Context; import android.content.IntentFilter; +import android.os.Build; import android.os.Bundle; import androidx.annotation.NonNull; @@ -272,7 +273,11 @@ public void setTorProgressBarProgress(int progress) { pbTor.setIndeterminate(false); } if (progress >= 0) { - pbTor.setProgress(progress); + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.N) { + pbTor.setProgress(progress, true); + } else { + pbTor.setProgress(progress); + } pbTor.setVisibility(View.VISIBLE); divTor.setVisibility(View.GONE); } else { diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/update/DownloadTask.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/update/DownloadTask.java index 36ac64150..33d59d69d 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/update/DownloadTask.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/update/DownloadTask.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
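The TorRunFragment hunk above switches setTorProgressBarProgress() to the two-argument ProgressBar.setProgress(int, boolean) overload, which animates the change but only exists since API 24 (Build.VERSION_CODES.N), with a fallback to the one-argument form on older devices. A minimal standalone sketch of the same guard:

```java
import android.os.Build;
import android.widget.ProgressBar;

final class ProgressBarCompat {
    // setProgress(int, boolean) animates the transition; it was added in API 24,
    // so older platforms fall back to the non-animated setter.
    static void setProgressAnimated(ProgressBar bar, int progress) {
        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.N) {
            bar.setProgress(progress, true);
        } else {
            bar.setProgress(progress);
        }
    }
}
```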
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.update; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/update/UpdateCheck.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/update/UpdateCheck.java index 650f6763d..2acd4c171 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/update/UpdateCheck.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/update/UpdateCheck.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.update; @@ -189,7 +189,7 @@ private void compareVersions(String serverAnswer) { return; } - if (!iproArr[2].matches("\\d{3}")) { + if (!iproArr[2].matches("\\d{3,4}")) { showUpdateMessageAndSaveResult(R.string.update_fault); logw("compareVersions function fault iproArr pass regexp mismatch"); return; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/update/UpdateService.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/update/UpdateService.java index 34293760b..b19cffb24 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/update/UpdateService.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/update/UpdateService.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.update; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/AppExtensions.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/AppExtensions.kt index 6c65957a2..d5d387f96 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/AppExtensions.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/AppExtensions.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ @file:JvmName("AppExtension") diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/Constants.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/Constants.java index 304a9ce3e..8d51b504f 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/Constants.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/Constants.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
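The UpdateCheck hunk above relaxes the sanity check on the third component of the server-reported version from exactly three digits (`\d{3}`) to three or four (`\d{3,4}`); a four-digit component would previously have been rejected as an update fault. A quick illustration of which strings each expression accepts:

```java
public class VersionComponentCheck {
    public static void main(String[] args) {
        String[] samples = {"123", "1234", "12", "12345"};
        for (String s : samples) {
            System.out.printf("%-6s old(\\d{3})=%b  new(\\d{3,4})=%b%n",
                    s, s.matches("\\d{3}"), s.matches("\\d{3,4}"));
        }
        // old: only "123" passes; new: "123" and "1234" pass.
    }
}
```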
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/ThemeUtils.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/ThemeUtils.kt index 255b17991..efcc2f497 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/ThemeUtils.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/ThemeUtils.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/Utils.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/Utils.kt index c26104b54..b1ddb99ab 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/Utils.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/Utils.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/ap/ApManager.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/ap/ApManager.java index 795fc9661..3159bab41 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/ap/ApManager.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/ap/ApManager.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.ap; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/ap/InternetSharingChecker.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/ap/InternetSharingChecker.java index 025235a83..c7115f4ac 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/ap/InternetSharingChecker.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/ap/InternetSharingChecker.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.ap; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/appexit/AppExitDetectService.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/appexit/AppExitDetectService.java index 8d2969836..0f74c7fd0 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/appexit/AppExitDetectService.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/appexit/AppExitDetectService.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.appexit; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/apps/InstalledAppNamesStorage.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/apps/InstalledAppNamesStorage.kt index 27c0e47d6..1448a5a10 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/apps/InstalledAppNamesStorage.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/apps/InstalledAppNamesStorage.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.apps diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/apps/InstalledApplicationsManager.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/apps/InstalledApplicationsManager.kt index 7e2d47e44..6dff9ca09 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/apps/InstalledApplicationsManager.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/apps/InstalledApplicationsManager.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.apps diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/bootcomplete/BootCompleteManager.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/bootcomplete/BootCompleteManager.java index 25750a040..f5d598838 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/bootcomplete/BootCompleteManager.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/bootcomplete/BootCompleteManager.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.bootcomplete; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/connectionchecker/HttpInternetChecker.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/connectionchecker/HttpInternetChecker.kt index eee463d51..f571840ef 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/connectionchecker/HttpInternetChecker.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/connectionchecker/HttpInternetChecker.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.connectionchecker diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/connectionchecker/NetworkChecker.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/connectionchecker/NetworkChecker.kt index 07729ff92..0c73718d8 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/connectionchecker/NetworkChecker.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/connectionchecker/NetworkChecker.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.connectionchecker diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/connectionchecker/ProxyAuthManager.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/connectionchecker/ProxyAuthManager.kt index 42c251e98..1b475c510 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/connectionchecker/ProxyAuthManager.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/connectionchecker/ProxyAuthManager.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.connectionchecker diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/connectionchecker/SocketInternetChecker.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/connectionchecker/SocketInternetChecker.kt index bcf059802..76c592ecb 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/connectionchecker/SocketInternetChecker.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/connectionchecker/SocketInternetChecker.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.connectionchecker @@ -105,15 +105,17 @@ class SocketInternetChecker @Inject constructor() { InetSocketAddress(InetAddress.getByName(ip), port) socket.connect(sockAddress, PING_TIMEOUT_SEC * 1000) - socket.soTimeout = 100 + socket.soTimeout = PING_TIMEOUT_SEC * 1000 if (isProxyUsed(proxyAddress, proxyPort)) { - if (socket.inetAddress.isReachable(CHECK_ADDRESS_REACHABLE_TIMEOUT_SEC * 1000)) { - return (System.currentTimeMillis() - timeStart).toInt() - } + socket.shutdownOutput() + socket.getInputStream().read(byteArrayOf(0)) + return ((System.currentTimeMillis() - timeStart) / 2).toInt() } else { if (socket.isConnected) { - return (System.currentTimeMillis() - timeStart).toInt() + val time = System.currentTimeMillis() + socket.shutdownOutput() + return (time - timeStart).toInt() } } diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/connectivitycheck/ConnectivityCheckManager.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/connectivitycheck/ConnectivityCheckManager.kt index f4f922faa..a61011e28 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/connectivitycheck/ConnectivityCheckManager.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/connectivitycheck/ConnectivityCheckManager.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.connectivitycheck diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/delegates/MutableLazy.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/delegates/MutableLazy.kt index d37851fd5..25d5b264c 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/delegates/MutableLazy.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/delegates/MutableLazy.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.delegates diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/DnsMessage.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/DnsMessage.java index c21239be1..53c3882c5 100755 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/DnsMessage.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/DnsMessage.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
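The SocketInternetChecker hunk above replaces the InetAddress.isReachable() probe (a separate echo check that tends to fail when the traffic must go through a SOCKS proxy) with reading from the already-connected socket itself, raises soTimeout from a hard-coded 100 ms to the same value as the connect timeout, and halves the measured round trip in the proxy branch as a rough latency estimate. A minimal Java sketch of timing a plain TCP connect in the same spirit (no proxy handling; the PING_TIMEOUT_SEC value is an assumption, not the app's constant):

```java
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.Socket;

public class ConnectLatency {
    private static final int PING_TIMEOUT_SEC = 10; // assumed value for illustration

    // Rough reachability check: time a TCP connect to ip:port.
    static int measureConnectMillis(String ip, int port) throws IOException {
        long start = System.currentTimeMillis();
        try (Socket socket = new Socket()) {
            socket.connect(new InetSocketAddress(ip, port), PING_TIMEOUT_SEC * 1000);
            socket.setSoTimeout(PING_TIMEOUT_SEC * 1000); // match read timeout to connect timeout
            return (int) (System.currentTimeMillis() - start);
        }
    }

    public static void main(String[] args) throws IOException {
        System.out.println(measureConnectMillis("1.1.1.1", 443) + " ms");
    }
}
```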
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.dns; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/DnsRequest.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/DnsRequest.java index 301c46c6a..cc7f7c0ad 100755 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/DnsRequest.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/DnsRequest.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.dns; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/DnsResolver.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/DnsResolver.java index c71e0c096..75cec7eab 100755 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/DnsResolver.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/DnsResolver.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.dns; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/DnsResponse.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/DnsResponse.java index 0f55d82b6..a768b9447 100755 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/DnsResponse.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/DnsResponse.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.dns; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/DohResolver.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/DohResolver.java index a8a18051f..59ed2fdc0 100755 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/DohResolver.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/DohResolver.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.dns; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/Domain.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/Domain.java index 67f035b17..5f3f90fbe 100755 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/Domain.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/Domain.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.dns; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/Record.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/Record.java index d4d56e494..4f4344239 100755 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/Record.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/Record.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.dns; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/Resolver.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/Resolver.java index cfc6fcca3..d125e7281 100755 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/Resolver.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/Resolver.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.dns; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/UdpResolver.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/UdpResolver.java index d2ced1f1d..e6f1454c0 100755 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/UdpResolver.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/dns/UdpResolver.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.dns; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/enums/AccessPointState.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/enums/AccessPointState.java index 07dab8690..67a05a528 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/enums/AccessPointState.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/enums/AccessPointState.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.enums; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/enums/BridgeType.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/enums/BridgeType.java index 4ca0d1687..36764cb55 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/enums/BridgeType.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/enums/BridgeType.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.enums; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/enums/BridgesSelector.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/enums/BridgesSelector.kt index db856f929..99d07e3b4 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/enums/BridgesSelector.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/enums/BridgesSelector.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.enums diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/enums/FileOperationsVariants.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/enums/FileOperationsVariants.java index 5873c654d..faa0b51e5 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/enums/FileOperationsVariants.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/enums/FileOperationsVariants.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.enums; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/enums/ModuleName.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/enums/ModuleName.kt index b429bea8d..7f7f6cb87 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/enums/ModuleName.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/enums/ModuleName.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.enums diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/enums/ModuleState.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/enums/ModuleState.java index d65c0837d..8c0a8b0f5 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/enums/ModuleState.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/enums/ModuleState.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.enums; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/enums/OperationMode.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/enums/OperationMode.java index 4d6c66388..4c6d254d3 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/enums/OperationMode.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/enums/OperationMode.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.enums; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/enums/VPNCommand.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/enums/VPNCommand.java index e12f727aa..52a3dec34 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/enums/VPNCommand.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/enums/VPNCommand.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.enums; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/executors/CachedExecutor.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/executors/CachedExecutor.kt index 007ad9cfc..c83675e17 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/executors/CachedExecutor.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/executors/CachedExecutor.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.executors diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/executors/CoroutineExecutor.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/executors/CoroutineExecutor.kt index 86d9c7d9e..2b8aa017a 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/executors/CoroutineExecutor.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/executors/CoroutineExecutor.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.executors diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/filemanager/ExternalStoragePermissions.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/filemanager/ExternalStoragePermissions.java index f9b548cbf..f2c44dea2 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/filemanager/ExternalStoragePermissions.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/filemanager/ExternalStoragePermissions.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.filemanager; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/filemanager/FileManager.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/filemanager/FileManager.java index e6c397b17..be690252f 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/filemanager/FileManager.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/filemanager/FileManager.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.filemanager; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/filemanager/FileShortener.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/filemanager/FileShortener.java index 8257d2bc9..9576f42c2 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/filemanager/FileShortener.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/filemanager/FileShortener.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.filemanager; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/filemanager/OnBinaryFileOperationsCompleteListener.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/filemanager/OnBinaryFileOperationsCompleteListener.java index 778f2b7b6..968ad7102 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/filemanager/OnBinaryFileOperationsCompleteListener.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/filemanager/OnBinaryFileOperationsCompleteListener.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.filemanager; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/filemanager/OnFileOperationsCompleteListener.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/filemanager/OnFileOperationsCompleteListener.java index 38bbe24de..29334e3c3 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/filemanager/OnFileOperationsCompleteListener.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/filemanager/OnFileOperationsCompleteListener.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.filemanager; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/filemanager/OnTextFileOperationsCompleteListener.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/filemanager/OnTextFileOperationsCompleteListener.java index 2c9d5ad45..33a083047 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/filemanager/OnTextFileOperationsCompleteListener.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/filemanager/OnTextFileOperationsCompleteListener.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.filemanager; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/integrity/Verifier.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/integrity/Verifier.java index 2b6444b7b..e5f40d073 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/integrity/Verifier.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/integrity/Verifier.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.integrity; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/logger/Logger.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/logger/Logger.kt index 5baf86bdb..70f105679 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/logger/Logger.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/logger/Logger.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.logger diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/mode/AppModeManager.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/mode/AppModeManager.kt index a6c4aa44b..191dd32bb 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/mode/AppModeManager.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/mode/AppModeManager.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.mode diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/mode/AppModeManagerCallback.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/mode/AppModeManagerCallback.kt index 64cc0846a..14123f13d 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/mode/AppModeManagerCallback.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/mode/AppModeManagerCallback.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.mode diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/notification/NotificationPermissionDialog.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/notification/NotificationPermissionDialog.kt index 56a1e7852..d0d5e2eb1 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/notification/NotificationPermissionDialog.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/notification/NotificationPermissionDialog.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.notification diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/notification/NotificationPermissionManager.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/notification/NotificationPermissionManager.kt index c9cf04b4e..74c1c41cf 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/notification/NotificationPermissionManager.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/notification/NotificationPermissionManager.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.notification diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/parsers/DnsCryptConfigurationParser.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/parsers/DnsCryptConfigurationParser.java index 43f722081..ccac5937f 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/parsers/DnsCryptConfigurationParser.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/parsers/DnsCryptConfigurationParser.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.parsers; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/parsers/TorProjectBridgesParser.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/parsers/TorProjectBridgesParser.kt index bb82b44d7..8ac3bd715 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/parsers/TorProjectBridgesParser.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/parsers/TorProjectBridgesParser.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.parsers diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/portchecker/PortChecker.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/portchecker/PortChecker.java index 6a4846ea2..78eace877 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/portchecker/PortChecker.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/portchecker/PortChecker.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.portchecker; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/preferences/AppPreferenceHelper.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/preferences/AppPreferenceHelper.kt index fcb4e57d6..64c57cfc0 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/preferences/AppPreferenceHelper.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/preferences/AppPreferenceHelper.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.preferences diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/preferences/AppPreferenceHelperImpl.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/preferences/AppPreferenceHelperImpl.kt index 6cefed04a..4940ff39b 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/preferences/AppPreferenceHelperImpl.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/preferences/AppPreferenceHelperImpl.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.preferences diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/preferences/PreferenceKeys.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/preferences/PreferenceKeys.java index 5ae1a34bc..79486afeb 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/preferences/PreferenceKeys.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/preferences/PreferenceKeys.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.preferences; @@ -146,6 +146,7 @@ public interface PreferenceKeys { String DNSCRYPT_DNS64_PREFIX = "dns64_prefix"; String DNSCRYPT_OUTBOUND_PROXY = "Enable proxy"; + String DNSCRYPT_OUTBOUND_PROXY_PORT = "proxy_port"; String DNSCRYPT_SERVERS_REFRESH_DELAY = "refresh_delay"; String DNSCRYPT_RELAYS_REFRESH_DELAY = "refresh_delay_relays"; @@ -179,6 +180,7 @@ public interface PreferenceKeys { String SAVE_ROOT_LOGS = "swRootCommandsLog"; //Proxifier + String PROXIFY_NON_TOR = "ProxifyNonTor"; String PROXIFY_DNSCRYPT = "ProxifyDNSCrypt"; String PROXIFY_TOR = "ProxifyTor"; String PROXIFY_I2PD = "ProxifyITPD"; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/privatedns/PrivateDnsProxyManager.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/privatedns/PrivateDnsProxyManager.kt index 005c4b33d..ae7058df6 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/privatedns/PrivateDnsProxyManager.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/privatedns/PrivateDnsProxyManager.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.privatedns diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/resources/ResourceManager.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/resources/ResourceManager.kt index 15e6601c9..f8ec9c6b6 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/resources/ResourceManager.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/resources/ResourceManager.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.resources diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/resources/ResourceManagerImpl.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/resources/ResourceManagerImpl.kt index 84a55bf91..f0e8487dd 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/resources/ResourceManagerImpl.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/resources/ResourceManagerImpl.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. 
If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.resources diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/root/RootCommands.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/root/RootCommands.java index 086f14c2b..22b79296c 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/root/RootCommands.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/root/RootCommands.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.root; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/root/RootCommandsMark.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/root/RootCommandsMark.java index b46e492e2..d715abaf9 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/root/RootCommandsMark.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/root/RootCommandsMark.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.root; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/root/RootExecService.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/root/RootExecService.java index 17d89a7c6..3f1792c7c 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/root/RootExecService.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/root/RootExecService.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.root; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/root/RootExecutor.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/root/RootExecutor.kt index 3869e5235..58155b39c 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/root/RootExecutor.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/root/RootExecutor.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.root diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/root/RootServiceNotificationManager.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/root/RootServiceNotificationManager.java index 16beb6373..43d248aad 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/root/RootServiceNotificationManager.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/root/RootServiceNotificationManager.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.root; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/wakelock/WakeLocksManager.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/wakelock/WakeLocksManager.java index 575996bcb..d489e7c6a 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/wakelock/WakeLocksManager.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/wakelock/WakeLocksManager.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.wakelock; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/web/HttpsConnectionManager.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/web/HttpsConnectionManager.kt index b7aba6aef..62f71477f 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/web/HttpsConnectionManager.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/web/HttpsConnectionManager.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.web diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/web/TLSSocketFactory.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/web/TLSSocketFactory.java index 586779d21..336b43f62 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/web/TLSSocketFactory.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/web/TLSSocketFactory.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.web; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/web/TorRefreshIPsWork.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/web/TorRefreshIPsWork.java index 2066e8fc0..2b236213a 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/web/TorRefreshIPsWork.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/web/TorRefreshIPsWork.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.web; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/workers/UpdateIPsManager.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/workers/UpdateIPsManager.kt index de19bf1dc..b9a1efefb 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/workers/UpdateIPsManager.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/workers/UpdateIPsManager.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.workers diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/workers/UpdateIPsWorker.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/workers/UpdateIPsWorker.kt index b0f7e8837..bdc070f26 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/workers/UpdateIPsWorker.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/workers/UpdateIPsWorker.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.workers diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/zipUtil/ZipFileManager.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/zipUtil/ZipFileManager.java index b811f580a..3f324dc3a 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/zipUtil/ZipFileManager.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/utils/zipUtil/ZipFileManager.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.utils.zipUtil; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/views/FixedAppBarLayoutBehavior.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/views/FixedAppBarLayoutBehavior.kt index 42a2e5a97..09d272d62 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/views/FixedAppBarLayoutBehavior.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/views/FixedAppBarLayoutBehavior.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.views diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/views/SwitchPlusClickPreference.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/views/SwitchPlusClickPreference.java index 984b34257..183bb4e48 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/views/SwitchPlusClickPreference.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/views/SwitchPlusClickPreference.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.views; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/Allowed.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/Allowed.java index d6005b4a0..0e554ea1c 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/Allowed.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/Allowed.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.vpn; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/Forward.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/Forward.java index bc195dd86..a0f28be93 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/Forward.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/Forward.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.vpn; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/IPUtil.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/IPUtil.java index c4f89b2ad..a934182eb 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/IPUtil.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/IPUtil.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.vpn; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/Packet.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/Packet.java index 8525ff2f6..1cbc773e5 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/Packet.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/Packet.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.vpn; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/ResourceRecord.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/ResourceRecord.java index 19a87b6c6..ea7ee85f4 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/ResourceRecord.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/ResourceRecord.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.vpn; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/Rule.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/Rule.java index dade18eae..a85865f60 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/Rule.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/Rule.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.vpn; @@ -22,7 +22,6 @@ import android.content.Context; import android.content.SharedPreferences; import android.content.pm.PackageInfo; -import android.preference.PreferenceManager; import java.util.ArrayList; import java.util.HashSet; @@ -43,6 +42,8 @@ import static pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.UNLOCK_APPS; import static pan.alexander.tordnscrypt.utils.preferences.PreferenceKeys.USE_PROXY; +import androidx.preference.PreferenceManager; + public class Rule { public int uid; public String packageName; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/Usage.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/Usage.java index 2f7fe4fa4..e977c7941 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/Usage.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/Usage.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.vpn; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/VpnUtils.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/VpnUtils.java index 9ee3841ba..739c9435b 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/VpnUtils.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/VpnUtils.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.vpn; @@ -54,33 +54,33 @@ public class VpnUtils { public static final ArrayList<String> nonTorList = new ArrayList<>(Arrays.asList( /*LAN destinations that shouldn't be routed through Tor*/ - "127.0.0.0/8", - "10.0.0.0/8", - "172.16.0.0/12", - "192.168.0.0/16", + "127.0.0.0/8", //Loopback RFC1122 + "10.0.0.0/8", //Private-Use RFC1918 + "172.16.0.0/12", //Private-Use RFC1918 + "192.168.0.0/16", //Private-Use RFC1918 /*Other IANA reserved blocks (These are not processed by tor)*/ META_ADDRESS, - "100.64.0.0/10", - "169.254.0.0/16", - "192.0.0.0/24", - "192.0.2.0/24", - "192.88.99.0/24", - "198.18.0.0/15", - "198.51.100.0/24", - "203.0.113.0/24", - "224.0.0.0/4", - "240.0.0.0/4", - "255.255.255.255/32" + "100.64.0.0/10", //Shared Address Space(CGNAT) RFC6598 + "169.254.0.0/16", //Link local RFC3927 + "192.0.0.0/24", //IETF Protocol Assignments RFC6890 + "192.0.2.0/24", //Documentation(TEST-NET-1) RFC5737 + "192.88.99.0/24", //6to4 Relay Anycast RFC3068 + "198.18.0.0/15", //Benchmarking RFC2544 + "198.51.100.0/24", //Documentation(TEST-NET-2) RFC5737 + "203.0.113.0/24", //Documentation(TEST-NET-3) RFC5737 + "224.0.0.0/4", //Multicast RFC 3171 + "240.0.0.0/4", //Class E address reserved RFC1112 + "255.255.255.255/32" // Limited Broadcast RFC0919 )); public static final ArrayList<String> nonTorIPv6 = new ArrayList<>(Arrays.asList( /*LAN destinations that shouldn't be routed through Tor*/ //https://www.rfc-editor.org/rfc/rfc3513.html - LOOPBACK_ADDRESS_IPv6, //Loopback Address - META_ADDRESS_IPv6, //Unspecified Address - "FEC0::/10", //Site-local unicast, equivalent to 10.0.0.0/8, ... - "FE80::/10", //Link-local unicast, equivalent to 169.254.0.0/16 - "FD00::/8" //Unique local address + LOOPBACK_ADDRESS_IPv6, //Loopback Address RFC4291 + META_ADDRESS_IPv6, //Unspecified Address RFC4291 + "FEC0::/10", //Site-local unicast, equivalent to 10.0.0.0/8 RFC3513 + "FE80::/10", //Link-local unicast, equivalent to 169.254.0.0/16 RFC4291 + "FD00::/8" //Unique local address RFC4193 )); public static final ArrayList<String> multicastIPv6 = new ArrayList<>(Arrays.asList( diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/service/BuilderVPN.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/service/BuilderVPN.java index 7ffee6003..1baec98d7 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/service/BuilderVPN.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/service/BuilderVPN.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see .
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.vpn.service; diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/service/ServiceVPN.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/service/ServiceVPN.java index d53e12ec6..3d8976f99 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/service/ServiceVPN.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/service/ServiceVPN.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.vpn.service; @@ -182,7 +182,7 @@ public class ServiceVPN extends VpnService implements OnInternetConnectionChecke private native void jni_start(long context); @Keep - private native void jni_run(long context, int tun, boolean fwd53, int rcode, boolean compatibilityMode, boolean canFilterSynchronous); + private native void jni_run(long context, int tun, boolean fwd53, int rcode, boolean compatibilityMode, boolean canFilterSynchronous, boolean bypassLanAddresses); @Keep private native void jni_stop(long context); @@ -263,7 +263,8 @@ synchronized void startNative(final ParcelFileDescriptor vpn, List listA vpnRulesHolder.get().mapForwardPort.containsKey(PLAINTEXT_DNS_PORT), vpnPreferences.getDnsBlockedResponseCode(), vpnPreferences.getCompatibilityMode(), - canFilterSynchronous + canFilterSynchronous, + vpnPreferences.getLan() ); if (Thread.currentThread().equals(tunnelThread)) { tunnelThread = null; @@ -453,10 +454,7 @@ public boolean isRedirectToTor(int uid, String destAddress, int destPort) { return false; } - if (uid == vpnPreferences.getOwnUID() - || destAddress.equals(vpnPreferences.getItpdRedirectAddress()) - || destAddress.equals(LOOPBACK_ADDRESS) - || vpnPreferences.getFixTTL() + if (vpnPreferences.getFixTTL() || (vpnPreferences.getCompatibilityMode() && uid == SPECIAL_UID_KERNEL)) { return false; } @@ -506,10 +504,7 @@ public boolean isRedirectToProxy(int uid, String destAddress, int destPort) { return false; } - if (uid == vpnPreferences.getOwnUID() - || destAddress.equals(vpnPreferences.getItpdRedirectAddress()) - || destAddress.equals(LOOPBACK_ADDRESS) - || (vpnPreferences.getFixTTL() && !vpnPreferences.getUseProxy()) + if ((vpnPreferences.getFixTTL() && !vpnPreferences.getUseProxy()) || (vpnPreferences.getCompatibilityMode() && uid == SPECIAL_UID_KERNEL)) { return false; } @@ -933,6 +928,7 @@ void addUIDtoDNSQueryRawRecords( uid, sourceAddress, destinationAddress, + destinationPort, protocol, allowed ); diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/service/ServiceVPNHandler.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/service/ServiceVPNHandler.java index 0bfaa706f..d7b5c4294 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/service/ServiceVPNHandler.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/service/ServiceVPNHandler.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.vpn.service; @@ -63,6 +63,7 @@ import static pan.alexander.tordnscrypt.utils.enums.ModuleState.RUNNING; import static pan.alexander.tordnscrypt.utils.enums.ModuleState.STARTING; import static pan.alexander.tordnscrypt.utils.enums.ModuleState.STOPPING; +import static pan.alexander.tordnscrypt.utils.enums.VPNCommand.STOP; import static pan.alexander.tordnscrypt.utils.logger.Logger.loge; import static pan.alexander.tordnscrypt.utils.logger.Logger.logi; import static pan.alexander.tordnscrypt.utils.logger.Logger.logw; @@ -115,16 +116,18 @@ void queue(Intent intent) { if (cmd != null) { msg.what = cmd.ordinal(); removeMessages(msg.what); - sendMessage(msg); + if (cmd != STOP) { + sendMessage(msg); + } else { + sendMessageDelayed(msg, 1000); + } } } @Override public void handleMessage(@NonNull Message msg) { try { - //synchronized (serviceVPN) { handleIntent((Intent) msg.obj); - //} } catch (Throwable ex) { loge("ServiceVPNHandler handleMessage", ex, true); } diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/service/ServiceVPNHelper.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/service/ServiceVPNHelper.java index 66a0a9050..227581b9c 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/service/ServiceVPNHelper.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/service/ServiceVPNHelper.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.vpn.service; @@ -24,7 +24,6 @@ import android.content.Intent; import android.content.SharedPreferences; import android.os.Build; -import android.preference.PreferenceManager; import java.util.concurrent.locks.ReentrantLock; @@ -44,6 +43,8 @@ import static pan.alexander.tordnscrypt.vpn.service.ServiceVPN.EXTRA_COMMAND; import static pan.alexander.tordnscrypt.vpn.service.ServiceVPN.EXTRA_REASON; +import androidx.preference.PreferenceManager; + public class ServiceVPNHelper { private static final ReentrantLock reentrantLock = new ReentrantLock(); @@ -100,7 +101,7 @@ public static void prepareVPNServiceIfRequired(Activity activity, ModulesStatus boolean fixTTL = modulesStatus.isFixTTL() && (modulesStatus.getMode() == ROOT_MODE) && !modulesStatus.isUseModulesWithRoot(); - SharedPreferences prefs = android.preference.PreferenceManager.getDefaultSharedPreferences(activity); + SharedPreferences prefs = PreferenceManager.getDefaultSharedPreferences(activity); if (((operationMode == VPN_MODE) || fixTTL) && activity instanceof MainActivity && !prefs.getBoolean(VPN_SERVICE_ENABLED, false)) { diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/service/VpnBuilder.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/service/VpnBuilder.java index 690825439..21d3801e8 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/service/VpnBuilder.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/service/VpnBuilder.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.vpn.service; @@ -185,9 +185,9 @@ BuilderVPN getBuilder(ServiceVPN vpn, List listAllowed, List listR listExclude.add(new IPUtil.CIDR("192.168.49.0", 24)); } - //if (!firewallEnabled || lan) { - listExclude.add(new IPUtil.CIDR("224.0.0.0", 4)); // Broadcast - //} + if (lan) { + listExclude.add(new IPUtil.CIDR("224.0.0.0", 4)); // Multicast + } // Subnet routing if (!listExclude.isEmpty()) { @@ -207,7 +207,7 @@ BuilderVPN getBuilder(ServiceVPN vpn, List listAllowed, List listR start = IPUtil.plus1(exclude.getEnd()); } String end = (lan ? "255.255.255.254" : "255.255.255.255"); - for (IPUtil.CIDR include : IPUtil.toCIDR("224.0.0.0", end)) + for (IPUtil.CIDR include : IPUtil.toCIDR(lan ? "240.0.0.0" : "224.0.0.0", end)) try { builder.addRoute(include.address, include.prefix); } catch (Throwable ex) { diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/service/VpnPreferenceHolder.kt b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/service/VpnPreferenceHolder.kt index 771895a32..eb84b8a4a 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/service/VpnPreferenceHolder.kt +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/service/VpnPreferenceHolder.kt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.vpn.service @@ -68,7 +68,7 @@ class VpnPreferenceHolder @Inject constructor( val firewallEnabled = preferenceRepository.getBoolPreference(FIREWALL_ENABLED) val ignoreSystemDNS = defaultPreferences.getBoolean(IGNORE_SYSTEM_DNS, false) - val proxyAddress = defaultPreferences.getString(PROXY_ADDRESS, LOOPBACK_ADDRESS) ?: LOOPBACK_ADDRESS + val proxyAddress = defaultPreferences.getString(PROXY_ADDRESS, LOOPBACK_ADDRESS)?.take(46) ?: LOOPBACK_ADDRESS val proxyPort = defaultPreferences.getString(PROXY_PORT, DEFAULT_PROXY_PORT).let { if (it?.matches(Regex(NUMBER_REGEX)) == true && it.toLong() <= MAX_PORT_NUMBER) { it.toInt() @@ -76,8 +76,8 @@ class VpnPreferenceHolder @Inject constructor( DEFAULT_PROXY_PORT.toInt() } } - val proxyUser = defaultPreferences.getString(PROXY_USER, "") ?: "" - val proxyPass = defaultPreferences.getString(PROXY_PASS, "") ?: "" + val proxyUser = defaultPreferences.getString(PROXY_USER, "")?.take(127) ?: "" + val proxyPass = defaultPreferences.getString(PROXY_PASS, "")?.take(127) ?: "" val useProxy = defaultPreferences.getBoolean(USE_PROXY, false) && proxyAddress.isNotBlank() diff --git a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/service/VpnRulesHolder.java b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/service/VpnRulesHolder.java index c5af713c9..dd36ee2e4 100644 --- a/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/service/VpnRulesHolder.java +++ b/tordnscrypt/src/main/java/pan/alexander/tordnscrypt/vpn/service/VpnRulesHolder.java @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ package pan.alexander.tordnscrypt.vpn.service; @@ -197,9 +197,9 @@ && isIpInLanRange(packet.daddr)) { modulesStatus.getTorState() != STOPPED && !vpnPreferences.getUseIPv6Tor() || fixTTLForPacket - //|| packet.dport == PLAINTEXT_DNS_PORT - //|| (torIsRunning && redirectToTor) - || (vpnPreferences.getUseProxy() && redirectToProxy)) + || (vpnPreferences.getUseProxy() + && (!vpnPreferences.getProxyAddress().equals(LOOPBACK_ADDRESS) + || vpnPreferences.getBlockIPv6DnsCrypt()))) && (packet.saddr.contains(":") || packet.daddr.contains(":"))) { logi("Block ipv6 " + packet); } else if (vpnPreferences.getBlockHttp() && packet.dport == 80 @@ -300,7 +300,8 @@ && isPacketAllowedForCompatibilityMode(packet, fixTTLForPacket)) { vpn.addUIDtoDNSQueryRawRecords( packet.uid, packet.daddr, - packet.dport, + //Unknown incoming packet or Multicast DNS + (packet.uid == -1 || packet.uid == 0 || packet.uid == 1020) && packet.sport < packet.dport ? packet.sport : packet.dport, packet.saddr, packet.allowed, packet.protocol @@ -472,18 +473,20 @@ void prepareForwarding() { boolean dnsCryptReady = modulesStatus.isDnsCryptReady(); boolean torReady = modulesStatus.isTorReady(); - boolean systemDNSAllowed = modulesStatus.isSystemDNSAllowed(); //If Tor is ready and DNSCrypt is not, app will use Tor Exit node DNS in VPN mode - if (dnsCryptState == RUNNING && (dnsCryptReady || !systemDNSAllowed)) { + if (dnsCryptState == RUNNING && dnsCryptReady) { forwardDnsToDnsCrypt(dnsCryptPort, ownUID); if (itpdState == RUNNING) { forwardAddressToITPD(itpdHttpPort, ownUID); } - } else if (torState == RUNNING && (torReady || !systemDNSAllowed)) { + } else if (torState == RUNNING && torReady) { forwardDnsToTor(torDNSPort, ownUID); } else if (dnsCryptState != STOPPED) { forwardDnsToDnsCrypt(dnsCryptPort, ownUID); + if (itpdState == RUNNING) { + forwardAddressToITPD(itpdHttpPort, ownUID); + } } else if (torState != STOPPED) { forwardDnsToTor(torDNSPort, ownUID); } else if (firewallState == STARTING || firewallState == RUNNING) { diff --git a/tordnscrypt/src/main/jni/invizible/dhcp.c b/tordnscrypt/src/main/jni/invizible/dhcp.c index 7148ae37d..2cf2cae56 100644 --- a/tordnscrypt/src/main/jni/invizible/dhcp.c +++ b/tordnscrypt/src/main/jni/invizible/dhcp.c @@ -17,7 +17,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ #include "invizible.h" diff --git a/tordnscrypt/src/main/jni/invizible/dns.c b/tordnscrypt/src/main/jni/invizible/dns.c index 93d421349..8d7faef80 100644 --- a/tordnscrypt/src/main/jni/invizible/dns.c +++ b/tordnscrypt/src/main/jni/invizible/dns.c @@ -17,7 +17,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ #include "invizible.h" diff --git a/tordnscrypt/src/main/jni/invizible/icmp.c b/tordnscrypt/src/main/jni/invizible/icmp.c index 004e0b014..c548e9f14 100644 --- a/tordnscrypt/src/main/jni/invizible/icmp.c +++ b/tordnscrypt/src/main/jni/invizible/icmp.c @@ -18,7 +18,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ #include "invizible.h" @@ -186,7 +186,7 @@ void check_icmp_socket(const struct arguments *args, const struct epoll_event *e } jboolean is_icmp_supported(const uint8_t *pkt, - const uint8_t *payload) { + const uint8_t *payload) { // Get headers const uint8_t version = (*pkt) >> 4; const struct iphdr *ip4 = (struct iphdr *) pkt; @@ -237,7 +237,7 @@ jboolean handle_icmp(const struct arguments *args, while (cur != NULL && !((cur->protocol == IPPROTO_ICMP || cur->protocol == IPPROTO_ICMPV6) && !cur->icmp.stop && cur->icmp.version == version && - cur->icmp.id == icmp->icmp_id && + cur->icmp.id == icmp->icmp_id && (version == 4 ? cur->icmp.saddr.ip4 == ip4->saddr && cur->icmp.daddr.ip4 == ip4->daddr : memcmp(&cur->icmp.saddr.ip6, &ip6->ip6_src, 16) == 0 && @@ -358,6 +358,13 @@ int open_icmp_socket(const struct arguments *args, const struct icmp_session *cu if (protect_socket(args, sock, cur->uid) < 0) return -1; + // Set non blocking + int flags = fcntl(sock, F_GETFL, 0); + if (flags < 0 || fcntl(sock, F_SETFL, flags | O_NONBLOCK) < 0) { + log_android(ANDROID_LOG_ERROR, "fcntl socket O_NONBLOCK error %d: %s", + errno, strerror(errno)); + return -1; + } return sock; } diff --git a/tordnscrypt/src/main/jni/invizible/invizible.c b/tordnscrypt/src/main/jni/invizible/invizible.c index 708a7651a..43aa89554 100644 --- a/tordnscrypt/src/main/jni/invizible/invizible.c +++ b/tordnscrypt/src/main/jni/invizible/invizible.c @@ -17,7 +17,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ #include "invizible.h" @@ -41,6 +41,7 @@ char proxy_socks5_password[127 + 1]; int own_uid = -2; bool compatibility_mode = false; bool can_filter = true; +bool bypass_lan = true; extern int max_tun_msg; @@ -166,13 +167,14 @@ Java_pan_alexander_tordnscrypt_vpn_service_ServiceVPN_jni_1start( JNIEXPORT void JNICALL Java_pan_alexander_tordnscrypt_vpn_service_ServiceVPN_jni_1run( JNIEnv *env, jobject instance, jlong context, jint tun, jboolean fwd53, jint rcode, - jboolean compatibility, jboolean filter) { + jboolean compatibility, jboolean filter, jboolean lan) { struct context *ctx = (struct context *) context; log_android(ANDROID_LOG_WARN, "Running tun %d fwd53 %d level %d", tun, fwd53, LOG_LEVEL); compatibility_mode = compatibility; can_filter = filter; + bypass_lan = lan; // Set blocking int flags = fcntl(tun, F_GETFL, 0); diff --git a/tordnscrypt/src/main/jni/invizible/invizible.h b/tordnscrypt/src/main/jni/invizible/invizible.h index 90fc8e7a7..b39be0c0d 100644 --- a/tordnscrypt/src/main/jni/invizible/invizible.h +++ b/tordnscrypt/src/main/jni/invizible/invizible.h @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . 
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ #include #include @@ -73,6 +73,7 @@ #define UDP_TIMEOUT_53 15 // seconds #define UDP_TIMEOUT_ANY 300 // seconds #define UDP_KEEP_TIMEOUT 60 // seconds +#define TCP_YIELD 10 // packets #define UDP_YIELD 10 // packets #define TCP_INIT_TIMEOUT 20 // seconds ~net.inet.tcp.keepinit @@ -96,6 +97,9 @@ #define LOOPBACK_ADDRESS "127.0.0.1" #define LOOPBACK_ADDRESS_IPv6 "::1" +#define LOOPBACK_ADDRESS_MAPPED_IPv6 "::ffff:127.0.0.1" + +#define I2PD_REDIRECT_ADDRESS "10.191.0.1" struct context { pthread_mutex_t lock; @@ -583,6 +587,10 @@ int is_writable(int fd); long long get_ms(); +int str_equal(const char *s, const char *f); + +int str_ends_with(const char *s, const char *suff); + void ng_add_alloc(const char *ptr, const char *tag); void ng_delete_alloc(const char *ptr, const char *file, int line); diff --git a/tordnscrypt/src/main/jni/invizible/ip.c b/tordnscrypt/src/main/jni/invizible/ip.c index 5a90bda45..2eefc1e7c 100644 --- a/tordnscrypt/src/main/jni/invizible/ip.c +++ b/tordnscrypt/src/main/jni/invizible/ip.c @@ -18,7 +18,7 @@ You should have received a copy of the GNU General Public License along with InviZible Pro. If not, see . - Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com */ #include "invizible.h" diff --git a/tordnscrypt/src/main/jni/invizible/neon2sse.h b/tordnscrypt/src/main/jni/invizible/neon2sse.h new file mode 100644 index 000000000..6ba738a81 --- /dev/null +++ b/tordnscrypt/src/main/jni/invizible/neon2sse.h @@ -0,0 +1,16969 @@ +//created by Victoria Zhislina, the Senior Application Engineer, Intel Corporation + +//*** Copyright (C) 2012-2022 Intel Corporation. All rights reserved. + +//IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. + +//By downloading, copying, installing or using the software you agree to this license. +//If you do not agree to this license, do not download, install, copy or use the software. + +// License Agreement +//Redistribution and use in source and binary forms, with or without modification, +//are permitted provided that the following conditions are met: + +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. + +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. + +//This software is provided by the copyright holders and contributors "as is" and +//any express or implied warranties, including, but not limited to, the implied +//warranties of merchantability and fitness for a particular purpose are disclaimed. +//In no event shall the Intel Corporation or contributors be liable for any direct, +//indirect, incidental, special, exemplary, or consequential damages +//(including, but not limited to, procurement of substitute goods or services; +//loss of use, data, or profits; or business interruption) however caused +//and on any theory of liability, whether in contract, strict liability, +//or tort (including negligence or otherwise) arising in any way out of +//the use of this software, even if advised of the possibility of such damage. 
+ +//***************************************************************************************** +// This file is intended to simplify ARM->IA32 porting +// It makes the correspondence between ARM NEON intrinsics (as defined in "arm_neon.h") +// and x86 SIMD (up to AVX2) intrinsic functions as defined in headers files below +//MMX instruction set is not used due to non availability on x64 systems, +//performance overhead and the necessity to use the EMMS instruction (_mm_empty())for mmx-x87 floating point switching +//***************************************************************************************** + +//!!!!!!!!!!!!!! To use this file just include it in your project that uses ARM NEON intrinsics instead of "arm_neon.h" and compile it as usual +// but please pay attention at #define USE_SSE4 and #define USE_AVX2 - you might need to define it manually for Intel CPUs supporting SSE4 / AVX2 for greater performance. + +#ifndef NEON2SSE_H +#define NEON2SSE_H + +/*********************************************************************************************************************/ +//!!!!!!!!!!!!!! +//if USE_SSE4 is defined, some functions use SSE4 instructions instead of earlier SSE versions, when undefined - SIMD up to SSSE3 are used +//For older devices without SSE4 support it should be undefined, for newer devices - defined, probably manually if your compiler doesn't set __SSE4_2__ predefine +#ifndef USE_SSE4 +# if defined(__SSE4_2__) +# define USE_SSE4 +# endif +#endif +//if USE_AVX2 is defined, some functions use AVX2 instructions instead of earlier SSE versions, when undefined - SSE instuctions only are used +//For target CPUs without AVX2 support it should be undefined, otherwise - defined, probably manually if your compiler doesn't set __AVX2__ predefine +//Please notice that USE_AVX2 also defines USE_SSE4 because all CPUs that support AVX2 also support SSE4 +#ifndef USE_AVX2 +# if defined(__AVX2__) +# define USE_AVX2 +# endif +#endif +#ifdef USE_AVX2 +# ifndef USE_SSE4 +# define USE_SSE4 +# endif +#endif +/*********************************************************************************************************************/ + +#include <xmmintrin.h> //SSE +#include <emmintrin.h> //SSE2 +#include <pmmintrin.h> //SSE3 +#include <tmmintrin.h> //SSSE3 +#ifdef USE_SSE4 +# include <smmintrin.h> //SSE4.1 +# include <nmmintrin.h> //SSE4.2 +#endif +#ifdef USE_AVX2 +#include <immintrin.h> //AVX2 +#endif +#include <math.h> + +//*************** functions and data attributes, compiler dependent ********************************* +//*********************************************************************************** +#ifdef __GNUC__ +# define _GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) +# define _NEON2SSESTORAGE static +# define _NEON2SSE_ALIGN_16 __attribute__((aligned(16))) +# define _NEON2SSE_ALIGN_32 __attribute__((aligned(32))) +# ifdef __clang__ +# define _NEON2SSE_INLINE _NEON2SSESTORAGE inline __attribute__((__gnu_inline__, __always_inline__)) +# else +# define _NEON2SSE_INLINE _NEON2SSESTORAGE inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +# endif +# ifndef NEON2SSE_DISABLE_PERFORMANCE_WARNING +# if _GCC_VERSION < 40500 +# define _NEON2SSE_PERFORMANCE_WARNING(function, explanation) __attribute__((deprecated)) function +# else +# define _NEON2SSE_PERFORMANCE_WARNING(function, explanation) __attribute__((deprecated(explanation))) function +# endif +# else +# define _NEON2SSE_PERFORMANCE_WARNING(function, explanation) function +# endif +# if defined(__x86_64__) +# define _NEON2SSE_64BIT __x86_64__ +# endif +#else +# define
_NEON2SSESTORAGE static +# define _NEON2SSE_ALIGN_16 __declspec(align(16)) +# define _NEON2SSE_ALIGN_32 __declspec(align(32)) +# define _NEON2SSE_INLINE _NEON2SSESTORAGE __inline +# if (defined(_MSC_VER) || defined (__INTEL_COMPILER)) && !defined(NEON2SSE_DISABLE_PERFORMANCE_WARNING) +# define _NEON2SSE_PERFORMANCE_WARNING(function, EXPLANATION) __declspec(deprecated(EXPLANATION)) function +# if defined(_M_X64) +# define _NEON2SSE_64BIT _M_X64 +# endif +# else +# define _NEON2SSE_PERFORMANCE_WARNING(function, explanation) function +# endif +#endif + +/* Used to mark the intinsics that are declared as functions, but implemented as macros */ +#define _NEON2SSE_GLOBAL + +#if defined (_NEON2SSE_64BIT) && defined (USE_SSE4) +# define _NEON2SSE_64BIT_SSE4 +#endif + +#ifndef UNREFERENCED_PARAM +# define UNREFERENCED_PARAM(P) ((void)(P)) +#endif + +/*********************************************************************************************************************/ +// data types conversion +/*********************************************************************************************************************/ +#if defined(_MSC_VER) && (_MSC_VER < 1300) + typedef signed char int8_t; + typedef unsigned char uint8_t; + typedef signed short int16_t; + typedef unsigned short uint16_t; + typedef signed int int32_t; + typedef unsigned int uint32_t; + typedef signed long long int64_t; + typedef unsigned long long uint64_t; +#elif defined(_MSC_VER) + typedef signed __int8 int8_t; + typedef unsigned __int8 uint8_t; + typedef signed __int16 int16_t; + typedef unsigned __int16 uint16_t; + typedef signed __int32 int32_t; + typedef unsigned __int32 uint32_t; + + typedef signed long long int64_t; + typedef unsigned long long uint64_t; +#else +# include +# include +#endif + + +typedef float float32_t; +#if !defined(__clang__) +typedef float __fp16; +#endif + +typedef double float64_t; + +typedef union __m64_128 { + uint64_t m64_u64[1]; + int64_t m64_i64[1]; + float64_t m64_d64[1]; + uint32_t m64_u32[2]; + int32_t m64_i32[2]; + float32_t m64_f32[2]; + int16_t m64_i16[4]; + uint16_t m64_u16[4]; + int8_t m64_i8[8]; + uint8_t m64_u8[8]; +} __m64_128; + +typedef __m64_128 int8x8_t; +typedef __m64_128 uint8x8_t; +typedef __m64_128 int16x4_t; +typedef __m64_128 uint16x4_t; +typedef __m64_128 int32x2_t; +typedef __m64_128 uint32x2_t; +typedef __m64_128 int64x1_t; +typedef __m64_128 uint64x1_t; +typedef __m64_128 poly8x8_t; +typedef __m64_128 poly16x4_t; + +typedef __m64_128 float32x2_t; +typedef __m128 float32x4_t; + +typedef __m128 float16x4_t; //not supported by IA, for compartibility +typedef __m128 float16x8_t; //not supported by IA, for compartibility + +typedef __m64_128 float64x1_t; +typedef __m128d float64x2_t; + +typedef __m128i int8x16_t; +typedef __m128i int16x8_t; +typedef __m128i int32x4_t; +typedef __m128i int64x2_t; +typedef __m128i uint8x16_t; +typedef __m128i uint16x8_t; +typedef __m128i uint32x4_t; +typedef __m128i uint64x2_t; +typedef __m128i poly8x16_t; +typedef __m128i poly16x8_t; + +#if defined(_MSC_VER) +# define SINT_MIN (-2147483647 - 1) /* min signed int value */ +# define SINT_MAX 2147483647 /* max signed int value */ +#else +# define SINT_MIN INT_MIN /* min signed int value */ +# define SINT_MAX INT_MAX /* max signed int value */ +#endif + +typedef uint8_t poly8_t; +typedef uint16_t poly16_t; + + +//MSVC compilers (tested up to 2012 VS version) doesn't allow using structures or arrays of __m128x type as functions arguments resulting in +//error C2719: 'src': formal parameter with 
__declspec(align('16')) won't be aligned. To avoid it we need the special trick for functions that use these types +struct int8x16x2_t { + int8x16_t val[2]; +}; +struct int16x8x2_t { + int16x8_t val[2]; +}; +struct int32x4x2_t { + int32x4_t val[2]; +}; +struct int64x2x2_t { + int64x2_t val[2]; +}; +//Unfortunately we are unable to merge two 64-bits in on 128 bit register because user should be able to access val[n] members explicitly!!! +struct int8x8x2_t { + int8x8_t val[2]; +}; +struct int16x4x2_t { + int16x4_t val[2]; +}; +struct int32x2x2_t { + int32x2_t val[2]; +}; +struct int64x1x2_t { + int64x1_t val[2]; +}; + +typedef struct int8x16x2_t int8x16x2_t; //for C compilers to make them happy +typedef struct int16x8x2_t int16x8x2_t; //for C compilers to make them happy +typedef struct int32x4x2_t int32x4x2_t; //for C compilers to make them happy +typedef struct int64x2x2_t int64x2x2_t; //for C compilers to make them happy + +typedef struct int8x8x2_t int8x8x2_t; //for C compilers to make them happy +typedef struct int16x4x2_t int16x4x2_t; //for C compilers to make them happy +typedef struct int32x2x2_t int32x2x2_t; //for C compilers to make them happy +typedef struct int64x1x2_t int64x1x2_t; //for C compilers to make them happy + +/* to avoid pointer conversions the following unsigned integers structures are defined via the corresponding signed integers structures above */ +typedef struct int8x16x2_t uint8x16x2_t; +typedef struct int16x8x2_t uint16x8x2_t; +typedef struct int32x4x2_t uint32x4x2_t; +typedef struct int64x2x2_t uint64x2x2_t; +typedef struct int8x16x2_t poly8x16x2_t; +typedef struct int16x8x2_t poly16x8x2_t; + +typedef struct int8x8x2_t uint8x8x2_t; +typedef struct int16x4x2_t uint16x4x2_t; +typedef struct int32x2x2_t uint32x2x2_t; +typedef struct int64x1x2_t uint64x1x2_t; +typedef struct int8x8x2_t poly8x8x2_t; +typedef struct int16x4x2_t poly16x4x2_t; + +//float +struct float32x4x2_t { + float32x4_t val[2]; +}; +struct float16x8x2_t { + float16x8_t val[2]; +}; +struct float32x2x2_t { + float32x2_t val[2]; +}; + +typedef struct float32x4x2_t float32x4x2_t; //for C compilers to make them happy +typedef struct float16x8x2_t float16x8x2_t; //for C compilers to make them happy +typedef struct float32x2x2_t float32x2x2_t; //for C compilers to make them happy +typedef float16x8x2_t float16x4x2_t; + +//4 +struct int8x16x4_t { + int8x16_t val[4]; +}; +struct int16x8x4_t { + int16x8_t val[4]; +}; +struct int32x4x4_t { + int32x4_t val[4]; +}; +struct int64x2x4_t { + int64x2_t val[4]; +}; + +struct int8x8x4_t { + int8x8_t val[4]; +}; +struct int16x4x4_t { + int16x4_t val[4]; +}; +struct int32x2x4_t { + int32x2_t val[4]; +}; +struct int64x1x4_t { + int64x1_t val[4]; +}; + +typedef struct int8x16x4_t int8x16x4_t; //for C compilers to make them happy +typedef struct int16x8x4_t int16x8x4_t; //for C compilers to make them happy +typedef struct int32x4x4_t int32x4x4_t; //for C compilers to make them happy +typedef struct int64x2x4_t int64x2x4_t; //for C compilers to make them happy + +typedef struct int8x8x4_t int8x8x4_t; //for C compilers to make them happy +typedef struct int16x4x4_t int16x4x4_t; //for C compilers to make them happy +typedef struct int32x2x4_t int32x2x4_t; //for C compilers to make them happy +typedef struct int64x1x4_t int64x1x4_t; //for C compilers to make them happy + +/* to avoid pointer conversions the following unsigned integers structures are defined via the corresponding signed integers dealing structures above:*/ +typedef struct int8x8x4_t uint8x8x4_t; +typedef 
struct int16x4x4_t uint16x4x4_t; +typedef struct int32x2x4_t uint32x2x4_t; +typedef struct int64x1x4_t uint64x1x4_t; +typedef struct int8x8x4_t poly8x8x4_t; +typedef struct int16x4x4_t poly16x4x4_t; + +typedef struct int8x16x4_t uint8x16x4_t; +typedef struct int16x8x4_t uint16x8x4_t; +typedef struct int32x4x4_t uint32x4x4_t; +typedef struct int64x2x4_t uint64x2x4_t; +typedef struct int8x16x4_t poly8x16x4_t; +typedef struct int16x8x4_t poly16x8x4_t; + +struct float32x4x4_t { + float32x4_t val[4]; +}; +struct float16x8x4_t { + float16x8_t val[4]; +}; +struct float32x2x4_t { + float32x2_t val[4]; +}; + +typedef struct float32x4x4_t float32x4x4_t; //for C compilers to make them happy +typedef struct float16x8x4_t float16x8x4_t; //for C compilers to make them happy +typedef struct float32x2x4_t float32x2x4_t; //for C compilers to make them happy +typedef float16x8x4_t float16x4x4_t; + +//3 +struct int16x8x3_t { + int16x8_t val[3]; +}; +struct int32x4x3_t { + int32x4_t val[3]; +}; +struct int64x2x3_t { + int64x2_t val[3]; +}; +struct int8x16x3_t { + int8x16_t val[3]; +}; + +struct int16x4x3_t { + int16x4_t val[3]; +}; +struct int32x2x3_t { + int32x2_t val[3]; +}; +struct int64x1x3_t { + int64x1_t val[3]; +}; +struct int8x8x3_t { + int8x8_t val[3]; +}; +typedef struct int16x8x3_t int16x8x3_t; //for C compilers to make them happy +typedef struct int32x4x3_t int32x4x3_t; //for C compilers to make them happy +typedef struct int64x2x3_t int64x2x3_t; //for C compilers to make them happy +typedef struct int8x16x3_t int8x16x3_t; //for C compilers to make them happy + +typedef struct int8x8x3_t int8x8x3_t; //for C compilers to make them happy +typedef struct int16x4x3_t int16x4x3_t; //for C compilers to make them happy +typedef struct int32x2x3_t int32x2x3_t; //for C compilers to make them happy +typedef struct int64x1x3_t int64x1x3_t; //for C compilers to make them happy + + +/* to avoid pointer conversions the following unsigned integers structures are defined via the corresponding signed integers dealing structures above:*/ +typedef struct int8x16x3_t uint8x16x3_t; +typedef struct int16x8x3_t uint16x8x3_t; +typedef struct int32x4x3_t uint32x4x3_t; +typedef struct int64x2x3_t uint64x2x3_t; +typedef struct int8x16x3_t poly8x16x3_t; +typedef struct int16x8x3_t poly16x8x3_t; +typedef struct int8x8x3_t uint8x8x3_t; +typedef struct int16x4x3_t uint16x4x3_t; +typedef struct int32x2x3_t uint32x2x3_t; +typedef struct int64x1x3_t uint64x1x3_t; +typedef struct int8x8x3_t poly8x8x3_t; +typedef struct int16x4x3_t poly16x4x3_t; + +//float +struct float32x4x3_t { + float32x4_t val[3]; +}; +struct float32x2x3_t { + float32x2_t val[3]; +}; +struct float16x8x3_t { + float16x8_t val[3]; +}; + +typedef struct float32x4x3_t float32x4x3_t; //for C compilers to make them happy +typedef struct float16x8x3_t float16x8x3_t; //for C compilers to make them happy +typedef struct float32x2x3_t float32x2x3_t; //for C compilers to make them happy +typedef float16x8x3_t float16x4x3_t; + + +//**************************************************************************** +//****** Porting auxiliary macros ******************************************** + +//** floating point related macros ** +#define _M128i(a) _mm_castps_si128(a) +#define _M128(a) _mm_castsi128_ps(a) +#ifdef USE_AVX2 +#define _M256(a) _mm256_castsi256_ps(a) +#endif +//here the most performance effective implementation is compiler and 32/64 bits build dependent +#if defined (_NEON2SSE_64BIT) || (defined (__INTEL_COMPILER) && (__INTEL_COMPILER >= 1500) ) +# define _pM128i(a) 
_mm_cvtsi64_si128(*(int64_t*)(&(a))) +# define _M64(out, inp) out.m64_i64[0] = _mm_cvtsi128_si64 (inp); +# define _M64f(out, inp) out.m64_i64[0] = _mm_cvtsi128_si64 (_M128i(inp)); +#else + //for 32-bit gcc and Microsoft compiler builds +# define _pM128i(a) _mm_loadl_epi64((__m128i*)&(a)) +# define _M64(out, inp) _mm_storel_epi64 ((__m128i*)&(out), inp) +# define _M64f(out, inp) _mm_storel_epi64 ((__m128i*)&(out), _M128i(inp)) +#endif +#define _pM128(a) _mm_castsi128_ps(_pM128i(a)) + +#define return64(a) _M64(res64,a); return res64; +#define return64f(a) _M64f(res64,a); return res64; + +#define _Ui64(a) (*(uint64_t*)&(a)) +#define _UNSIGNED_T(a) u ## a + +#define _SIGNBIT64 ((uint64_t)1 << 63) +#define _SWAP_HI_LOW32 (2 | (3 << 2) | (0 << 4) | (1 << 6)) +#define _INSERTPS_NDX(srcField, dstField) (((srcField) << 6) | ((dstField) << 4) ) + +#define _NEON2SSE_REASON_SLOW_SERIAL "The function may be very slow due to the serial implementation, please try to avoid it" +#define _NEON2SSE_REASON_SLOW_UNEFFECTIVE "The function may be slow due to inefficient x86 SIMD implementation, please try to avoid it" + +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +#define __constrange(min,max) const +#define __transfersize(size) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& mask constants used in porting &&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& +_NEON2SSE_ALIGN_32 static const int8_t mask8_16_even_odd[32] = { 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15, 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 }; +_NEON2SSE_ALIGN_32 static const int8_t mask8_32_even_odd[32] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15, 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 }; +//&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& + +//************************************************************************* +//************************************************************************* +//********* Function declarations as declared in original arm_neon.h ****** +//************************************************************************* +//Vector add: vadd -> Vr[i]:=Va[i]+Vb[i], Vr, Va, Vb have equal lane sizes.
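+/* Illustrative sketch: how a 64-bit "d register" intrinsic such as the vadd_s8
+   declared below is typically emulated with the helper macros above - widen
+   each half-register argument with _pM128i, apply the matching SSE2 intrinsic,
+   then write the low 64 bits of the result back via return64. The function
+   name and body are an assumption about the usual pattern, not necessarily
+   this header's actual implementation, so the sketch is left disabled. */
+#if 0
+static int8x8_t vadd_s8_sketch(int8x8_t a, int8x8_t b)
+{
+    int8x8_t res64;
+    return64(_mm_add_epi8(_pM128i(a), _pM128i(b))); /* per-lane 8-bit add */
+}
+#endif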
+_NEON2SSESTORAGE int8x8_t vadd_s8(int8x8_t a, int8x8_t b); // VADD.I8 d0,d0,d0 +_NEON2SSESTORAGE int16x4_t vadd_s16(int16x4_t a, int16x4_t b); // VADD.I16 d0,d0,d0 +_NEON2SSESTORAGE int32x2_t vadd_s32(int32x2_t a, int32x2_t b); // VADD.I32 d0,d0,d0 +_NEON2SSESTORAGE int64x1_t vadd_s64(int64x1_t a, int64x1_t b); // VADD.I64 d0,d0,d0 +_NEON2SSESTORAGE float32x2_t vadd_f32(float32x2_t a, float32x2_t b); // VADD.F32 d0,d0,d0 +_NEON2SSE_GLOBAL uint8x8_t vadd_u8(uint8x8_t a, uint8x8_t b); // VADD.I8 d0,d0,d0 +_NEON2SSE_GLOBAL uint16x4_t vadd_u16(uint16x4_t a, uint16x4_t b); // VADD.I16 d0,d0,d0 +_NEON2SSE_GLOBAL uint32x2_t vadd_u32(uint32x2_t a, uint32x2_t b); // VADD.I32 d0,d0,d0 +_NEON2SSESTORAGE uint64x1_t vadd_u64(uint64x1_t a, uint64x1_t b); // VADD.I64 d0,d0,d0 +_NEON2SSE_GLOBAL int8x16_t vaddq_s8(int8x16_t a, int8x16_t b); // VADD.I8 q0,q0,q0 +_NEON2SSE_GLOBAL int16x8_t vaddq_s16(int16x8_t a, int16x8_t b); // VADD.I16 q0,q0,q0 +_NEON2SSE_GLOBAL int32x4_t vaddq_s32(int32x4_t a, int32x4_t b); // VADD.I32 q0,q0,q0 +_NEON2SSE_GLOBAL int64x2_t vaddq_s64(int64x2_t a, int64x2_t b); // VADD.I64 q0,q0,q0 +_NEON2SSE_GLOBAL float32x4_t vaddq_f32(float32x4_t a, float32x4_t b); // VADD.F32 q0,q0,q0 +_NEON2SSE_GLOBAL uint8x16_t vaddq_u8(uint8x16_t a, uint8x16_t b); // VADD.I8 q0,q0,q0 +_NEON2SSE_GLOBAL uint16x8_t vaddq_u16(uint16x8_t a, uint16x8_t b); // VADD.I16 q0,q0,q0 +_NEON2SSE_GLOBAL uint32x4_t vaddq_u32(uint32x4_t a, uint32x4_t b); // VADD.I32 q0,q0,q0 +_NEON2SSE_GLOBAL uint64x2_t vaddq_u64(uint64x2_t a, uint64x2_t b); // VADD.I64 q0,q0,q0 +//Vector long add: vaddl -> Vr[i]:=Va[i]+Vb[i], Va, Vb have equal lane sizes, result is a 128 bit vector of lanes that are twice the width. +_NEON2SSESTORAGE int16x8_t vaddl_s8(int8x8_t a, int8x8_t b); // VADDL.S8 q0,d0,d0 +_NEON2SSESTORAGE int32x4_t vaddl_s16(int16x4_t a, int16x4_t b); // VADDL.S16 q0,d0,d0 +_NEON2SSESTORAGE int64x2_t vaddl_s32(int32x2_t a, int32x2_t b); // VADDL.S32 q0,d0,d0 +_NEON2SSESTORAGE uint16x8_t vaddl_u8(uint8x8_t a, uint8x8_t b); // VADDL.U8 q0,d0,d0 +_NEON2SSESTORAGE uint32x4_t vaddl_u16(uint16x4_t a, uint16x4_t b); // VADDL.U16 q0,d0,d0 +_NEON2SSESTORAGE uint64x2_t vaddl_u32(uint32x2_t a, uint32x2_t b); // VADDL.U32 q0,d0,d0 +//Vector wide add: vaddw -> Vr[i]:=Va[i]+Vb[i] +_NEON2SSESTORAGE int16x8_t vaddw_s8(int16x8_t a, int8x8_t b); // VADDW.S8 q0,q0,d0 +_NEON2SSESTORAGE int32x4_t vaddw_s16(int32x4_t a, int16x4_t b); // VADDW.S16 q0,q0,d0 +_NEON2SSESTORAGE int64x2_t vaddw_s32(int64x2_t a, int32x2_t b); // VADDW.S32 q0,q0,d0 +_NEON2SSESTORAGE uint16x8_t vaddw_u8(uint16x8_t a, uint8x8_t b); // VADDW.U8 q0,q0,d0 +_NEON2SSESTORAGE uint32x4_t vaddw_u16(uint32x4_t a, uint16x4_t b); // VADDW.U16 q0,q0,d0 +_NEON2SSESTORAGE uint64x2_t vaddw_u32(uint64x2_t a, uint32x2_t b); // VADDW.U32 q0,q0,d0 +//Vector halving add: vhadd -> Vr[i]:=(Va[i]+Vb[i])>>1 +_NEON2SSESTORAGE int8x8_t vhadd_s8(int8x8_t a, int8x8_t b); // VHADD.S8 d0,d0,d0 +_NEON2SSESTORAGE int16x4_t vhadd_s16(int16x4_t a, int16x4_t b); // VHADD.S16 d0,d0,d0 +_NEON2SSESTORAGE int32x2_t vhadd_s32(int32x2_t a, int32x2_t b); // VHADD.S32 d0,d0,d0 +_NEON2SSESTORAGE uint8x8_t vhadd_u8(uint8x8_t a, uint8x8_t b); // VHADD.U8 d0,d0,d0 +_NEON2SSESTORAGE uint16x4_t vhadd_u16(uint16x4_t a, uint16x4_t b); // VHADD.U16 d0,d0,d0 +_NEON2SSESTORAGE uint32x2_t vhadd_u32(uint32x2_t a, uint32x2_t b); // VHADD.U32 d0,d0,d0 +_NEON2SSESTORAGE int8x16_t vhaddq_s8(int8x16_t a, int8x16_t b); // VHADD.S8 q0,q0,q0 +_NEON2SSESTORAGE int16x8_t vhaddq_s16(int16x8_t a, int16x8_t b); // VHADD.S16 q0,q0,q0
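+/* Scalar reference for the halving-add semantics above (the vhaddq_*
+   declarations continue below). hadd_s8_ref and rhadd_s8_ref are hypothetical
+   helpers for illustration only: vhadd truncates, vrhadd rounds, and the sum
+   is formed at double width so it cannot overflow the lane type. */
+#if 0
+static int8_t hadd_s8_ref (int8_t a, int8_t b) { return (int8_t)(((int16_t)a + (int16_t)b)     >> 1); }
+static int8_t rhadd_s8_ref(int8_t a, int8_t b) { return (int8_t)(((int16_t)a + (int16_t)b + 1) >> 1); }
+#endif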
+_NEON2SSESTORAGE int32x4_t vhaddq_s32(int32x4_t a, int32x4_t b); // VHADD.S32 q0,q0,q0 +_NEON2SSESTORAGE uint8x16_t vhaddq_u8(uint8x16_t a, uint8x16_t b); // VHADD.U8 q0,q0,q0 +_NEON2SSESTORAGE uint16x8_t vhaddq_u16(uint16x8_t a, uint16x8_t b); // VHADD.U16 q0,q0,q0 +_NEON2SSESTORAGE uint32x4_t vhaddq_u32(uint32x4_t a, uint32x4_t b); // VHADD.U32 q0,q0,q0 +//Vector rounding halving add: vrhadd -> Vr[i]:=(Va[i]+Vb[i]+1)>>1 +_NEON2SSESTORAGE int8x8_t vrhadd_s8(int8x8_t a, int8x8_t b); // VRHADD.S8 d0,d0,d0 +_NEON2SSESTORAGE int16x4_t vrhadd_s16(int16x4_t a, int16x4_t b); // VRHADD.S16 d0,d0,d0 +_NEON2SSESTORAGE int32x2_t vrhadd_s32(int32x2_t a, int32x2_t b); // VRHADD.S32 d0,d0,d0 +_NEON2SSESTORAGE uint8x8_t vrhadd_u8(uint8x8_t a, uint8x8_t b); // VRHADD.U8 d0,d0,d0 +_NEON2SSESTORAGE uint16x4_t vrhadd_u16(uint16x4_t a, uint16x4_t b); // VRHADD.U16 d0,d0,d0 +_NEON2SSESTORAGE uint32x2_t vrhadd_u32(uint32x2_t a, uint32x2_t b); // VRHADD.U32 d0,d0,d0 +_NEON2SSESTORAGE int8x16_t vrhaddq_s8(int8x16_t a, int8x16_t b); // VRHADD.S8 q0,q0,q0 +_NEON2SSESTORAGE int16x8_t vrhaddq_s16(int16x8_t a, int16x8_t b); // VRHADD.S16 q0,q0,q0 +_NEON2SSESTORAGE int32x4_t vrhaddq_s32(int32x4_t a, int32x4_t b); // VRHADD.S32 q0,q0,q0 +_NEON2SSE_GLOBAL uint8x16_t vrhaddq_u8(uint8x16_t a, uint8x16_t b); // VRHADD.U8 q0,q0,q0 +_NEON2SSE_GLOBAL uint16x8_t vrhaddq_u16(uint16x8_t a, uint16x8_t b); // VRHADD.U16 q0,q0,q0 +_NEON2SSESTORAGE uint32x4_t vrhaddq_u32(uint32x4_t a, uint32x4_t b); // VRHADD.U32 q0,q0,q0 +//Vector saturating add: vqadd -> Vr[i]:=sat(Va[i]+Vb[i]) +_NEON2SSESTORAGE int8x8_t vqadd_s8(int8x8_t a, int8x8_t b); // VQADD.S8 d0,d0,d0 +_NEON2SSESTORAGE int16x4_t vqadd_s16(int16x4_t a, int16x4_t b); // VQADD.S16 d0,d0,d0 +_NEON2SSESTORAGE int32x2_t vqadd_s32(int32x2_t a, int32x2_t b); // VQADD.S32 d0,d0,d0 +_NEON2SSESTORAGE int64x1_t vqadd_s64(int64x1_t a, int64x1_t b); // VQADD.S64 d0,d0,d0 +_NEON2SSESTORAGE uint8x8_t vqadd_u8(uint8x8_t a, uint8x8_t b); // VQADD.U8 d0,d0,d0 +_NEON2SSESTORAGE uint16x4_t vqadd_u16(uint16x4_t a, uint16x4_t b); // VQADD.U16 d0,d0,d0 +_NEON2SSESTORAGE uint32x2_t vqadd_u32(uint32x2_t a, uint32x2_t b); // VQADD.U32 d0,d0,d0 +_NEON2SSESTORAGE uint64x1_t vqadd_u64(uint64x1_t a, uint64x1_t b); // VQADD.U64 d0,d0,d0 +_NEON2SSE_GLOBAL int8x16_t vqaddq_s8(int8x16_t a, int8x16_t b); // VQADD.S8 q0,q0,q0 +_NEON2SSE_GLOBAL int16x8_t vqaddq_s16(int16x8_t a, int16x8_t b); // VQADD.S16 q0,q0,q0 +_NEON2SSESTORAGE int32x4_t vqaddq_s32(int32x4_t a, int32x4_t b); // VQADD.S32 q0,q0,q0 +_NEON2SSESTORAGE int64x2_t vqaddq_s64(int64x2_t a, int64x2_t b); // VQADD.S64 q0,q0,q0 +_NEON2SSE_GLOBAL uint8x16_t vqaddq_u8(uint8x16_t a, uint8x16_t b); // VQADD.U8 q0,q0,q0 +_NEON2SSE_GLOBAL uint16x8_t vqaddq_u16(uint16x8_t a, uint16x8_t b); // VQADD.U16 q0,q0,q0 +_NEON2SSESTORAGE uint32x4_t vqaddq_u32(uint32x4_t a, uint32x4_t b); // VQADD.U32 q0,q0,q0 +_NEON2SSESTORAGE uint64x2_t vqaddq_u64(uint64x2_t a, uint64x2_t b); // VQADD.U64 q0,q0,q0 +//Vector add high half: vaddhn -> Vr[i]:=high half of (Va[i]+Vb[i]), narrowed to half-width lanes +_NEON2SSESTORAGE int8x8_t vaddhn_s16(int16x8_t a, int16x8_t b); // VADDHN.I16 d0,q0,q0 +_NEON2SSESTORAGE int16x4_t vaddhn_s32(int32x4_t a, int32x4_t b); // VADDHN.I32 d0,q0,q0 +_NEON2SSESTORAGE int32x2_t vaddhn_s64(int64x2_t a, int64x2_t b); // VADDHN.I64 d0,q0,q0 +_NEON2SSESTORAGE uint8x8_t vaddhn_u16(uint16x8_t a, uint16x8_t b); // VADDHN.I16 d0,q0,q0 +_NEON2SSESTORAGE uint16x4_t vaddhn_u32(uint32x4_t a, uint32x4_t b); // VADDHN.I32 d0,q0,q0 +_NEON2SSE_GLOBAL uint32x2_t vaddhn_u64(uint64x2_t a, uint64x2_t b); //
VADDHN.I64 d0,q0,q0 +//Vector rounding add high half: vraddhn +_NEON2SSESTORAGE int8x8_t vraddhn_s16(int16x8_t a, int16x8_t b); // VRADDHN.I16 d0,q0,q0 +_NEON2SSESTORAGE int16x4_t vraddhn_s32(int32x4_t a, int32x4_t b); // VRADDHN.I32 d0,q0,q0 +_NEON2SSESTORAGE int32x2_t vraddhn_s64(int64x2_t a, int64x2_t b); // VRADDHN.I64 d0,q0,q0 +_NEON2SSESTORAGE uint8x8_t vraddhn_u16(uint16x8_t a, uint16x8_t b); // VRADDHN.I16 d0,q0,q0 +_NEON2SSESTORAGE uint16x4_t vraddhn_u32(uint32x4_t a, uint32x4_t b); // VRADDHN.I32 d0,q0,q0 +_NEON2SSE_GLOBAL uint32x2_t vraddhn_u64(uint64x2_t a, uint64x2_t b); // VRADDHN.I64 d0,q0,q0 +//Multiplication +//Vector multiply: vmul -> Vr[i] := Va[i] * Vb[i] +_NEON2SSESTORAGE int8x8_t vmul_s8(int8x8_t a, int8x8_t b); // VMUL.I8 d0,d0,d0 +_NEON2SSE_GLOBAL int16x4_t vmul_s16(int16x4_t a, int16x4_t b); // VMUL.I16 d0,d0,d0 +_NEON2SSE_GLOBAL int32x2_t vmul_s32(int32x2_t a, int32x2_t b); // VMUL.I32 d0,d0,d0 +_NEON2SSESTORAGE float32x2_t vmul_f32(float32x2_t a, float32x2_t b); // VMUL.F32 d0,d0,d0 +_NEON2SSESTORAGE uint8x8_t vmul_u8(uint8x8_t a, uint8x8_t b); // VMUL.I8 d0,d0,d0 +_NEON2SSESTORAGE uint16x4_t vmul_u16(uint16x4_t a, uint16x4_t b); // VMUL.I16 d0,d0,d0 +_NEON2SSESTORAGE uint32x2_t vmul_u32(uint32x2_t a, uint32x2_t b); // VMUL.I32 d0,d0,d0 +_NEON2SSESTORAGE poly8x8_t vmul_p8(poly8x8_t a, poly8x8_t b); // VMUL.P8 d0,d0,d0 +_NEON2SSESTORAGE int8x16_t vmulq_s8(int8x16_t a, int8x16_t b); // VMUL.I8 q0,q0,q0 +_NEON2SSE_GLOBAL int16x8_t vmulq_s16(int16x8_t a, int16x8_t b); // VMUL.I16 q0,q0,q0 +_NEON2SSE_GLOBAL int32x4_t vmulq_s32(int32x4_t a, int32x4_t b); // VMUL.I32 q0,q0,q0 +_NEON2SSE_GLOBAL float32x4_t vmulq_f32(float32x4_t a, float32x4_t b); // VMUL.F32 q0,q0,q0 +_NEON2SSESTORAGE uint8x16_t vmulq_u8(uint8x16_t a, uint8x16_t b); // VMUL.I8 q0,q0,q0 +_NEON2SSE_GLOBAL uint16x8_t vmulq_u16(uint16x8_t a, uint16x8_t b); // VMUL.I16 q0,q0,q0 +_NEON2SSE_GLOBAL uint32x4_t vmulq_u32(uint32x4_t a, uint32x4_t b); // VMUL.I32 q0,q0,q0 +_NEON2SSESTORAGE poly8x16_t vmulq_p8(poly8x16_t a, poly8x16_t b); // VMUL.P8 q0,q0,q0 +//multiply lane +_NEON2SSESTORAGE int16x4_t vmul_lane_s16 (int16x4_t a, int16x4_t b, __constrange(0,3) int c); +_NEON2SSESTORAGE int32x2_t vmul_lane_s32 (int32x2_t a, int32x2_t b, __constrange(0,1) int c); +_NEON2SSESTORAGE float32x2_t vmul_lane_f32 (float32x2_t a, float32x2_t b, __constrange(0,1) int c); +_NEON2SSE_GLOBAL uint16x4_t vmul_lane_u16 (uint16x4_t a, uint16x4_t b, __constrange(0,3) int c); +_NEON2SSE_GLOBAL uint32x2_t vmul_lane_u32 (uint32x2_t a, uint32x2_t b, __constrange(0,1) int c); +_NEON2SSESTORAGE int16x8_t vmulq_lane_s16 (int16x8_t a, int16x4_t b, __constrange(0,3) int c); +_NEON2SSESTORAGE int32x4_t vmulq_lane_s32 (int32x4_t a, int32x2_t b, __constrange(0,1) int c); +_NEON2SSESTORAGE float32x4_t vmulq_lane_f32 (float32x4_t a, float32x2_t b, __constrange(0,1) int c); +_NEON2SSE_GLOBAL uint16x8_t vmulq_lane_u16 (uint16x8_t a, uint16x4_t b, __constrange(0,3) int c); +_NEON2SSE_GLOBAL uint32x4_t vmulq_lane_u32 (uint32x4_t a, uint32x2_t b, __constrange(0,1) int c); +//Vector multiply accumulate: vmla -> Vr[i] := Va[i] + Vb[i] * Vc[i] +_NEON2SSESTORAGE int8x8_t vmla_s8(int8x8_t a, int8x8_t b, int8x8_t c); // VMLA.I8 d0,d0,d0 +_NEON2SSESTORAGE int16x4_t vmla_s16(int16x4_t a, int16x4_t b, int16x4_t c); // VMLA.I16 d0,d0,d0 +_NEON2SSESTORAGE int32x2_t vmla_s32(int32x2_t a, int32x2_t b, int32x2_t c); // VMLA.I32 d0,d0,d0 +_NEON2SSESTORAGE float32x2_t vmla_f32(float32x2_t a, float32x2_t b, float32x2_t c); // VMLA.F32 d0,d0,d0 +_NEON2SSESTORAGE uint8x8_t 
vmla_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c); // VMLA.I8 d0,d0,d0 +_NEON2SSE_GLOBAL uint16x4_t vmla_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c); // VMLA.I16 d0,d0,d0 +_NEON2SSE_GLOBAL uint32x2_t vmla_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c); // VMLA.I32 d0,d0,d0 +_NEON2SSESTORAGE int8x16_t vmlaq_s8(int8x16_t a, int8x16_t b, int8x16_t c); // VMLA.I8 q0,q0,q0 +_NEON2SSESTORAGE int16x8_t vmlaq_s16(int16x8_t a, int16x8_t b, int16x8_t c); // VMLA.I16 q0,q0,q0 +_NEON2SSESTORAGE int32x4_t vmlaq_s32(int32x4_t a, int32x4_t b, int32x4_t c); // VMLA.I32 q0,q0,q0 +_NEON2SSESTORAGE float32x4_t vmlaq_f32(float32x4_t a, float32x4_t b, float32x4_t c); // VMLA.F32 q0,q0,q0 +_NEON2SSESTORAGE uint8x16_t vmlaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c); // VMLA.I8 q0,q0,q0 +_NEON2SSE_GLOBAL uint16x8_t vmlaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c); // VMLA.I16 q0,q0,q0 +_NEON2SSE_GLOBAL uint32x4_t vmlaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c); // VMLA.I32 q0,q0,q0 +//Vector multiply accumulate long: vmlal -> Vr[i] := Va[i] + Vb[i] * Vc[i] +_NEON2SSESTORAGE int16x8_t vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c); // VMLAL.S8 q0,d0,d0 +_NEON2SSESTORAGE int32x4_t vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c); // VMLAL.S16 q0,d0,d0 +_NEON2SSESTORAGE int64x2_t vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c); // VMLAL.S32 q0,d0,d0 +_NEON2SSESTORAGE uint16x8_t vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c); // VMLAL.U8 q0,d0,d0 +_NEON2SSESTORAGE uint32x4_t vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c); // VMLAL.U16 q0,d0,d0 +_NEON2SSESTORAGE uint64x2_t vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c); // VMLAL.U32 q0,d0,d0 +//Vector multiply subtract: vmls -> Vr[i] := Va[i] - Vb[i] * Vc[i] +_NEON2SSESTORAGE int8x8_t vmls_s8(int8x8_t a, int8x8_t b, int8x8_t c); // VMLS.I8 d0,d0,d0 +_NEON2SSESTORAGE int16x4_t vmls_s16(int16x4_t a, int16x4_t b, int16x4_t c); // VMLS.I16 d0,d0,d0 +_NEON2SSESTORAGE int32x2_t vmls_s32(int32x2_t a, int32x2_t b, int32x2_t c); // VMLS.I32 d0,d0,d0 +_NEON2SSESTORAGE float32x2_t vmls_f32(float32x2_t a, float32x2_t b, float32x2_t c); // VMLS.F32 d0,d0,d0 +_NEON2SSESTORAGE uint8x8_t vmls_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c); // VMLS.I8 d0,d0,d0 +_NEON2SSE_GLOBAL uint16x4_t vmls_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c); // VMLS.I16 d0,d0,d0 +_NEON2SSE_GLOBAL uint32x2_t vmls_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c); // VMLS.I32 d0,d0,d0 +_NEON2SSESTORAGE int8x16_t vmlsq_s8(int8x16_t a, int8x16_t b, int8x16_t c); // VMLS.I8 q0,q0,q0 +_NEON2SSESTORAGE int16x8_t vmlsq_s16(int16x8_t a, int16x8_t b, int16x8_t c); // VMLS.I16 q0,q0,q0 +_NEON2SSESTORAGE int32x4_t vmlsq_s32(int32x4_t a, int32x4_t b, int32x4_t c); // VMLS.I32 q0,q0,q0 +_NEON2SSESTORAGE float32x4_t vmlsq_f32(float32x4_t a, float32x4_t b, float32x4_t c); // VMLS.F32 q0,q0,q0 +_NEON2SSESTORAGE uint8x16_t vmlsq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c); // VMLS.I8 q0,q0,q0 +_NEON2SSE_GLOBAL uint16x8_t vmlsq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c); // VMLS.I16 q0,q0,q0 +_NEON2SSE_GLOBAL uint32x4_t vmlsq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c); // VMLS.I32 q0,q0,q0 +//Vector multiply subtract long +_NEON2SSESTORAGE int16x8_t vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c); // VMLSL.S8 q0,d0,d0 +_NEON2SSESTORAGE int32x4_t vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c); // VMLSL.S16 q0,d0,d0 +_NEON2SSESTORAGE int64x2_t vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c); // VMLSL.S32 q0,d0,d0 +_NEON2SSESTORAGE uint16x8_t vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c); // VMLSL.U8 
q0,d0,d0 +_NEON2SSESTORAGE uint32x4_t vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c); // VMLSL.U16 q0,d0,d0 +_NEON2SSESTORAGE uint64x2_t vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c); // VMLSL.U32 q0,d0,d0 +//Vector saturating doubling multiply high +_NEON2SSESTORAGE int16x4_t vqdmulh_s16(int16x4_t a, int16x4_t b); // VQDMULH.S16 d0,d0,d0 +_NEON2SSESTORAGE int32x2_t vqdmulh_s32(int32x2_t a, int32x2_t b); // VQDMULH.S32 d0,d0,d0 +_NEON2SSESTORAGE int16x8_t vqdmulhq_s16(int16x8_t a, int16x8_t b); // VQDMULH.S16 q0,q0,q0 +_NEON2SSESTORAGE int32x4_t vqdmulhq_s32(int32x4_t a, int32x4_t b); // VQDMULH.S32 q0,q0,q0 +//Vector saturating rounding doubling multiply high +_NEON2SSESTORAGE int16x4_t vqrdmulh_s16(int16x4_t a, int16x4_t b); // VQRDMULH.S16 d0,d0,d0 +_NEON2SSESTORAGE int32x2_t vqrdmulh_s32(int32x2_t a, int32x2_t b); // VQRDMULH.S32 d0,d0,d0 +_NEON2SSESTORAGE int16x8_t vqrdmulhq_s16(int16x8_t a, int16x8_t b); // VQRDMULH.S16 q0,q0,q0 +_NEON2SSESTORAGE int32x4_t vqrdmulhq_s32(int32x4_t a, int32x4_t b); // VQRDMULH.S32 q0,q0,q0 +//Vector saturating doubling multiply accumulate long +_NEON2SSESTORAGE int32x4_t vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c); // VQDMLAL.S16 q0,d0,d0 +_NEON2SSESTORAGE int64x2_t vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c); // VQDMLAL.S32 q0,d0,d0 +//Vector saturating doubling multiply subtract long +_NEON2SSESTORAGE int32x4_t vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c); // VQDMLSL.S16 q0,d0,d0 +_NEON2SSESTORAGE int64x2_t vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c); // VQDMLSL.S32 q0,d0,d0 +//Vector long multiply +_NEON2SSESTORAGE int16x8_t vmull_s8(int8x8_t a, int8x8_t b); // VMULL.S8 q0,d0,d0 +_NEON2SSESTORAGE int32x4_t vmull_s16(int16x4_t a, int16x4_t b); // VMULL.S16 q0,d0,d0 +_NEON2SSESTORAGE int64x2_t vmull_s32(int32x2_t a, int32x2_t b); // VMULL.S32 q0,d0,d0 +_NEON2SSESTORAGE uint16x8_t vmull_u8(uint8x8_t a, uint8x8_t b); // VMULL.U8 q0,d0,d0 +_NEON2SSESTORAGE uint32x4_t vmull_u16(uint16x4_t a, uint16x4_t b); // VMULL.U16 q0,d0,d0 +_NEON2SSESTORAGE uint64x2_t vmull_u32(uint32x2_t a, uint32x2_t b); // VMULL.U32 q0,d0,d0 +_NEON2SSESTORAGE poly16x8_t vmull_p8(poly8x8_t a, poly8x8_t b); // VMULL.P8 q0,d0,d0 +//Vector saturating doubling long multiply +_NEON2SSESTORAGE int32x4_t vqdmull_s16(int16x4_t a, int16x4_t b); // VQDMULL.S16 q0,d0,d0 +_NEON2SSESTORAGE int64x2_t vqdmull_s32(int32x2_t a, int32x2_t b); // VQDMULL.S32 q0,d0,d0 +//Subtraction +//Vector subtract +_NEON2SSESTORAGE int8x8_t vsub_s8(int8x8_t a, int8x8_t b); // VSUB.I8 d0,d0,d0 +_NEON2SSESTORAGE int16x4_t vsub_s16(int16x4_t a, int16x4_t b); // VSUB.I16 d0,d0,d0 +_NEON2SSESTORAGE int32x2_t vsub_s32(int32x2_t a, int32x2_t b); // VSUB.I32 d0,d0,d0 +_NEON2SSESTORAGE int64x1_t vsub_s64(int64x1_t a, int64x1_t b); // VSUB.I64 d0,d0,d0 +_NEON2SSESTORAGE float32x2_t vsub_f32(float32x2_t a, float32x2_t b); // VSUB.F32 d0,d0,d0 +_NEON2SSE_GLOBAL uint8x8_t vsub_u8(uint8x8_t a, uint8x8_t b); // VSUB.I8 d0,d0,d0 +_NEON2SSE_GLOBAL uint16x4_t vsub_u16(uint16x4_t a, uint16x4_t b); // VSUB.I16 d0,d0,d0 +_NEON2SSE_GLOBAL uint32x2_t vsub_u32(uint32x2_t a, uint32x2_t b); // VSUB.I32 d0,d0,d0 +_NEON2SSESTORAGE uint64x1_t vsub_u64(uint64x1_t a, uint64x1_t b); // VSUB.I64 d0,d0,d0 +_NEON2SSE_GLOBAL int8x16_t vsubq_s8(int8x16_t a, int8x16_t b); // VSUB.I8 q0,q0,q0 +_NEON2SSE_GLOBAL int16x8_t vsubq_s16(int16x8_t a, int16x8_t b); // VSUB.I16 q0,q0,q0 +_NEON2SSE_GLOBAL int32x4_t vsubq_s32(int32x4_t a, int32x4_t b); // VSUB.I32 q0,q0,q0 +_NEON2SSE_GLOBAL int64x2_t vsubq_s64(int64x2_t a, 
int64x2_t b); // VSUB.I64 q0,q0,q0 +_NEON2SSE_GLOBAL float32x4_t vsubq_f32(float32x4_t a, float32x4_t b); // VSUB.F32 q0,q0,q0 +_NEON2SSE_GLOBAL uint8x16_t vsubq_u8(uint8x16_t a, uint8x16_t b); // VSUB.I8 q0,q0,q0 +_NEON2SSE_GLOBAL uint16x8_t vsubq_u16(uint16x8_t a, uint16x8_t b); // VSUB.I16 q0,q0,q0 +_NEON2SSE_GLOBAL uint32x4_t vsubq_u32(uint32x4_t a, uint32x4_t b); // VSUB.I32 q0,q0,q0 +_NEON2SSE_GLOBAL uint64x2_t vsubq_u64(uint64x2_t a, uint64x2_t b); // VSUB.I64 q0,q0,q0 +//Vector long subtract: vsubl -> Vr[i]:=Va[i]-Vb[i] +_NEON2SSESTORAGE int16x8_t vsubl_s8(int8x8_t a, int8x8_t b); // VSUBL.S8 q0,d0,d0 +_NEON2SSESTORAGE int32x4_t vsubl_s16(int16x4_t a, int16x4_t b); // VSUBL.S16 q0,d0,d0 +_NEON2SSESTORAGE int64x2_t vsubl_s32(int32x2_t a, int32x2_t b); // VSUBL.S32 q0,d0,d0 +_NEON2SSESTORAGE uint16x8_t vsubl_u8(uint8x8_t a, uint8x8_t b); // VSUBL.U8 q0,d0,d0 +_NEON2SSESTORAGE uint32x4_t vsubl_u16(uint16x4_t a, uint16x4_t b); // VSUBL.U16 q0,d0,d0 +_NEON2SSESTORAGE uint64x2_t vsubl_u32(uint32x2_t a, uint32x2_t b); // VSUBL.U32 q0,d0,d0 +//Vector wide subtract: vsubw -> Vr[i]:=Va[i]-Vb[i] +_NEON2SSESTORAGE int16x8_t vsubw_s8(int16x8_t a, int8x8_t b); // VSUBW.S8 q0,q0,d0 +_NEON2SSESTORAGE int32x4_t vsubw_s16(int32x4_t a, int16x4_t b); // VSUBW.S16 q0,q0,d0 +_NEON2SSESTORAGE int64x2_t vsubw_s32(int64x2_t a, int32x2_t b); // VSUBW.S32 q0,q0,d0 +_NEON2SSESTORAGE uint16x8_t vsubw_u8(uint16x8_t a, uint8x8_t b); // VSUBW.U8 q0,q0,d0 +_NEON2SSESTORAGE uint32x4_t vsubw_u16(uint32x4_t a, uint16x4_t b); // VSUBW.U16 q0,q0,d0 +_NEON2SSESTORAGE uint64x2_t vsubw_u32(uint64x2_t a, uint32x2_t b); // VSUBW.U32 q0,q0,d0 +//Vector saturating subtract +_NEON2SSESTORAGE int8x8_t vqsub_s8(int8x8_t a, int8x8_t b); // VQSUB.S8 d0,d0,d0 +_NEON2SSESTORAGE int16x4_t vqsub_s16(int16x4_t a, int16x4_t b); // VQSUB.S16 d0,d0,d0 +_NEON2SSESTORAGE int32x2_t vqsub_s32(int32x2_t a, int32x2_t b); // VQSUB.S32 d0,d0,d0 +_NEON2SSESTORAGE int64x1_t vqsub_s64(int64x1_t a, int64x1_t b); // VQSUB.S64 d0,d0,d0 +_NEON2SSESTORAGE uint8x8_t vqsub_u8(uint8x8_t a, uint8x8_t b); // VQSUB.U8 d0,d0,d0 +_NEON2SSESTORAGE uint16x4_t vqsub_u16(uint16x4_t a, uint16x4_t b); // VQSUB.U16 d0,d0,d0 +_NEON2SSESTORAGE uint32x2_t vqsub_u32(uint32x2_t a, uint32x2_t b); // VQSUB.U32 d0,d0,d0 +_NEON2SSESTORAGE uint64x1_t vqsub_u64(uint64x1_t a, uint64x1_t b); // VQSUB.U64 d0,d0,d0 +_NEON2SSE_GLOBAL int8x16_t vqsubq_s8(int8x16_t a, int8x16_t b); // VQSUB.S8 q0,q0,q0 +_NEON2SSE_GLOBAL int16x8_t vqsubq_s16(int16x8_t a, int16x8_t b); // VQSUB.S16 q0,q0,q0 +_NEON2SSESTORAGE int32x4_t vqsubq_s32(int32x4_t a, int32x4_t b); // VQSUB.S32 q0,q0,q0 +_NEON2SSESTORAGE int64x2_t vqsubq_s64(int64x2_t a, int64x2_t b); // VQSUB.S64 q0,q0,q0 +_NEON2SSE_GLOBAL uint8x16_t vqsubq_u8(uint8x16_t a, uint8x16_t b); // VQSUB.U8 q0,q0,q0 +_NEON2SSE_GLOBAL uint16x8_t vqsubq_u16(uint16x8_t a, uint16x8_t b); // VQSUB.U16 q0,q0,q0 +_NEON2SSESTORAGE uint32x4_t vqsubq_u32(uint32x4_t a, uint32x4_t b); // VQSUB.U32 q0,q0,q0 +_NEON2SSESTORAGE uint64x2_t vqsubq_u64(uint64x2_t a, uint64x2_t b); // VQSUB.U64 q0,q0,q0 +//Vector halving subtract +_NEON2SSESTORAGE int8x8_t vhsub_s8(int8x8_t a, int8x8_t b); // VHSUB.S8 d0,d0,d0 +_NEON2SSESTORAGE int16x4_t vhsub_s16(int16x4_t a, int16x4_t b); // VHSUB.S16 d0,d0,d0 +_NEON2SSESTORAGE int32x2_t vhsub_s32(int32x2_t a, int32x2_t b); // VHSUB.S32 d0,d0,d0 +_NEON2SSESTORAGE uint8x8_t vhsub_u8(uint8x8_t a, uint8x8_t b); // VHSUB.U8 d0,d0,d0 +_NEON2SSESTORAGE uint16x4_t vhsub_u16(uint16x4_t a, uint16x4_t b); // VHSUB.U16 d0,d0,d0 +_NEON2SSESTORAGE
uint32x2_t vhsub_u32(uint32x2_t a, uint32x2_t b); // VHSUB.U32 d0,d0,d0 +_NEON2SSESTORAGE int8x16_t vhsubq_s8(int8x16_t a, int8x16_t b); // VHSUB.S8 q0,q0,q0 +_NEON2SSESTORAGE int16x8_t vhsubq_s16(int16x8_t a, int16x8_t b); // VHSUB.S16 q0,q0,q0 +_NEON2SSESTORAGE int32x4_t vhsubq_s32(int32x4_t a, int32x4_t b); // VHSUB.S32 q0,q0,q0 +_NEON2SSESTORAGE uint8x16_t vhsubq_u8(uint8x16_t a, uint8x16_t b); // VHSUB.U8 q0,q0,q0 +_NEON2SSESTORAGE uint16x8_t vhsubq_u16(uint16x8_t a, uint16x8_t b); // VHSUB.U16 q0,q0,q0 +_NEON2SSESTORAGE uint32x4_t vhsubq_u32(uint32x4_t a, uint32x4_t b); // VHSUB.U32 q0,q0,q0 +//Vector subtract high half +_NEON2SSESTORAGE int8x8_t vsubhn_s16(int16x8_t a, int16x8_t b); // VSUBHN.I16 d0,q0,q0 +_NEON2SSESTORAGE int16x4_t vsubhn_s32(int32x4_t a, int32x4_t b); // VSUBHN.I32 d0,q0,q0 +_NEON2SSESTORAGE int32x2_t vsubhn_s64(int64x2_t a, int64x2_t b); // VSUBHN.I64 d0,q0,q0 +_NEON2SSESTORAGE uint8x8_t vsubhn_u16(uint16x8_t a, uint16x8_t b); // VSUBHN.I16 d0,q0,q0 +_NEON2SSESTORAGE uint16x4_t vsubhn_u32(uint32x4_t a, uint32x4_t b); // VSUBHN.I32 d0,q0,q0 +_NEON2SSE_GLOBAL uint32x2_t vsubhn_u64(uint64x2_t a, uint64x2_t b); // VSUBHN.I64 d0,q0,q0 +//Vector rounding subtract high half +_NEON2SSESTORAGE int8x8_t vrsubhn_s16(int16x8_t a, int16x8_t b); // VRSUBHN.I16 d0,q0,q0 +_NEON2SSESTORAGE int16x4_t vrsubhn_s32(int32x4_t a, int32x4_t b); // VRSUBHN.I32 d0,q0,q0 +_NEON2SSESTORAGE int32x2_t vrsubhn_s64(int64x2_t a, int64x2_t b); // VRSUBHN.I64 d0,q0,q0 +_NEON2SSESTORAGE uint8x8_t vrsubhn_u16(uint16x8_t a, uint16x8_t b); // VRSUBHN.I16 d0,q0,q0 +_NEON2SSESTORAGE uint16x4_t vrsubhn_u32(uint32x4_t a, uint32x4_t b); // VRSUBHN.I32 d0,q0,q0 +_NEON2SSE_GLOBAL uint32x2_t vrsubhn_u64(uint64x2_t a, uint64x2_t b); // VRSUBHN.I64 d0,q0,q0 +//Comparison +//Vector compare equal +_NEON2SSESTORAGE uint8x8_t vceq_s8(int8x8_t a, int8x8_t b); // VCEQ.I8 d0, d0, d0 +_NEON2SSESTORAGE uint16x4_t vceq_s16(int16x4_t a, int16x4_t b); // VCEQ.I16 d0, d0, d0 +_NEON2SSESTORAGE uint32x2_t vceq_s32(int32x2_t a, int32x2_t b); // VCEQ.I32 d0, d0, d0 +_NEON2SSESTORAGE uint32x2_t vceq_f32(float32x2_t a, float32x2_t b); // VCEQ.F32 d0, d0, d0 +_NEON2SSESTORAGE uint8x8_t vceq_u8(uint8x8_t a, uint8x8_t b); // VCEQ.I8 d0, d0, d0 +_NEON2SSESTORAGE uint16x4_t vceq_u16(uint16x4_t a, uint16x4_t b); // VCEQ.I16 d0, d0, d0 +_NEON2SSESTORAGE uint32x2_t vceq_u32(uint32x2_t a, uint32x2_t b); // VCEQ.I32 d0, d0, d0 +_NEON2SSE_GLOBAL uint8x8_t vceq_p8(poly8x8_t a, poly8x8_t b); // VCEQ.I8 d0, d0, d0 +_NEON2SSE_GLOBAL uint8x16_t vceqq_s8(int8x16_t a, int8x16_t b); // VCEQ.I8 q0, q0, q0 +_NEON2SSE_GLOBAL uint16x8_t vceqq_s16(int16x8_t a, int16x8_t b); // VCEQ.I16 q0, q0, q0 +_NEON2SSE_GLOBAL uint32x4_t vceqq_s32(int32x4_t a, int32x4_t b); // VCEQ.I32 q0, q0, q0 +_NEON2SSESTORAGE uint32x4_t vceqq_f32(float32x4_t a, float32x4_t b); // VCEQ.F32 q0, q0, q0 +_NEON2SSE_GLOBAL uint8x16_t vceqq_u8(uint8x16_t a, uint8x16_t b); // VCEQ.I8 q0, q0, q0 +_NEON2SSE_GLOBAL uint16x8_t vceqq_u16(uint16x8_t a, uint16x8_t b); // VCEQ.I16 q0, q0, q0 +_NEON2SSE_GLOBAL uint32x4_t vceqq_u32(uint32x4_t a, uint32x4_t b); // VCEQ.I32 q0, q0, q0 +_NEON2SSE_GLOBAL uint8x16_t vceqq_p8(poly8x16_t a, poly8x16_t b); // VCEQ.I8 q0, q0, q0 +//Vector compare greater-than or equal +_NEON2SSESTORAGE uint8x8_t vcge_s8(int8x8_t a, int8x8_t b); // VCGE.S8 d0, d0, d0 +_NEON2SSESTORAGE uint16x4_t vcge_s16(int16x4_t a, int16x4_t b); // VCGE.S16 d0, d0, d0 +_NEON2SSESTORAGE uint32x2_t vcge_s32(int32x2_t a, int32x2_t b); // VCGE.S32 d0, d0, d0 +_NEON2SSESTORAGE uint32x2_t 
vcge_f32(float32x2_t a, float32x2_t b); // VCGE.F32 d0, d0, d0 +_NEON2SSESTORAGE uint8x8_t vcge_u8(uint8x8_t a, uint8x8_t b); // VCGE.U8 d0, d0, d0 +_NEON2SSESTORAGE uint16x4_t vcge_u16(uint16x4_t a, uint16x4_t b); // VCGE.U16 d0, d0, d0 +_NEON2SSESTORAGE uint32x2_t vcge_u32(uint32x2_t a, uint32x2_t b); // VCGE.U32 d0, d0, d0 +_NEON2SSESTORAGE uint8x16_t vcgeq_s8(int8x16_t a, int8x16_t b); // VCGE.S8 q0, q0, q0 +_NEON2SSESTORAGE uint16x8_t vcgeq_s16(int16x8_t a, int16x8_t b); // VCGE.S16 q0, q0, q0 +_NEON2SSESTORAGE uint32x4_t vcgeq_s32(int32x4_t a, int32x4_t b); // VCGE.S32 q0, q0, q0 +_NEON2SSESTORAGE uint32x4_t vcgeq_f32(float32x4_t a, float32x4_t b); // VCGE.F32 q0, q0, q0 +_NEON2SSESTORAGE uint8x16_t vcgeq_u8(uint8x16_t a, uint8x16_t b); // VCGE.U8 q0, q0, q0 +_NEON2SSESTORAGE uint16x8_t vcgeq_u16(uint16x8_t a, uint16x8_t b); // VCGE.U16 q0, q0, q0 +_NEON2SSESTORAGE uint32x4_t vcgeq_u32(uint32x4_t a, uint32x4_t b); // VCGE.U32 q0, q0, q0 +//Vector compare less-than or equal +_NEON2SSESTORAGE uint8x8_t vcle_s8(int8x8_t a, int8x8_t b); // VCGE.S8 d0, d0, d0 +_NEON2SSESTORAGE uint16x4_t vcle_s16(int16x4_t a, int16x4_t b); // VCGE.S16 d0, d0, d0 +_NEON2SSESTORAGE uint32x2_t vcle_s32(int32x2_t a, int32x2_t b); // VCGE.S32 d0, d0, d0 +_NEON2SSESTORAGE uint32x2_t vcle_f32(float32x2_t a, float32x2_t b); // VCGE.F32 d0, d0, d0 +_NEON2SSE_GLOBAL uint8x8_t vcle_u8(uint8x8_t a, uint8x8_t b); // VCGE.U8 d0, d0, d0 +_NEON2SSE_GLOBAL uint16x4_t vcle_u16(uint16x4_t a, uint16x4_t b); // VCGE.U16 d0, d0, d0 +_NEON2SSE_GLOBAL uint32x2_t vcle_u32(uint32x2_t a, uint32x2_t b); // VCGE.U32 d0, d0, d0 +_NEON2SSESTORAGE uint8x16_t vcleq_s8(int8x16_t a, int8x16_t b); // VCGE.S8 q0, q0, q0 +_NEON2SSESTORAGE uint16x8_t vcleq_s16(int16x8_t a, int16x8_t b); // VCGE.S16 q0, q0, q0 +_NEON2SSESTORAGE uint32x4_t vcleq_s32(int32x4_t a, int32x4_t b); // VCGE.S32 q0, q0, q0 +_NEON2SSESTORAGE uint32x4_t vcleq_f32(float32x4_t a, float32x4_t b); // VCGE.F32 q0, q0, q0 +_NEON2SSESTORAGE uint8x16_t vcleq_u8(uint8x16_t a, uint8x16_t b); // VCGE.U8 q0, q0, q0 +_NEON2SSESTORAGE uint16x8_t vcleq_u16(uint16x8_t a, uint16x8_t b); // VCGE.U16 q0, q0, q0 +_NEON2SSESTORAGE uint32x4_t vcleq_u32(uint32x4_t a, uint32x4_t b); // VCGE.U32 q0, q0, q0 +//Vector compare greater-than +_NEON2SSESTORAGE uint8x8_t vcgt_s8(int8x8_t a, int8x8_t b); // VCGT.S8 d0, d0, d0 +_NEON2SSESTORAGE uint16x4_t vcgt_s16(int16x4_t a, int16x4_t b); // VCGT.S16 d0, d0, d0 +_NEON2SSESTORAGE uint32x2_t vcgt_s32(int32x2_t a, int32x2_t b); // VCGT.S32 d0, d0, d0 +_NEON2SSESTORAGE uint32x2_t vcgt_f32(float32x2_t a, float32x2_t b); // VCGT.F32 d0, d0, d0 +_NEON2SSESTORAGE uint8x8_t vcgt_u8(uint8x8_t a, uint8x8_t b); // VCGT.U8 d0, d0, d0 +_NEON2SSESTORAGE uint16x4_t vcgt_u16(uint16x4_t a, uint16x4_t b); // VCGT.U16 d0, d0, d0 +_NEON2SSESTORAGE uint32x2_t vcgt_u32(uint32x2_t a, uint32x2_t b); // VCGT.U32 d0, d0, d0 +_NEON2SSE_GLOBAL uint8x16_t vcgtq_s8(int8x16_t a, int8x16_t b); // VCGT.S8 q0, q0, q0 +_NEON2SSE_GLOBAL uint16x8_t vcgtq_s16(int16x8_t a, int16x8_t b); // VCGT.S16 q0, q0, q0 +_NEON2SSE_GLOBAL uint32x4_t vcgtq_s32(int32x4_t a, int32x4_t b); // VCGT.S32 q0, q0, q0 +_NEON2SSESTORAGE uint32x4_t vcgtq_f32(float32x4_t a, float32x4_t b); // VCGT.F32 q0, q0, q0 +_NEON2SSESTORAGE uint8x16_t vcgtq_u8(uint8x16_t a, uint8x16_t b); // VCGT.U8 q0, q0, q0 +_NEON2SSESTORAGE uint16x8_t vcgtq_u16(uint16x8_t a, uint16x8_t b); // VCGT.U16 q0, q0, q0 +_NEON2SSESTORAGE uint32x4_t vcgtq_u32(uint32x4_t a, uint32x4_t b); // VCGT.U32 q0, q0, q0 +//Vector compare less-than 
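+/* Usage note (illustration only; less_mask_ref is a hypothetical helper): every
+   vc* comparison yields a mask vector - a lane is all ones when the predicate
+   holds and all zeros otherwise - so the result can feed bitwise blending
+   directly. The less-than declarations follow below; their assembly reference
+   column reads VCGT because a < b is computed as b > a with swapped operands. */
+#if 0
+static uint32x2_t less_mask_ref(float32x2_t a, float32x2_t b)
+{
+    return vclt_f32(a, b); /* lane i = (a[i] < b[i]) ? 0xFFFFFFFF : 0 */
+}
+#endif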
+_NEON2SSE_GLOBAL uint8x8_t vclt_s8(int8x8_t a, int8x8_t b); // VCGT.S8 d0, d0, d0 +_NEON2SSE_GLOBAL uint16x4_t vclt_s16(int16x4_t a, int16x4_t b); // VCGT.S16 d0, d0, d0 +_NEON2SSE_GLOBAL uint32x2_t vclt_s32(int32x2_t a, int32x2_t b); // VCGT.S32 d0, d0, d0 +_NEON2SSE_GLOBAL uint32x2_t vclt_f32(float32x2_t a, float32x2_t b); // VCGT.F32 d0, d0, d0 +_NEON2SSE_GLOBAL uint8x8_t vclt_u8(uint8x8_t a, uint8x8_t b); // VCGT.U8 d0, d0, d0 +_NEON2SSE_GLOBAL uint16x4_t vclt_u16(uint16x4_t a, uint16x4_t b); // VCGT.U16 d0, d0, d0 +_NEON2SSE_GLOBAL uint32x2_t vclt_u32(uint32x2_t a, uint32x2_t b); // VCGT.U32 d0, d0, d0 +_NEON2SSE_GLOBAL uint8x16_t vcltq_s8(int8x16_t a, int8x16_t b); // VCGT.S8 q0, q0, q0 +_NEON2SSE_GLOBAL uint16x8_t vcltq_s16(int16x8_t a, int16x8_t b); // VCGT.S16 q0, q0, q0 +_NEON2SSE_GLOBAL uint32x4_t vcltq_s32(int32x4_t a, int32x4_t b); // VCGT.S32 q0, q0, q0 +_NEON2SSE_GLOBAL uint32x4_t vcltq_f32(float32x4_t a, float32x4_t b); // VCGT.F32 q0, q0, q0 +_NEON2SSE_GLOBAL uint8x16_t vcltq_u8(uint8x16_t a, uint8x16_t b); // VCGT.U8 q0, q0, q0 +_NEON2SSE_GLOBAL uint16x8_t vcltq_u16(uint16x8_t a, uint16x8_t b); // VCGT.U16 q0, q0, q0 +_NEON2SSE_GLOBAL uint32x4_t vcltq_u32(uint32x4_t a, uint32x4_t b); // VCGT.U32 q0, q0, q0 +//Vector compare absolute greater-than or equal +_NEON2SSESTORAGE uint32x2_t vcage_f32(float32x2_t a, float32x2_t b); // VACGE.F32 d0, d0, d0 +_NEON2SSESTORAGE uint32x4_t vcageq_f32(float32x4_t a, float32x4_t b); // VACGE.F32 q0, q0, q0 +//Vector compare absolute less-than or equal +_NEON2SSESTORAGE uint32x2_t vcale_f32(float32x2_t a, float32x2_t b); // VACGE.F32 d0, d0, d0 +_NEON2SSESTORAGE uint32x4_t vcaleq_f32(float32x4_t a, float32x4_t b); // VACGE.F32 q0, q0, q0 +//Vector compare absolute greater-than +_NEON2SSESTORAGE uint32x2_t vcagt_f32(float32x2_t a, float32x2_t b); // VACGT.F32 d0, d0, d0 +_NEON2SSESTORAGE uint32x4_t vcagtq_f32(float32x4_t a, float32x4_t b); // VACGT.F32 q0, q0, q0 +//Vector compare absolute less-than +_NEON2SSESTORAGE uint32x2_t vcalt_f32(float32x2_t a, float32x2_t b); // VACGT.F32 d0, d0, d0 +_NEON2SSESTORAGE uint32x4_t vcaltq_f32(float32x4_t a, float32x4_t b); // VACGT.F32 q0, q0, q0 +//Vector test bits +_NEON2SSESTORAGE uint8x8_t vtst_s8(int8x8_t a, int8x8_t b); // VTST.8 d0, d0, d0 +_NEON2SSESTORAGE uint16x4_t vtst_s16(int16x4_t a, int16x4_t b); // VTST.16 d0, d0, d0 +_NEON2SSESTORAGE uint32x2_t vtst_s32(int32x2_t a, int32x2_t b); // VTST.32 d0, d0, d0 +_NEON2SSE_GLOBAL uint8x8_t vtst_u8(uint8x8_t a, uint8x8_t b); // VTST.8 d0, d0, d0 +_NEON2SSE_GLOBAL uint16x4_t vtst_u16(uint16x4_t a, uint16x4_t b); // VTST.16 d0, d0, d0 +_NEON2SSE_GLOBAL uint32x2_t vtst_u32(uint32x2_t a, uint32x2_t b); // VTST.32 d0, d0, d0 +_NEON2SSE_GLOBAL uint8x8_t vtst_p8(poly8x8_t a, poly8x8_t b); // VTST.8 d0, d0, d0 +_NEON2SSESTORAGE uint8x16_t vtstq_s8(int8x16_t a, int8x16_t b); // VTST.8 q0, q0, q0 +_NEON2SSESTORAGE uint16x8_t vtstq_s16(int16x8_t a, int16x8_t b); // VTST.16 q0, q0, q0 +_NEON2SSESTORAGE uint32x4_t vtstq_s32(int32x4_t a, int32x4_t b); // VTST.32 q0, q0, q0 +_NEON2SSE_GLOBAL uint8x16_t vtstq_u8(uint8x16_t a, uint8x16_t b); // VTST.8 q0, q0, q0 +_NEON2SSE_GLOBAL uint16x8_t vtstq_u16(uint16x8_t a, uint16x8_t b); // VTST.16 q0, q0, q0 +_NEON2SSE_GLOBAL uint32x4_t vtstq_u32(uint32x4_t a, uint32x4_t b); // VTST.32 q0, q0, q0 +_NEON2SSE_GLOBAL uint8x16_t vtstq_p8(poly8x16_t a, poly8x16_t b); // VTST.8 q0, q0, q0 +//Absolute difference +//Absolute difference between the arguments: Vr[i] = | Va[i] - Vb[i] | +_NEON2SSESTORAGE int8x8_t vabd_s8(int8x8_t a, 
int8x8_t b); // VABD.S8 d0,d0,d0 +_NEON2SSESTORAGE int16x4_t vabd_s16(int16x4_t a, int16x4_t b); // VABD.S16 d0,d0,d0 +_NEON2SSESTORAGE int32x2_t vabd_s32(int32x2_t a, int32x2_t b); // VABD.S32 d0,d0,d0 +_NEON2SSESTORAGE uint8x8_t vabd_u8(uint8x8_t a, uint8x8_t b); // VABD.U8 d0,d0,d0 +_NEON2SSESTORAGE uint16x4_t vabd_u16(uint16x4_t a, uint16x4_t b); // VABD.U16 d0,d0,d0 +_NEON2SSESTORAGE uint32x2_t vabd_u32(uint32x2_t a, uint32x2_t b); // VABD.U32 d0,d0,d0 +_NEON2SSESTORAGE float32x2_t vabd_f32(float32x2_t a, float32x2_t b); // VABD.F32 d0,d0,d0 +_NEON2SSESTORAGE int8x16_t vabdq_s8(int8x16_t a, int8x16_t b); // VABD.S8 q0,q0,q0 +_NEON2SSESTORAGE int16x8_t vabdq_s16(int16x8_t a, int16x8_t b); // VABD.S16 q0,q0,q0 +_NEON2SSESTORAGE int32x4_t vabdq_s32(int32x4_t a, int32x4_t b); // VABD.S32 q0,q0,q0 +_NEON2SSESTORAGE uint8x16_t vabdq_u8(uint8x16_t a, uint8x16_t b); // VABD.U8 q0,q0,q0 +_NEON2SSESTORAGE uint16x8_t vabdq_u16(uint16x8_t a, uint16x8_t b); // VABD.U16 q0,q0,q0 +_NEON2SSESTORAGE uint32x4_t vabdq_u32(uint32x4_t a, uint32x4_t b); // VABD.U32 q0,q0,q0 +_NEON2SSESTORAGE float32x4_t vabdq_f32(float32x4_t a, float32x4_t b); // VABD.F32 q0,q0,q0 +//Absolute difference - long +_NEON2SSESTORAGE int16x8_t vabdl_s8(int8x8_t a, int8x8_t b); // VABDL.S8 q0,d0,d0 +_NEON2SSESTORAGE int32x4_t vabdl_s16(int16x4_t a, int16x4_t b); // VABDL.S16 q0,d0,d0 +_NEON2SSESTORAGE int64x2_t vabdl_s32(int32x2_t a, int32x2_t b); // VABDL.S32 q0,d0,d0 +_NEON2SSESTORAGE uint16x8_t vabdl_u8(uint8x8_t a, uint8x8_t b); // VABDL.U8 q0,d0,d0 +_NEON2SSESTORAGE uint32x4_t vabdl_u16(uint16x4_t a, uint16x4_t b); // VABDL.U16 q0,d0,d0 +_NEON2SSESTORAGE uint64x2_t vabdl_u32(uint32x2_t a, uint32x2_t b); // VABDL.U32 q0,d0,d0 +//Absolute difference and accumulate: Vr[i] = Va[i] + | Vb[i] - Vc[i] | +_NEON2SSESTORAGE int8x8_t vaba_s8(int8x8_t a, int8x8_t b, int8x8_t c); // VABA.S8 d0,d0,d0 +_NEON2SSESTORAGE int16x4_t vaba_s16(int16x4_t a, int16x4_t b, int16x4_t c); // VABA.S16 d0,d0,d0 +_NEON2SSESTORAGE int32x2_t vaba_s32(int32x2_t a, int32x2_t b, int32x2_t c); // VABA.S32 d0,d0,d0 +_NEON2SSESTORAGE uint8x8_t vaba_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c); // VABA.U8 d0,d0,d0 +_NEON2SSESTORAGE uint16x4_t vaba_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c); // VABA.U16 d0,d0,d0 +_NEON2SSESTORAGE uint32x2_t vaba_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c); // VABA.U32 d0,d0,d0 +_NEON2SSESTORAGE int8x16_t vabaq_s8(int8x16_t a, int8x16_t b, int8x16_t c); // VABA.S8 q0,q0,q0 +_NEON2SSESTORAGE int16x8_t vabaq_s16(int16x8_t a, int16x8_t b, int16x8_t c); // VABA.S16 q0,q0,q0 +_NEON2SSESTORAGE int32x4_t vabaq_s32(int32x4_t a, int32x4_t b, int32x4_t c); // VABA.S32 q0,q0,q0 +_NEON2SSESTORAGE uint8x16_t vabaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c); // VABA.U8 q0,q0,q0 +_NEON2SSESTORAGE uint16x8_t vabaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c); // VABA.U16 q0,q0,q0 +_NEON2SSESTORAGE uint32x4_t vabaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c); // VABA.U32 q0,q0,q0 +//Absolute difference and accumulate - long +_NEON2SSESTORAGE int16x8_t vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c); // VABAL.S8 q0,d0,d0 +_NEON2SSESTORAGE int32x4_t vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c); // VABAL.S16 q0,d0,d0 +_NEON2SSESTORAGE int64x2_t vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c); // VABAL.S32 q0,d0,d0 +_NEON2SSESTORAGE uint16x8_t vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c); // VABAL.U8 q0,d0,d0 +_NEON2SSESTORAGE uint32x4_t vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c); // VABAL.U16 q0,d0,d0 +_NEON2SSESTORAGE 
uint64x2_t vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c); // VABAL.U32 q0,d0,d0 +//Max/Min +//vmax -> Vr[i] := (Va[i] >= Vb[i]) ? Va[i] : Vb[i] +_NEON2SSESTORAGE int8x8_t vmax_s8(int8x8_t a, int8x8_t b); // VMAX.S8 d0,d0,d0 +_NEON2SSESTORAGE int16x4_t vmax_s16(int16x4_t a, int16x4_t b); // VMAX.S16 d0,d0,d0 +_NEON2SSESTORAGE int32x2_t vmax_s32(int32x2_t a, int32x2_t b); // VMAX.S32 d0,d0,d0 +_NEON2SSESTORAGE uint8x8_t vmax_u8(uint8x8_t a, uint8x8_t b); // VMAX.U8 d0,d0,d0 +_NEON2SSESTORAGE uint16x4_t vmax_u16(uint16x4_t a, uint16x4_t b); // VMAX.U16 d0,d0,d0 +_NEON2SSESTORAGE uint32x2_t vmax_u32(uint32x2_t a, uint32x2_t b); // VMAX.U32 d0,d0,d0 +_NEON2SSESTORAGE float32x2_t vmax_f32(float32x2_t a, float32x2_t b); // VMAX.F32 d0,d0,d0 +_NEON2SSE_GLOBAL int8x16_t vmaxq_s8(int8x16_t a, int8x16_t b); // VMAX.S8 q0,q0,q0 +_NEON2SSE_GLOBAL int16x8_t vmaxq_s16(int16x8_t a, int16x8_t b); // VMAX.S16 q0,q0,q0 +_NEON2SSE_GLOBAL int32x4_t vmaxq_s32(int32x4_t a, int32x4_t b); // VMAX.S32 q0,q0,q0 +_NEON2SSE_GLOBAL uint8x16_t vmaxq_u8(uint8x16_t a, uint8x16_t b); // VMAX.U8 q0,q0,q0 +_NEON2SSE_GLOBAL uint16x8_t vmaxq_u16(uint16x8_t a, uint16x8_t b); // VMAX.U16 q0,q0,q0 +_NEON2SSE_GLOBAL uint32x4_t vmaxq_u32(uint32x4_t a, uint32x4_t b); // VMAX.U32 q0,q0,q0 +_NEON2SSE_GLOBAL float32x4_t vmaxq_f32(float32x4_t a, float32x4_t b); // VMAX.F32 q0,q0,q0 + +_NEON2SSE_GLOBAL float64x2_t vmaxq_f64(float64x2_t a, float64x2_t b); // VMAX.F64 q0,q0,q0 + +//vmin -> Vr[i] := (Va[i] >= Vb[i]) ? Vb[i] : Va[i] +_NEON2SSESTORAGE int8x8_t vmin_s8(int8x8_t a, int8x8_t b); // VMIN.S8 d0,d0,d0 +_NEON2SSESTORAGE int16x4_t vmin_s16(int16x4_t a, int16x4_t b); // VMIN.S16 d0,d0,d0 +_NEON2SSESTORAGE int32x2_t vmin_s32(int32x2_t a, int32x2_t b); // VMIN.S32 d0,d0,d0 +_NEON2SSESTORAGE uint8x8_t vmin_u8(uint8x8_t a, uint8x8_t b); // VMIN.U8 d0,d0,d0 +_NEON2SSESTORAGE uint16x4_t vmin_u16(uint16x4_t a, uint16x4_t b); // VMIN.U16 d0,d0,d0 +_NEON2SSESTORAGE uint32x2_t vmin_u32(uint32x2_t a, uint32x2_t b); // VMIN.U32 d0,d0,d0 +_NEON2SSESTORAGE float32x2_t vmin_f32(float32x2_t a, float32x2_t b); // VMIN.F32 d0,d0,d0 +_NEON2SSE_GLOBAL int8x16_t vminq_s8(int8x16_t a, int8x16_t b); // VMIN.S8 q0,q0,q0 +_NEON2SSE_GLOBAL int16x8_t vminq_s16(int16x8_t a, int16x8_t b); // VMIN.S16 q0,q0,q0 +_NEON2SSE_GLOBAL int32x4_t vminq_s32(int32x4_t a, int32x4_t b); // VMIN.S32 q0,q0,q0 +_NEON2SSE_GLOBAL uint8x16_t vminq_u8(uint8x16_t a, uint8x16_t b); // VMIN.U8 q0,q0,q0 +_NEON2SSE_GLOBAL uint16x8_t vminq_u16(uint16x8_t a, uint16x8_t b); // VMIN.U16 q0,q0,q0 +_NEON2SSE_GLOBAL uint32x4_t vminq_u32(uint32x4_t a, uint32x4_t b); // VMIN.U32 q0,q0,q0 +_NEON2SSE_GLOBAL float32x4_t vminq_f32(float32x4_t a, float32x4_t b); // VMIN.F32 q0,q0,q0 + +_NEON2SSE_GLOBAL float64x2_t vminq_f64(float64x2_t a, float64x2_t b); // VMIN.F64 q0,q0,q0 + +//Pairwise addition +//Pairwise add +_NEON2SSESTORAGE int8x8_t vpadd_s8(int8x8_t a, int8x8_t b); // VPADD.I8 d0,d0,d0 +_NEON2SSESTORAGE int16x4_t vpadd_s16(int16x4_t a, int16x4_t b); // VPADD.I16 d0,d0,d0 +_NEON2SSESTORAGE int32x2_t vpadd_s32(int32x2_t a, int32x2_t b); // VPADD.I32 d0,d0,d0 +_NEON2SSESTORAGE uint8x8_t vpadd_u8(uint8x8_t a, uint8x8_t b); // VPADD.I8 d0,d0,d0 +_NEON2SSESTORAGE uint16x4_t vpadd_u16(uint16x4_t a, uint16x4_t b); // VPADD.I16 d0,d0,d0 +_NEON2SSESTORAGE uint32x2_t vpadd_u32(uint32x2_t a, uint32x2_t b); // VPADD.I32 d0,d0,d0 +_NEON2SSESTORAGE float32x2_t vpadd_f32(float32x2_t a, float32x2_t b); // VPADD.F32 d0,d0,d0 +//Long pairwise add +_NEON2SSESTORAGE int16x4_t vpaddl_s8(int8x8_t a); // 
VPADDL.S8 d0,d0 +_NEON2SSESTORAGE int32x2_t vpaddl_s16(int16x4_t a); // VPADDL.S16 d0,d0 +_NEON2SSESTORAGE int64x1_t vpaddl_s32(int32x2_t a); // VPADDL.S32 d0,d0 +_NEON2SSESTORAGE uint16x4_t vpaddl_u8(uint8x8_t a); // VPADDL.U8 d0,d0 +_NEON2SSESTORAGE uint32x2_t vpaddl_u16(uint16x4_t a); // VPADDL.U16 d0,d0 +_NEON2SSESTORAGE uint64x1_t vpaddl_u32(uint32x2_t a); // VPADDL.U32 d0,d0 +_NEON2SSESTORAGE int16x8_t vpaddlq_s8(int8x16_t a); // VPADDL.S8 q0,q0 +_NEON2SSESTORAGE int32x4_t vpaddlq_s16(int16x8_t a); // VPADDL.S16 q0,q0 +_NEON2SSESTORAGE int64x2_t vpaddlq_s32(int32x4_t a); // VPADDL.S32 q0,q0 +_NEON2SSESTORAGE uint16x8_t vpaddlq_u8(uint8x16_t a); // VPADDL.U8 q0,q0 +_NEON2SSESTORAGE uint32x4_t vpaddlq_u16(uint16x8_t a); // VPADDL.U16 q0,q0 +_NEON2SSESTORAGE uint64x2_t vpaddlq_u32(uint32x4_t a); // VPADDL.U32 q0,q0 +//Long pairwise add and accumulate +_NEON2SSESTORAGE int16x4_t vpadal_s8(int16x4_t a, int8x8_t b); // VPADAL.S8 d0,d0 +_NEON2SSESTORAGE int32x2_t vpadal_s16(int32x2_t a, int16x4_t b); // VPADAL.S16 d0,d0 +_NEON2SSESTORAGE int64x1_t vpadal_s32(int64x1_t a, int32x2_t b); // VPADAL.S32 d0,d0 +_NEON2SSESTORAGE uint16x4_t vpadal_u8(uint16x4_t a, uint8x8_t b); // VPADAL.U8 d0,d0 +_NEON2SSESTORAGE uint32x2_t vpadal_u16(uint32x2_t a, uint16x4_t b); // VPADAL.U16 d0,d0 +_NEON2SSESTORAGE uint64x1_t vpadal_u32(uint64x1_t a, uint32x2_t b); // VPADAL.U32 d0,d0 +_NEON2SSESTORAGE int16x8_t vpadalq_s8(int16x8_t a, int8x16_t b); // VPADAL.S8 q0,q0 +_NEON2SSESTORAGE int32x4_t vpadalq_s16(int32x4_t a, int16x8_t b); // VPADAL.S16 q0,q0 +_NEON2SSESTORAGE int64x2_t vpadalq_s32(int64x2_t a, int32x4_t b); // VPADAL.S32 q0,q0 +_NEON2SSESTORAGE uint16x8_t vpadalq_u8(uint16x8_t a, uint8x16_t b); // VPADAL.U8 q0,q0 +_NEON2SSESTORAGE uint32x4_t vpadalq_u16(uint32x4_t a, uint16x8_t b); // VPADAL.U16 q0,q0 +_NEON2SSESTORAGE uint64x2_t vpadalq_u32(uint64x2_t a, uint32x4_t b); // VPADAL.U32 q0,q0 +//Folding maximum vpmax -> takes maximum of adjacent pairs +_NEON2SSESTORAGE int8x8_t vpmax_s8(int8x8_t a, int8x8_t b); // VPMAX.S8 d0,d0,d0 +_NEON2SSESTORAGE int16x4_t vpmax_s16(int16x4_t a, int16x4_t b); // VPMAX.S16 d0,d0,d0 +_NEON2SSESTORAGE int32x2_t vpmax_s32(int32x2_t a, int32x2_t b); // VPMAX.S32 d0,d0,d0 +_NEON2SSESTORAGE uint8x8_t vpmax_u8(uint8x8_t a, uint8x8_t b); // VPMAX.U8 d0,d0,d0 +_NEON2SSESTORAGE uint16x4_t vpmax_u16(uint16x4_t a, uint16x4_t b); // VPMAX.U16 d0,d0,d0 +_NEON2SSESTORAGE uint32x2_t vpmax_u32(uint32x2_t a, uint32x2_t b); // VPMAX.U32 d0,d0,d0 +_NEON2SSESTORAGE float32x2_t vpmax_f32(float32x2_t a, float32x2_t b); // VPMAX.F32 d0,d0,d0 +//Folding minimum vpmin -> takes minimum of adjacent pairs +_NEON2SSESTORAGE int8x8_t vpmin_s8(int8x8_t a, int8x8_t b); // VPMIN.S8 d0,d0,d0 +_NEON2SSESTORAGE int16x4_t vpmin_s16(int16x4_t a, int16x4_t b); // VPMIN.S16 d0,d0,d0 +_NEON2SSESTORAGE int32x2_t vpmin_s32(int32x2_t a, int32x2_t b); // VPMIN.S32 d0,d0,d0 +_NEON2SSESTORAGE uint8x8_t vpmin_u8(uint8x8_t a, uint8x8_t b); // VPMIN.U8 d0,d0,d0 +_NEON2SSESTORAGE uint16x4_t vpmin_u16(uint16x4_t a, uint16x4_t b); // VPMIN.U16 d0,d0,d0 +_NEON2SSESTORAGE uint32x2_t vpmin_u32(uint32x2_t a, uint32x2_t b); // VPMIN.U32 d0,d0,d0 +_NEON2SSESTORAGE float32x2_t vpmin_f32(float32x2_t a, float32x2_t b); // VPMIN.F32 d0,d0,d0 +//Reciprocal/Sqrt +_NEON2SSESTORAGE float32x2_t vrecps_f32(float32x2_t a, float32x2_t b); // VRECPS.F32 d0, d0, d0 +_NEON2SSESTORAGE float32x4_t vrecpsq_f32(float32x4_t a, float32x4_t b); // VRECPS.F32 q0, q0, q0 +_NEON2SSESTORAGE float32x2_t vrsqrts_f32(float32x2_t a, float32x2_t b); // 
VRSQRTS.F32 d0, d0, d0 +_NEON2SSESTORAGE float32x4_t vrsqrtsq_f32(float32x4_t a, float32x4_t b); // VRSQRTS.F32 q0, q0, q0 +//Shifts by signed variable +//Vector shift left: Vr[i] := Va[i] << Vb[i] (negative values shift right) +_NEON2SSESTORAGE int8x8_t vshl_s8(int8x8_t a, int8x8_t b); // VSHL.S8 d0,d0,d0 +_NEON2SSESTORAGE int16x4_t vshl_s16(int16x4_t a, int16x4_t b); // VSHL.S16 d0,d0,d0 +_NEON2SSESTORAGE int32x2_t vshl_s32(int32x2_t a, int32x2_t b); // VSHL.S32 d0,d0,d0 +_NEON2SSESTORAGE int64x1_t vshl_s64(int64x1_t a, int64x1_t b); // VSHL.S64 d0,d0,d0 +_NEON2SSESTORAGE uint8x8_t vshl_u8(uint8x8_t a, int8x8_t b); // VSHL.U8 d0,d0,d0 +_NEON2SSESTORAGE uint16x4_t vshl_u16(uint16x4_t a, int16x4_t b); // VSHL.U16 d0,d0,d0 +_NEON2SSESTORAGE uint32x2_t vshl_u32(uint32x2_t a, int32x2_t b); // VSHL.U32 d0,d0,d0 +_NEON2SSESTORAGE uint64x1_t vshl_u64(uint64x1_t a, int64x1_t b); // VSHL.U64 d0,d0,d0 +_NEON2SSESTORAGE int8x16_t vshlq_s8(int8x16_t a, int8x16_t b); // VSHL.S8 q0,q0,q0 +_NEON2SSESTORAGE int16x8_t vshlq_s16(int16x8_t a, int16x8_t b); // VSHL.S16 q0,q0,q0 +_NEON2SSESTORAGE int32x4_t vshlq_s32(int32x4_t a, int32x4_t b); // VSHL.S32 q0,q0,q0 +_NEON2SSESTORAGE int64x2_t vshlq_s64(int64x2_t a, int64x2_t b); // VSHL.S64 q0,q0,q0 +_NEON2SSESTORAGE uint8x16_t vshlq_u8(uint8x16_t a, int8x16_t b); // VSHL.U8 q0,q0,q0 +_NEON2SSESTORAGE uint16x8_t vshlq_u16(uint16x8_t a, int16x8_t b); // VSHL.U16 q0,q0,q0 +_NEON2SSESTORAGE uint32x4_t vshlq_u32(uint32x4_t a, int32x4_t b); // VSHL.U32 q0,q0,q0 +_NEON2SSESTORAGE uint64x2_t vshlq_u64(uint64x2_t a, int64x2_t b); // VSHL.U64 q0,q0,q0 +//Vector saturating shift left: (negative values shift right) +_NEON2SSESTORAGE int8x8_t vqshl_s8(int8x8_t a, int8x8_t b); // VQSHL.S8 d0,d0,d0 +_NEON2SSESTORAGE int16x4_t vqshl_s16(int16x4_t a, int16x4_t b); // VQSHL.S16 d0,d0,d0 +_NEON2SSESTORAGE int32x2_t vqshl_s32(int32x2_t a, int32x2_t b); // VQSHL.S32 d0,d0,d0 +_NEON2SSESTORAGE int64x1_t vqshl_s64(int64x1_t a, int64x1_t b); // VQSHL.S64 d0,d0,d0 +_NEON2SSESTORAGE uint8x8_t vqshl_u8(uint8x8_t a, int8x8_t b); // VQSHL.U8 d0,d0,d0 +_NEON2SSESTORAGE uint16x4_t vqshl_u16(uint16x4_t a, int16x4_t b); // VQSHL.U16 d0,d0,d0 +_NEON2SSESTORAGE uint32x2_t vqshl_u32(uint32x2_t a, int32x2_t b); // VQSHL.U32 d0,d0,d0 +_NEON2SSESTORAGE uint64x1_t vqshl_u64(uint64x1_t a, int64x1_t b); // VQSHL.U64 d0,d0,d0 +_NEON2SSESTORAGE int8x16_t vqshlq_s8(int8x16_t a, int8x16_t b); // VQSHL.S8 q0,q0,q0 +_NEON2SSESTORAGE int16x8_t vqshlq_s16(int16x8_t a, int16x8_t b); // VQSHL.S16 q0,q0,q0 +_NEON2SSESTORAGE int32x4_t vqshlq_s32(int32x4_t a, int32x4_t b); // VQSHL.S32 q0,q0,q0 +_NEON2SSESTORAGE int64x2_t vqshlq_s64(int64x2_t a, int64x2_t b); // VQSHL.S64 q0,q0,q0 +_NEON2SSESTORAGE uint8x16_t vqshlq_u8(uint8x16_t a, int8x16_t b); // VQSHL.U8 q0,q0,q0 +_NEON2SSESTORAGE uint16x8_t vqshlq_u16(uint16x8_t a, int16x8_t b); // VQSHL.U16 q0,q0,q0 +_NEON2SSESTORAGE uint32x4_t vqshlq_u32(uint32x4_t a, int32x4_t b); // VQSHL.U32 q0,q0,q0 +_NEON2SSESTORAGE uint64x2_t vqshlq_u64(uint64x2_t a, int64x2_t b); // VQSHL.U64 q0,q0,q0 +//Vector rounding shift left: (negative values shift right) +_NEON2SSESTORAGE int8x8_t vrshl_s8(int8x8_t a, int8x8_t b); // VRSHL.S8 d0,d0,d0 +_NEON2SSESTORAGE int16x4_t vrshl_s16(int16x4_t a, int16x4_t b); // VRSHL.S16 d0,d0,d0 +_NEON2SSESTORAGE int32x2_t vrshl_s32(int32x2_t a, int32x2_t b); // VRSHL.S32 d0,d0,d0 +_NEON2SSESTORAGE int64x1_t vrshl_s64(int64x1_t a, int64x1_t b); // VRSHL.S64 d0,d0,d0 +_NEON2SSESTORAGE uint8x8_t vrshl_u8(uint8x8_t a, int8x8_t b); // VRSHL.U8 d0,d0,d0 
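+/* Scalar reference for the variable-shift families above (the vrshl_*
+   declarations continue below). shl_s8_ref is a hypothetical helper for
+   illustration only: each lane of b carries a signed shift count, negative
+   counts shift right, and the rounding variants add the rounding constant
+   before the right shift. */
+#if 0
+static int8_t shl_s8_ref(int8_t a, int8_t n)
+{
+    return (n >= 0) ? (int8_t)(a << n) : (int8_t)(a >> -n);
+}
+#endif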
+_NEON2SSESTORAGE uint16x4_t vrshl_u16(uint16x4_t a, int16x4_t b); // VRSHL.U16 d0,d0,d0 +_NEON2SSESTORAGE uint32x2_t vrshl_u32(uint32x2_t a, int32x2_t b); // VRSHL.U32 d0,d0,d0 +_NEON2SSESTORAGE uint64x1_t vrshl_u64(uint64x1_t a, int64x1_t b); // VRSHL.U64 d0,d0,d0 +_NEON2SSESTORAGE int8x16_t vrshlq_s8(int8x16_t a, int8x16_t b); // VRSHL.S8 q0,q0,q0 +_NEON2SSESTORAGE int16x8_t vrshlq_s16(int16x8_t a, int16x8_t b); // VRSHL.S16 q0,q0,q0 +_NEON2SSESTORAGE int32x4_t vrshlq_s32(int32x4_t a, int32x4_t b); // VRSHL.S32 q0,q0,q0 +_NEON2SSESTORAGE int64x2_t vrshlq_s64(int64x2_t a, int64x2_t b); // VRSHL.S64 q0,q0,q0 +_NEON2SSESTORAGE uint8x16_t vrshlq_u8(uint8x16_t a, int8x16_t b); // VRSHL.U8 q0,q0,q0 +_NEON2SSESTORAGE uint16x8_t vrshlq_u16(uint16x8_t a, int16x8_t b); // VRSHL.U16 q0,q0,q0 +_NEON2SSESTORAGE uint32x4_t vrshlq_u32(uint32x4_t a, int32x4_t b); // VRSHL.U32 q0,q0,q0 +_NEON2SSESTORAGE uint64x2_t vrshlq_u64(uint64x2_t a, int64x2_t b); // VRSHL.U64 q0,q0,q0 +//Vector saturating rounding shift left: (negative values shift right) +_NEON2SSESTORAGE int8x8_t vqrshl_s8(int8x8_t a, int8x8_t b); // VQRSHL.S8 d0,d0,d0 +_NEON2SSESTORAGE int16x4_t vqrshl_s16(int16x4_t a, int16x4_t b); // VQRSHL.S16 d0,d0,d0 +_NEON2SSESTORAGE int32x2_t vqrshl_s32(int32x2_t a, int32x2_t b); // VQRSHL.S32 d0,d0,d0 +_NEON2SSESTORAGE int64x1_t vqrshl_s64(int64x1_t a, int64x1_t b); // VQRSHL.S64 d0,d0,d0 +_NEON2SSESTORAGE uint8x8_t vqrshl_u8(uint8x8_t a, int8x8_t b); // VQRSHL.U8 d0,d0,d0 +_NEON2SSESTORAGE uint16x4_t vqrshl_u16(uint16x4_t a, int16x4_t b); // VQRSHL.U16 d0,d0,d0 +_NEON2SSESTORAGE uint32x2_t vqrshl_u32(uint32x2_t a, int32x2_t b); // VQRSHL.U32 d0,d0,d0 +_NEON2SSESTORAGE uint64x1_t vqrshl_u64(uint64x1_t a, int64x1_t b); // VQRSHL.U64 d0,d0,d0 +_NEON2SSESTORAGE int8x16_t vqrshlq_s8(int8x16_t a, int8x16_t b); // VQRSHL.S8 q0,q0,q0 +_NEON2SSESTORAGE int16x8_t vqrshlq_s16(int16x8_t a, int16x8_t b); // VQRSHL.S16 q0,q0,q0 +_NEON2SSESTORAGE int32x4_t vqrshlq_s32(int32x4_t a, int32x4_t b); // VQRSHL.S32 q0,q0,q0 +_NEON2SSESTORAGE int64x2_t vqrshlq_s64(int64x2_t a, int64x2_t b); // VQRSHL.S64 q0,q0,q0 +_NEON2SSESTORAGE uint8x16_t vqrshlq_u8(uint8x16_t a, int8x16_t b); // VQRSHL.U8 q0,q0,q0 +_NEON2SSESTORAGE uint16x8_t vqrshlq_u16(uint16x8_t a, int16x8_t b); // VQRSHL.U16 q0,q0,q0 +_NEON2SSESTORAGE uint32x4_t vqrshlq_u32(uint32x4_t a, int32x4_t b); // VQRSHL.U32 q0,q0,q0 +_NEON2SSESTORAGE uint64x2_t vqrshlq_u64(uint64x2_t a, int64x2_t b); // VQRSHL.U64 q0,q0,q0 +//Shifts by a constant +//Vector shift right by constant +_NEON2SSESTORAGE int8x8_t vshr_n_s8(int8x8_t a, __constrange(1,8) int b); // VSHR.S8 d0,d0,#8 +_NEON2SSESTORAGE int16x4_t vshr_n_s16(int16x4_t a, __constrange(1,16) int b); // VSHR.S16 d0,d0,#16 +_NEON2SSESTORAGE int32x2_t vshr_n_s32(int32x2_t a, __constrange(1,32) int b); // VSHR.S32 d0,d0,#32 +_NEON2SSESTORAGE int64x1_t vshr_n_s64(int64x1_t a, __constrange(1,64) int b); // VSHR.S64 d0,d0,#64 +_NEON2SSESTORAGE uint8x8_t vshr_n_u8(uint8x8_t a, __constrange(1,8) int b); // VSHR.U8 d0,d0,#8 +_NEON2SSESTORAGE uint16x4_t vshr_n_u16(uint16x4_t a, __constrange(1,16) int b); // VSHR.U16 d0,d0,#16 +_NEON2SSESTORAGE uint32x2_t vshr_n_u32(uint32x2_t a, __constrange(1,32) int b); // VSHR.U32 d0,d0,#32 +_NEON2SSESTORAGE uint64x1_t vshr_n_u64(uint64x1_t a, __constrange(1,64) int b); // VSHR.U64 d0,d0,#64 +_NEON2SSESTORAGE int8x16_t vshrq_n_s8(int8x16_t a, __constrange(1,8) int b); // VSHR.S8 q0,q0,#8 +_NEON2SSE_GLOBAL int16x8_t vshrq_n_s16(int16x8_t a, __constrange(1,16) int b); // VSHR.S16 q0,q0,#16 
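+/* Usage note (illustration only; halve_ref is a hypothetical helper):
+   __constrange(min,max) documents that the shift amount must be a compile-time
+   constant within the stated range, but in this port it expands to plain
+   `const` (see the macro above), so the range is a contract for the caller
+   rather than something the compiler enforces. The remaining vshrq_n_*
+   declarations follow below. */
+#if 0
+static int16x8_t halve_ref(int16x8_t v) { return vshrq_n_s16(v, 1); } /* shift count 1 is within (1,16) */
+#endif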
+_NEON2SSE_GLOBAL int32x4_t vshrq_n_s32(int32x4_t a, __constrange(1,32) int b); // VSHR.S32 q0,q0,#32 +_NEON2SSESTORAGE int64x2_t vshrq_n_s64(int64x2_t a, __constrange(1,64) int b); // VSHR.S64 q0,q0,#64 +_NEON2SSESTORAGE uint8x16_t vshrq_n_u8(uint8x16_t a, __constrange(1,8) int b); // VSHR.U8 q0,q0,#8 +_NEON2SSE_GLOBAL uint16x8_t vshrq_n_u16(uint16x8_t a, __constrange(1,16) int b); // VSHR.U16 q0,q0,#16 +_NEON2SSE_GLOBAL uint32x4_t vshrq_n_u32(uint32x4_t a, __constrange(1,32) int b); // VSHR.U32 q0,q0,#32 +_NEON2SSE_GLOBAL uint64x2_t vshrq_n_u64(uint64x2_t a, __constrange(1,64) int b); // VSHR.U64 q0,q0,#64 +//Vector shift left by constant +_NEON2SSESTORAGE int8x8_t vshl_n_s8(int8x8_t a, __constrange(0,7) int b); // VSHL.I8 d0,d0,#0 +_NEON2SSESTORAGE int16x4_t vshl_n_s16(int16x4_t a, __constrange(0,15) int b); // VSHL.I16 d0,d0,#0 +_NEON2SSESTORAGE int32x2_t vshl_n_s32(int32x2_t a, __constrange(0,31) int b); // VSHL.I32 d0,d0,#0 +_NEON2SSESTORAGE int64x1_t vshl_n_s64(int64x1_t a, __constrange(0,63) int b); // VSHL.I64 d0,d0,#0 +_NEON2SSESTORAGE uint8x8_t vshl_n_u8(uint8x8_t a, __constrange(0,7) int b); // VSHL.I8 d0,d0,#0 +_NEON2SSE_GLOBAL uint16x4_t vshl_n_u16(uint16x4_t a, __constrange(0,15) int b); // VSHL.I16 d0,d0,#0 +_NEON2SSE_GLOBAL uint32x2_t vshl_n_u32(uint32x2_t a, __constrange(0,31) int b); // VSHL.I32 d0,d0,#0 +_NEON2SSE_GLOBAL uint64x1_t vshl_n_u64(uint64x1_t a, __constrange(0,63) int b); // VSHL.I64 d0,d0,#0 +_NEON2SSE_GLOBAL int8x16_t vshlq_n_s8(int8x16_t a, __constrange(0,7) int b); // VSHL.I8 q0,q0,#0 +_NEON2SSE_GLOBAL int16x8_t vshlq_n_s16(int16x8_t a, __constrange(0,15) int b); // VSHL.I16 q0,q0,#0 +_NEON2SSE_GLOBAL int32x4_t vshlq_n_s32(int32x4_t a, __constrange(0,31) int b); // VSHL.I32 q0,q0,#0 +_NEON2SSE_GLOBAL int64x2_t vshlq_n_s64(int64x2_t a, __constrange(0,63) int b); // VSHL.I64 q0,q0,#0 +_NEON2SSESTORAGE uint8x16_t vshlq_n_u8(uint8x16_t a, __constrange(0,7) int b); // VSHL.I8 q0,q0,#0 +_NEON2SSE_GLOBAL uint16x8_t vshlq_n_u16(uint16x8_t a, __constrange(0,15) int b); // VSHL.I16 q0,q0,#0 +_NEON2SSE_GLOBAL uint32x4_t vshlq_n_u32(uint32x4_t a, __constrange(0,31) int b); // VSHL.I32 q0,q0,#0 +_NEON2SSE_GLOBAL uint64x2_t vshlq_n_u64(uint64x2_t a, __constrange(0,63) int b); // VSHL.I64 q0,q0,#0 +//Vector rounding shift right by constant +_NEON2SSESTORAGE int8x8_t vrshr_n_s8(int8x8_t a, __constrange(1,8) int b); // VRSHR.S8 d0,d0,#8 +_NEON2SSESTORAGE int16x4_t vrshr_n_s16(int16x4_t a, __constrange(1,16) int b); // VRSHR.S16 d0,d0,#16 +_NEON2SSESTORAGE int32x2_t vrshr_n_s32(int32x2_t a, __constrange(1,32) int b); // VRSHR.S32 d0,d0,#32 +_NEON2SSESTORAGE int64x1_t vrshr_n_s64(int64x1_t a, __constrange(1,64) int b); // VRSHR.S64 d0,d0,#64 +_NEON2SSESTORAGE uint8x8_t vrshr_n_u8(uint8x8_t a, __constrange(1,8) int b); // VRSHR.U8 d0,d0,#8 +_NEON2SSESTORAGE uint16x4_t vrshr_n_u16(uint16x4_t a, __constrange(1,16) int b); // VRSHR.U16 d0,d0,#16 +_NEON2SSESTORAGE uint32x2_t vrshr_n_u32(uint32x2_t a, __constrange(1,32) int b); // VRSHR.U32 d0,d0,#32 +_NEON2SSESTORAGE uint64x1_t vrshr_n_u64(uint64x1_t a, __constrange(1,64) int b); // VRSHR.U64 d0,d0,#64 +_NEON2SSESTORAGE int8x16_t vrshrq_n_s8(int8x16_t a, __constrange(1,8) int b); // VRSHR.S8 q0,q0,#8 +_NEON2SSESTORAGE int16x8_t vrshrq_n_s16(int16x8_t a, __constrange(1,16) int b); // VRSHR.S16 q0,q0,#16 +_NEON2SSESTORAGE int32x4_t vrshrq_n_s32(int32x4_t a, __constrange(1,32) int b); // VRSHR.S32 q0,q0,#32 +_NEON2SSESTORAGE int64x2_t vrshrq_n_s64(int64x2_t a, __constrange(1,64) int b); // VRSHR.S64 q0,q0,#64 +_NEON2SSESTORAGE 
uint8x16_t vrshrq_n_u8(uint8x16_t a, __constrange(1,8) int b); // VRSHR.U8 q0,q0,#8 +_NEON2SSESTORAGE uint16x8_t vrshrq_n_u16(uint16x8_t a, __constrange(1,16) int b); // VRSHR.U16 q0,q0,#16 +_NEON2SSESTORAGE uint32x4_t vrshrq_n_u32(uint32x4_t a, __constrange(1,32) int b); // VRSHR.U32 q0,q0,#32 +_NEON2SSESTORAGE uint64x2_t vrshrq_n_u64(uint64x2_t a, __constrange(1,64) int b); // VRSHR.U64 q0,q0,#64 +//Vector shift right by constant and accumulate +_NEON2SSESTORAGE int8x8_t vsra_n_s8(int8x8_t a, int8x8_t b, __constrange(1,8) int c); // VSRA.S8 d0,d0,#8 +_NEON2SSESTORAGE int16x4_t vsra_n_s16(int16x4_t a, int16x4_t b, __constrange(1,16) int c); // VSRA.S16 d0,d0,#16 +_NEON2SSESTORAGE int32x2_t vsra_n_s32(int32x2_t a, int32x2_t b, __constrange(1,32) int c); // VSRA.S32 d0,d0,#32 +_NEON2SSESTORAGE int64x1_t vsra_n_s64(int64x1_t a, int64x1_t b, __constrange(1,64) int c); // VSRA.S64 d0,d0,#64 +_NEON2SSESTORAGE uint8x8_t vsra_n_u8(uint8x8_t a, uint8x8_t b, __constrange(1,8) int c); // VSRA.U8 d0,d0,#8 +_NEON2SSESTORAGE uint16x4_t vsra_n_u16(uint16x4_t a, uint16x4_t b, __constrange(1,16) int c); // VSRA.U16 d0,d0,#16 +_NEON2SSESTORAGE uint32x2_t vsra_n_u32(uint32x2_t a, uint32x2_t b, __constrange(1,32) int c); // VSRA.U32 d0,d0,#32 +_NEON2SSESTORAGE uint64x1_t vsra_n_u64(uint64x1_t a, uint64x1_t b, __constrange(1,64) int c); // VSRA.U64 d0,d0,#64 +_NEON2SSESTORAGE int8x16_t vsraq_n_s8(int8x16_t a, int8x16_t b, __constrange(1,8) int c); // VSRA.S8 q0,q0,#8 +_NEON2SSESTORAGE int16x8_t vsraq_n_s16(int16x8_t a, int16x8_t b, __constrange(1,16) int c); // VSRA.S16 q0,q0,#16 +_NEON2SSESTORAGE int32x4_t vsraq_n_s32(int32x4_t a, int32x4_t b, __constrange(1,32) int c); // VSRA.S32 q0,q0,#32 +_NEON2SSESTORAGE int64x2_t vsraq_n_s64(int64x2_t a, int64x2_t b, __constrange(1,64) int c); // VSRA.S64 q0,q0,#64 +_NEON2SSESTORAGE uint8x16_t vsraq_n_u8(uint8x16_t a, uint8x16_t b, __constrange(1,8) int c); // VSRA.U8 q0,q0,#8 +_NEON2SSESTORAGE uint16x8_t vsraq_n_u16(uint16x8_t a, uint16x8_t b, __constrange(1,16) int c); // VSRA.U16 q0,q0,#16 +_NEON2SSESTORAGE uint32x4_t vsraq_n_u32(uint32x4_t a, uint32x4_t b, __constrange(1,32) int c); // VSRA.U32 q0,q0,#32 +_NEON2SSESTORAGE uint64x2_t vsraq_n_u64(uint64x2_t a, uint64x2_t b, __constrange(1,64) int c); // VSRA.U64 q0,q0,#64 +//Vector rounding shift right by constant and accumulate +_NEON2SSESTORAGE int8x8_t vrsra_n_s8(int8x8_t a, int8x8_t b, __constrange(1,8) int c); // VRSRA.S8 d0,d0,#8 +_NEON2SSESTORAGE int16x4_t vrsra_n_s16(int16x4_t a, int16x4_t b, __constrange(1,16) int c); // VRSRA.S16 d0,d0,#16 +_NEON2SSESTORAGE int32x2_t vrsra_n_s32(int32x2_t a, int32x2_t b, __constrange(1,32) int c); // VRSRA.S32 d0,d0,#32 +_NEON2SSESTORAGE int64x1_t vrsra_n_s64(int64x1_t a, int64x1_t b, __constrange(1,64) int c); // VRSRA.S64 d0,d0,#64 +_NEON2SSESTORAGE uint8x8_t vrsra_n_u8(uint8x8_t a, uint8x8_t b, __constrange(1,8) int c); // VRSRA.U8 d0,d0,#8 +_NEON2SSESTORAGE uint16x4_t vrsra_n_u16(uint16x4_t a, uint16x4_t b, __constrange(1,16) int c); // VRSRA.U16 d0,d0,#16 +_NEON2SSESTORAGE uint32x2_t vrsra_n_u32(uint32x2_t a, uint32x2_t b, __constrange(1,32) int c); // VRSRA.U32 d0,d0,#32 +_NEON2SSESTORAGE uint64x1_t vrsra_n_u64(uint64x1_t a, uint64x1_t b, __constrange(1,64) int c); // VRSRA.U64 d0,d0,#64 +_NEON2SSESTORAGE int8x16_t vrsraq_n_s8(int8x16_t a, int8x16_t b, __constrange(1,8) int c); // VRSRA.S8 q0,q0,#8 +_NEON2SSESTORAGE int16x8_t vrsraq_n_s16(int16x8_t a, int16x8_t b, __constrange(1,16) int c); // VRSRA.S16 q0,q0,#16 +_NEON2SSESTORAGE int32x4_t vrsraq_n_s32(int32x4_t a, 
int32x4_t b, __constrange(1,32) int c); // VRSRA.S32 q0,q0,#32 +_NEON2SSESTORAGE int64x2_t vrsraq_n_s64(int64x2_t a, int64x2_t b, __constrange(1,64) int c); // VRSRA.S64 q0,q0,#64 +_NEON2SSESTORAGE uint8x16_t vrsraq_n_u8(uint8x16_t a, uint8x16_t b, __constrange(1,8) int c); // VRSRA.U8 q0,q0,#8 +_NEON2SSESTORAGE uint16x8_t vrsraq_n_u16(uint16x8_t a, uint16x8_t b, __constrange(1,16) int c); // VRSRA.U16 q0,q0,#16 +_NEON2SSESTORAGE uint32x4_t vrsraq_n_u32(uint32x4_t a, uint32x4_t b, __constrange(1,32) int c); // VRSRA.U32 q0,q0,#32 +_NEON2SSESTORAGE uint64x2_t vrsraq_n_u64(uint64x2_t a, uint64x2_t b, __constrange(1,64) int c); // VRSRA.U64 q0,q0,#64 +//Vector saturating shift left by constant +_NEON2SSESTORAGE int8x8_t vqshl_n_s8(int8x8_t a, __constrange(0,7) int b); // VQSHL.S8 d0,d0,#0 +_NEON2SSESTORAGE int16x4_t vqshl_n_s16(int16x4_t a, __constrange(0,15) int b); // VQSHL.S16 d0,d0,#0 +_NEON2SSESTORAGE int32x2_t vqshl_n_s32(int32x2_t a, __constrange(0,31) int b); // VQSHL.S32 d0,d0,#0 +_NEON2SSESTORAGE int64x1_t vqshl_n_s64(int64x1_t a, __constrange(0,63) int b); // VQSHL.S64 d0,d0,#0 +_NEON2SSESTORAGE uint8x8_t vqshl_n_u8(uint8x8_t a, __constrange(0,7) int b); // VQSHL.U8 d0,d0,#0 +_NEON2SSESTORAGE uint16x4_t vqshl_n_u16(uint16x4_t a, __constrange(0,15) int b); // VQSHL.U16 d0,d0,#0 +_NEON2SSESTORAGE uint32x2_t vqshl_n_u32(uint32x2_t a, __constrange(0,31) int b); // VQSHL.U32 d0,d0,#0 +_NEON2SSESTORAGE uint64x1_t vqshl_n_u64(uint64x1_t a, __constrange(0,63) int b); // VQSHL.U64 d0,d0,#0 +_NEON2SSESTORAGE int8x16_t vqshlq_n_s8(int8x16_t a, __constrange(0,7) int b); // VQSHL.S8 q0,q0,#0 +_NEON2SSESTORAGE int16x8_t vqshlq_n_s16(int16x8_t a, __constrange(0,15) int b); // VQSHL.S16 q0,q0,#0 +_NEON2SSESTORAGE int32x4_t vqshlq_n_s32(int32x4_t a, __constrange(0,31) int b); // VQSHL.S32 q0,q0,#0 +_NEON2SSESTORAGE int64x2_t vqshlq_n_s64(int64x2_t a, __constrange(0,63) int b); // VQSHL.S64 q0,q0,#0 +_NEON2SSESTORAGE uint8x16_t vqshlq_n_u8(uint8x16_t a, __constrange(0,7) int b); // VQSHL.U8 q0,q0,#0 +_NEON2SSESTORAGE uint16x8_t vqshlq_n_u16(uint16x8_t a, __constrange(0,15) int b); // VQSHL.U16 q0,q0,#0 +_NEON2SSESTORAGE uint32x4_t vqshlq_n_u32(uint32x4_t a, __constrange(0,31) int b); // VQSHL.U32 q0,q0,#0 +_NEON2SSESTORAGE uint64x2_t vqshlq_n_u64(uint64x2_t a, __constrange(0,63) int b); // VQSHL.U64 q0,q0,#0 +//Vector signed->unsigned saturating shift left by constant +_NEON2SSESTORAGE uint8x8_t vqshlu_n_s8(int8x8_t a, __constrange(0,7) int b); // VQSHLU.S8 d0,d0,#0 +_NEON2SSESTORAGE uint16x4_t vqshlu_n_s16(int16x4_t a, __constrange(0,15) int b); // VQSHLU.S16 d0,d0,#0 +_NEON2SSESTORAGE uint32x2_t vqshlu_n_s32(int32x2_t a, __constrange(0,31) int b); // VQSHLU.S32 d0,d0,#0 +_NEON2SSESTORAGE uint64x1_t vqshlu_n_s64(int64x1_t a, __constrange(0,63) int b); // VQSHLU.S64 d0,d0,#0 +_NEON2SSESTORAGE uint8x16_t vqshluq_n_s8(int8x16_t a, __constrange(0,7) int b); // VQSHLU.S8 q0,q0,#0 +_NEON2SSESTORAGE uint16x8_t vqshluq_n_s16(int16x8_t a, __constrange(0,15) int b); // VQSHLU.S16 q0,q0,#0 +_NEON2SSESTORAGE uint32x4_t vqshluq_n_s32(int32x4_t a, __constrange(0,31) int b); // VQSHLU.S32 q0,q0,#0 +_NEON2SSESTORAGE uint64x2_t vqshluq_n_s64(int64x2_t a, __constrange(0,63) int b); // VQSHLU.S64 q0,q0,#0 +//Vector narrowing shift right by constant +_NEON2SSESTORAGE int8x8_t vshrn_n_s16(int16x8_t a, __constrange(1,8) int b); // VSHRN.I16 d0,q0,#8 +_NEON2SSESTORAGE int16x4_t vshrn_n_s32(int32x4_t a, __constrange(1,16) int b); // VSHRN.I32 d0,q0,#16 +_NEON2SSESTORAGE int32x2_t vshrn_n_s64(int64x2_t a, 
__constrange(1,32) int b); // VSHRN.I64 d0,q0,#32 +_NEON2SSESTORAGE uint8x8_t vshrn_n_u16(uint16x8_t a, __constrange(1,8) int b); // VSHRN.I16 d0,q0,#8 +_NEON2SSESTORAGE uint16x4_t vshrn_n_u32(uint32x4_t a, __constrange(1,16) int b); // VSHRN.I32 d0,q0,#16 +_NEON2SSESTORAGE uint32x2_t vshrn_n_u64(uint64x2_t a, __constrange(1,32) int b); // VSHRN.I64 d0,q0,#32 +//Vector signed->unsigned narrowing saturating shift right by constant +_NEON2SSESTORAGE uint8x8_t vqshrun_n_s16(int16x8_t a, __constrange(1,8) int b); // VQSHRUN.S16 d0,q0,#8 +_NEON2SSESTORAGE uint16x4_t vqshrun_n_s32(int32x4_t a, __constrange(1,16) int b); // VQSHRUN.S32 d0,q0,#16 +_NEON2SSESTORAGE uint32x2_t vqshrun_n_s64(int64x2_t a, __constrange(1,32) int b); // VQSHRUN.S64 d0,q0,#32 +//Vector signed->unsigned rounding narrowing saturating shift right by constant +_NEON2SSESTORAGE uint8x8_t vqrshrun_n_s16(int16x8_t a, __constrange(1,8) int b); // VQRSHRUN.S16 d0,q0,#8 +_NEON2SSESTORAGE uint16x4_t vqrshrun_n_s32(int32x4_t a, __constrange(1,16) int b); // VQRSHRUN.S32 d0,q0,#16 +_NEON2SSESTORAGE uint32x2_t vqrshrun_n_s64(int64x2_t a, __constrange(1,32) int b); // VQRSHRUN.S64 d0,q0,#32 +//Vector narrowing saturating shift right by constant +_NEON2SSESTORAGE int8x8_t vqshrn_n_s16(int16x8_t a, __constrange(1,8) int b); // VQSHRN.S16 d0,q0,#8 +_NEON2SSESTORAGE int16x4_t vqshrn_n_s32(int32x4_t a, __constrange(1,16) int b); // VQSHRN.S32 d0,q0,#16 +_NEON2SSESTORAGE int32x2_t vqshrn_n_s64(int64x2_t a, __constrange(1,32) int b); // VQSHRN.S64 d0,q0,#32 +_NEON2SSESTORAGE uint8x8_t vqshrn_n_u16(uint16x8_t a, __constrange(1,8) int b); // VQSHRN.U16 d0,q0,#8 +_NEON2SSESTORAGE uint16x4_t vqshrn_n_u32(uint32x4_t a, __constrange(1,16) int b); // VQSHRN.U32 d0,q0,#16 +_NEON2SSESTORAGE uint32x2_t vqshrn_n_u64(uint64x2_t a, __constrange(1,32) int b); // VQSHRN.U64 d0,q0,#32 +//Vector rounding narrowing shift right by constant +_NEON2SSESTORAGE int8x8_t vrshrn_n_s16(int16x8_t a, __constrange(1,8) int b); // VRSHRN.I16 d0,q0,#8 +_NEON2SSESTORAGE int16x4_t vrshrn_n_s32(int32x4_t a, __constrange(1,16) int b); // VRSHRN.I32 d0,q0,#16 +_NEON2SSESTORAGE int32x2_t vrshrn_n_s64(int64x2_t a, __constrange(1,32) int b); // VRSHRN.I64 d0,q0,#32 +_NEON2SSESTORAGE uint8x8_t vrshrn_n_u16(uint16x8_t a, __constrange(1,8) int b); // VRSHRN.I16 d0,q0,#8 +_NEON2SSESTORAGE uint16x4_t vrshrn_n_u32(uint32x4_t a, __constrange(1,16) int b); // VRSHRN.I32 d0,q0,#16 +_NEON2SSESTORAGE uint32x2_t vrshrn_n_u64(uint64x2_t a, __constrange(1,32) int b); // VRSHRN.I64 d0,q0,#32 +//Vector rounding narrowing saturating shift right by constant +_NEON2SSESTORAGE int8x8_t vqrshrn_n_s16(int16x8_t a, __constrange(1,8) int b); // VQRSHRN.S16 d0,q0,#8 +_NEON2SSESTORAGE int16x4_t vqrshrn_n_s32(int32x4_t a, __constrange(1,16) int b); // VQRSHRN.S32 d0,q0,#16 +_NEON2SSESTORAGE int32x2_t vqrshrn_n_s64(int64x2_t a, __constrange(1,32) int b); // VQRSHRN.S64 d0,q0,#32 +_NEON2SSESTORAGE uint8x8_t vqrshrn_n_u16(uint16x8_t a, __constrange(1,8) int b); // VQRSHRN.U16 d0,q0,#8 +_NEON2SSESTORAGE uint16x4_t vqrshrn_n_u32(uint32x4_t a, __constrange(1,16) int b); // VQRSHRN.U32 d0,q0,#16 +_NEON2SSESTORAGE uint32x2_t vqrshrn_n_u64(uint64x2_t a, __constrange(1,32) int b); // VQRSHRN.U64 d0,q0,#32 +//Vector widening shift left by constant +_NEON2SSESTORAGE int16x8_t vshll_n_s8(int8x8_t a, __constrange(0,8) int b); // VSHLL.S8 q0,d0,#0 +_NEON2SSESTORAGE int32x4_t vshll_n_s16(int16x4_t a, __constrange(0,16) int b); // VSHLL.S16 q0,d0,#0 +_NEON2SSESTORAGE int64x2_t vshll_n_s32(int32x2_t a, __constrange(0,32) int 
b); // VSHLL.S32 q0,d0,#0 +_NEON2SSESTORAGE uint16x8_t vshll_n_u8(uint8x8_t a, __constrange(0,8) int b); // VSHLL.U8 q0,d0,#0 +_NEON2SSESTORAGE uint32x4_t vshll_n_u16(uint16x4_t a, __constrange(0,16) int b); // VSHLL.U16 q0,d0,#0 +_NEON2SSESTORAGE uint64x2_t vshll_n_u32(uint32x2_t a, __constrange(0,32) int b); // VSHLL.U32 q0,d0,#0 +//Shifts with insert +//Vector shift right and insert +_NEON2SSESTORAGE int8x8_t vsri_n_s8(int8x8_t a, int8x8_t b, __constrange(1,8) int c); // VSRI.8 d0,d0,#8 +_NEON2SSESTORAGE int16x4_t vsri_n_s16(int16x4_t a, int16x4_t b, __constrange(1,16) int c); // VSRI.16 d0,d0,#16 +_NEON2SSESTORAGE int32x2_t vsri_n_s32(int32x2_t a, int32x2_t b, __constrange(1,32) int c); // VSRI.32 d0,d0,#32 +_NEON2SSESTORAGE int64x1_t vsri_n_s64(int64x1_t a, int64x1_t b, __constrange(1,64) int c); // VSRI.64 d0,d0,#64 +_NEON2SSE_GLOBAL uint8x8_t vsri_n_u8(uint8x8_t a, uint8x8_t b, __constrange(1,8) int c); // VSRI.8 d0,d0,#8 +_NEON2SSE_GLOBAL uint16x4_t vsri_n_u16(uint16x4_t a, uint16x4_t b, __constrange(1,16) int c); // VSRI.16 d0,d0,#16 +_NEON2SSE_GLOBAL uint32x2_t vsri_n_u32(uint32x2_t a, uint32x2_t b, __constrange(1,32) int c); // VSRI.32 d0,d0,#32 +_NEON2SSE_GLOBAL uint64x1_t vsri_n_u64(uint64x1_t a, uint64x1_t b, __constrange(1,64) int c); // VSRI.64 d0,d0,#64 +_NEON2SSE_GLOBAL poly8x8_t vsri_n_p8(poly8x8_t a, poly8x8_t b, __constrange(1,8) int c); // VSRI.8 d0,d0,#8 +_NEON2SSE_GLOBAL poly16x4_t vsri_n_p16(poly16x4_t a, poly16x4_t b, __constrange(1,16) int c); // VSRI.16 d0,d0,#16 +_NEON2SSESTORAGE int8x16_t vsriq_n_s8(int8x16_t a, int8x16_t b, __constrange(1,8) int c); // VSRI.8 q0,q0,#8 +_NEON2SSESTORAGE int16x8_t vsriq_n_s16(int16x8_t a, int16x8_t b, __constrange(1,16) int c); // VSRI.16 q0,q0,#16 +_NEON2SSESTORAGE int32x4_t vsriq_n_s32(int32x4_t a, int32x4_t b, __constrange(1,32) int c); // VSRI.32 q0,q0,#32 +_NEON2SSESTORAGE int64x2_t vsriq_n_s64(int64x2_t a, int64x2_t b, __constrange(1,64) int c); // VSRI.64 q0,q0,#64 +_NEON2SSE_GLOBAL uint8x16_t vsriq_n_u8(uint8x16_t a, uint8x16_t b, __constrange(1,8) int c); // VSRI.8 q0,q0,#8 +_NEON2SSE_GLOBAL uint16x8_t vsriq_n_u16(uint16x8_t a, uint16x8_t b, __constrange(1,16) int c); // VSRI.16 q0,q0,#16 +_NEON2SSE_GLOBAL uint32x4_t vsriq_n_u32(uint32x4_t a, uint32x4_t b, __constrange(1,32) int c); // VSRI.32 q0,q0,#32 +_NEON2SSE_GLOBAL uint64x2_t vsriq_n_u64(uint64x2_t a, uint64x2_t b, __constrange(1,64) int c); // VSRI.64 q0,q0,#64 +_NEON2SSE_GLOBAL poly8x16_t vsriq_n_p8(poly8x16_t a, poly8x16_t b, __constrange(1,8) int c); // VSRI.8 q0,q0,#8 +_NEON2SSE_GLOBAL poly16x8_t vsriq_n_p16(poly16x8_t a, poly16x8_t b, __constrange(1,16) int c); // VSRI.16 q0,q0,#16 +//Vector shift left and insert +_NEON2SSESTORAGE int8x8_t vsli_n_s8(int8x8_t a, int8x8_t b, __constrange(0,7) int c); // VSLI.8 d0,d0,#0 +_NEON2SSESTORAGE int16x4_t vsli_n_s16(int16x4_t a, int16x4_t b, __constrange(0,15) int c); // VSLI.16 d0,d0,#0 +_NEON2SSESTORAGE int32x2_t vsli_n_s32(int32x2_t a, int32x2_t b, __constrange(0,31) int c); // VSLI.32 d0,d0,#0 +_NEON2SSESTORAGE int64x1_t vsli_n_s64(int64x1_t a, int64x1_t b, __constrange(0,63) int c); // VSLI.64 d0,d0,#0 +_NEON2SSE_GLOBAL uint8x8_t vsli_n_u8(uint8x8_t a, uint8x8_t b, __constrange(0,7) int c); // VSLI.8 d0,d0,#0 +_NEON2SSE_GLOBAL uint16x4_t vsli_n_u16(uint16x4_t a, uint16x4_t b, __constrange(0,15) int c); // VSLI.16 d0,d0,#0 +_NEON2SSE_GLOBAL uint32x2_t vsli_n_u32(uint32x2_t a, uint32x2_t b, __constrange(0,31) int c); // VSLI.32 d0,d0,#0 +_NEON2SSE_GLOBAL uint64x1_t vsli_n_u64(uint64x1_t a, uint64x1_t b, 
__constrange(0,63) int c); // VSLI.64 d0,d0,#0 +_NEON2SSE_GLOBAL poly8x8_t vsli_n_p8(poly8x8_t a, poly8x8_t b, __constrange(0,7) int c); // VSLI.8 d0,d0,#0 +_NEON2SSE_GLOBAL poly16x4_t vsli_n_p16(poly16x4_t a, poly16x4_t b, __constrange(0,15) int c); // VSLI.16 d0,d0,#0 +_NEON2SSESTORAGE int8x16_t vsliq_n_s8(int8x16_t a, int8x16_t b, __constrange(0,7) int c); // VSLI.8 q0,q0,#0 +_NEON2SSESTORAGE int16x8_t vsliq_n_s16(int16x8_t a, int16x8_t b, __constrange(0,15) int c); // VSLI.16 q0,q0,#0 +_NEON2SSESTORAGE int32x4_t vsliq_n_s32(int32x4_t a, int32x4_t b, __constrange(0,31) int c); // VSLI.32 q0,q0,#0 +_NEON2SSESTORAGE int64x2_t vsliq_n_s64(int64x2_t a, int64x2_t b, __constrange(0,63) int c); // VSLI.64 q0,q0,#0 +_NEON2SSE_GLOBAL uint8x16_t vsliq_n_u8(uint8x16_t a, uint8x16_t b, __constrange(0,7) int c); // VSLI.8 q0,q0,#0 +_NEON2SSE_GLOBAL uint16x8_t vsliq_n_u16(uint16x8_t a, uint16x8_t b, __constrange(0,15) int c); // VSLI.16 q0,q0,#0 +_NEON2SSE_GLOBAL uint32x4_t vsliq_n_u32(uint32x4_t a, uint32x4_t b, __constrange(0,31) int c); // VSLI.32 q0,q0,#0 +_NEON2SSE_GLOBAL uint64x2_t vsliq_n_u64(uint64x2_t a, uint64x2_t b, __constrange(0,63) int c); // VSLI.64 q0,q0,#0 +_NEON2SSE_GLOBAL poly8x16_t vsliq_n_p8(poly8x16_t a, poly8x16_t b, __constrange(0,7) int c); // VSLI.8 q0,q0,#0 +_NEON2SSE_GLOBAL poly16x8_t vsliq_n_p16(poly16x8_t a, poly16x8_t b, __constrange(0,15) int c); // VSLI.16 q0,q0,#0 +//Loads of a single vector or lane. Perform loads and stores of a single vector of some type. +//Load a single vector from memory +_NEON2SSE_GLOBAL uint8x16_t vld1q_u8(__transfersize(16) uint8_t const * ptr); // VLD1.8 {d0, d1}, [r0] +_NEON2SSE_GLOBAL uint16x8_t vld1q_u16(__transfersize(8) uint16_t const * ptr); // VLD1.16 {d0, d1}, [r0] +_NEON2SSE_GLOBAL uint32x4_t vld1q_u32(__transfersize(4) uint32_t const * ptr); // VLD1.32 {d0, d1}, [r0] +_NEON2SSE_GLOBAL uint64x2_t vld1q_u64(__transfersize(2) uint64_t const * ptr); // VLD1.64 {d0, d1}, [r0] +_NEON2SSE_GLOBAL int8x16_t vld1q_s8(__transfersize(16) int8_t const * ptr); // VLD1.8 {d0, d1}, [r0] +_NEON2SSE_GLOBAL int16x8_t vld1q_s16(__transfersize(8) int16_t const * ptr); // VLD1.16 {d0, d1}, [r0] +_NEON2SSE_GLOBAL int32x4_t vld1q_s32(__transfersize(4) int32_t const * ptr); // VLD1.32 {d0, d1}, [r0] +_NEON2SSE_GLOBAL int64x2_t vld1q_s64(__transfersize(2) int64_t const * ptr); // VLD1.64 {d0, d1}, [r0] +_NEON2SSE_GLOBAL float16x8_t vld1q_f16(__transfersize(8) __fp16 const * ptr); // VLD1.16 {d0, d1}, [r0] +_NEON2SSESTORAGE float32x4_t vld1q_f32(__transfersize(4) float32_t const * ptr); // VLD1.32 {d0, d1}, [r0] +_NEON2SSE_GLOBAL poly8x16_t vld1q_p8(__transfersize(16) poly8_t const * ptr); // VLD1.8 {d0, d1}, [r0] +_NEON2SSE_GLOBAL poly16x8_t vld1q_p16(__transfersize(8) poly16_t const * ptr); // VLD1.16 {d0, d1}, [r0] +_NEON2SSE_GLOBAL uint8x8_t vld1_u8(__transfersize(8) uint8_t const * ptr); // VLD1.8 {d0}, [r0] +_NEON2SSE_GLOBAL uint16x4_t vld1_u16(__transfersize(4) uint16_t const * ptr); // VLD1.16 {d0}, [r0] +_NEON2SSE_GLOBAL uint32x2_t vld1_u32(__transfersize(2) uint32_t const * ptr); // VLD1.32 {d0}, [r0] +_NEON2SSE_GLOBAL uint64x1_t vld1_u64(__transfersize(1) uint64_t const * ptr); // VLD1.64 {d0}, [r0] +_NEON2SSE_GLOBAL int8x8_t vld1_s8(__transfersize(8) int8_t const * ptr); // VLD1.8 {d0}, [r0] +_NEON2SSE_GLOBAL int16x4_t vld1_s16(__transfersize(4) int16_t const * ptr); // VLD1.16 {d0}, [r0] +_NEON2SSE_GLOBAL int32x2_t vld1_s32(__transfersize(2) int32_t const * ptr); // VLD1.32 {d0}, [r0] +_NEON2SSE_GLOBAL int64x1_t vld1_s64(__transfersize(1) 
int64_t const * ptr); // VLD1.64 {d0}, [r0] +_NEON2SSE_GLOBAL float16x4_t vld1_f16(__transfersize(4) __fp16 const * ptr); // VLD1.16 {d0}, [r0] +_NEON2SSESTORAGE float32x2_t vld1_f32(__transfersize(2) float32_t const * ptr); // VLD1.32 {d0}, [r0] +_NEON2SSE_GLOBAL poly8x8_t vld1_p8(__transfersize(8) poly8_t const * ptr); // VLD1.8 {d0}, [r0] +_NEON2SSE_GLOBAL poly16x4_t vld1_p16(__transfersize(4) poly16_t const * ptr); // VLD1.16 {d0}, [r0] + +_NEON2SSESTORAGE float64x2_t vld1q_f64(__transfersize(2) float64_t const * ptr); // VLD1.64 {d0, d1}, [r0] + +//Load a single lane from memory +_NEON2SSE_GLOBAL uint8x16_t vld1q_lane_u8(__transfersize(1) uint8_t const * ptr, uint8x16_t vec, __constrange(0,15) int lane); //VLD1.8 {d0[0]}, [r0] +_NEON2SSE_GLOBAL uint16x8_t vld1q_lane_u16(__transfersize(1) uint16_t const * ptr, uint16x8_t vec, __constrange(0,7) int lane); // VLD1.16 {d0[0]}, [r0] +_NEON2SSE_GLOBAL uint32x4_t vld1q_lane_u32(__transfersize(1) uint32_t const * ptr, uint32x4_t vec, __constrange(0,3) int lane); // VLD1.32 {d0[0]}, [r0] +_NEON2SSE_GLOBAL uint64x2_t vld1q_lane_u64(__transfersize(1) uint64_t const * ptr, uint64x2_t vec, __constrange(0,1) int lane); // VLD1.64 {d0}, [r0] +_NEON2SSE_GLOBAL int8x16_t vld1q_lane_s8(__transfersize(1) int8_t const * ptr, int8x16_t vec, __constrange(0,15) int lane); //VLD1.8 {d0[0]}, [r0] +_NEON2SSE_GLOBAL int16x8_t vld1q_lane_s16(__transfersize(1) int16_t const * ptr, int16x8_t vec, __constrange(0,7) int lane); //VLD1.16 {d0[0]}, [r0] +_NEON2SSE_GLOBAL int32x4_t vld1q_lane_s32(__transfersize(1) int32_t const * ptr, int32x4_t vec, __constrange(0,3) int lane); //VLD1.32 {d0[0]}, [r0] +_NEON2SSE_GLOBAL float16x8_t vld1q_lane_f16(__transfersize(1) __fp16 const * ptr, float16x8_t vec, __constrange(0,7) int lane); //VLD1.16 {d0[0]}, [r0] +_NEON2SSESTORAGE float32x4_t vld1q_lane_f32(__transfersize(1) float32_t const * ptr, float32x4_t vec, __constrange(0,3) int lane); // VLD1.32 {d0[0]}, [r0] +_NEON2SSE_GLOBAL int64x2_t vld1q_lane_s64(__transfersize(1) int64_t const * ptr, int64x2_t vec, __constrange(0,1) int lane); //VLD1.64 {d0}, [r0] +_NEON2SSE_GLOBAL poly8x16_t vld1q_lane_p8(__transfersize(1) poly8_t const * ptr, poly8x16_t vec, __constrange(0,15) int lane); //VLD1.8 {d0[0]}, [r0] +_NEON2SSE_GLOBAL poly16x8_t vld1q_lane_p16(__transfersize(1) poly16_t const * ptr, poly16x8_t vec, __constrange(0,7) int lane); // VLD1.16 {d0[0]}, [r0] +_NEON2SSESTORAGE uint8x8_t vld1_lane_u8(__transfersize(1) uint8_t const * ptr, uint8x8_t vec, __constrange(0,7) int lane); //VLD1.8 {d0[0]}, [r0] +_NEON2SSESTORAGE uint16x4_t vld1_lane_u16(__transfersize(1) uint16_t const * ptr, uint16x4_t vec, __constrange(0,3) int lane); //VLD1.16 {d0[0]}, [r0] +_NEON2SSESTORAGE uint32x2_t vld1_lane_u32(__transfersize(1) uint32_t const * ptr, uint32x2_t vec, __constrange(0,1) int lane); //VLD1.32 {d0[0]}, [r0] +_NEON2SSESTORAGE uint64x1_t vld1_lane_u64(__transfersize(1) uint64_t const * ptr, uint64x1_t vec, __constrange(0,0) int lane); //VLD1.64 {d0}, [r0] +_NEON2SSE_GLOBAL int8x8_t vld1_lane_s8(__transfersize(1) int8_t const * ptr, int8x8_t vec, __constrange(0,7) int lane); // VLD1.8{d0[0]}, [r0] +_NEON2SSE_GLOBAL int16x4_t vld1_lane_s16(__transfersize(1) int16_t const * ptr, int16x4_t vec, __constrange(0,3) int lane); //VLD1.16 {d0[0]}, [r0] +_NEON2SSE_GLOBAL int32x2_t vld1_lane_s32(__transfersize(1) int32_t const * ptr, int32x2_t vec, __constrange(0,1) int lane); //VLD1.32 {d0[0]}, [r0] +_NEON2SSE_GLOBAL float16x4_t vld1_lane_f16(__transfersize(1) __fp16 const * ptr, float16x4_t vec, 
__constrange(0,3) int lane); //VLD1.16 {d0[0]}, [r0] +_NEON2SSESTORAGE float32x2_t vld1_lane_f32(__transfersize(1) float32_t const * ptr, float32x2_t vec, __constrange(0,1) int lane); // VLD1.32 {d0[0]}, [r0] +_NEON2SSE_GLOBAL int64x1_t vld1_lane_s64(__transfersize(1) int64_t const * ptr, int64x1_t vec, __constrange(0,0) int lane); //VLD1.64 {d0}, [r0] +_NEON2SSE_GLOBAL poly8x8_t vld1_lane_p8(__transfersize(1) poly8_t const * ptr, poly8x8_t vec, __constrange(0,7) int lane); //VLD1.8 {d0[0]}, [r0] +_NEON2SSE_GLOBAL poly16x4_t vld1_lane_p16(__transfersize(1) poly16_t const * ptr, poly16x4_t vec, __constrange(0,3) int lane); //VLD1.16 {d0[0]}, [r0] +//Load all lanes of vector with same value from memory +_NEON2SSE_GLOBAL uint8x16_t vld1q_dup_u8(__transfersize(1) uint8_t const * ptr); // VLD1.8 {d0[]}, [r0] +_NEON2SSE_GLOBAL uint16x8_t vld1q_dup_u16(__transfersize(1) uint16_t const * ptr); // VLD1.16 {d0[]}, [r0] +_NEON2SSE_GLOBAL uint32x4_t vld1q_dup_u32(__transfersize(1) uint32_t const * ptr); // VLD1.32 {d0[]}, [r0] +_NEON2SSESTORAGE uint64x2_t vld1q_dup_u64(__transfersize(1) uint64_t const * ptr); // VLD1.64 {d0}, [r0] +_NEON2SSE_GLOBAL int8x16_t vld1q_dup_s8(__transfersize(1) int8_t const * ptr); // VLD1.8 {d0[]}, [r0] +_NEON2SSE_GLOBAL int16x8_t vld1q_dup_s16(__transfersize(1) int16_t const * ptr); // VLD1.16 {d0[]}, [r0] +_NEON2SSE_GLOBAL int32x4_t vld1q_dup_s32(__transfersize(1) int32_t const * ptr); // VLD1.32 {d0[]}, [r0] +_NEON2SSE_GLOBAL int64x2_t vld1q_dup_s64(__transfersize(1) int64_t const * ptr); // VLD1.64 {d0}, [r0] +_NEON2SSE_GLOBAL float16x8_t vld1q_dup_f16(__transfersize(1) __fp16 const * ptr); // VLD1.16 {d0[]}, [r0] +_NEON2SSE_GLOBAL float32x4_t vld1q_dup_f32(__transfersize(1) float32_t const * ptr); // VLD1.32 {d0[]}, [r0] +_NEON2SSE_GLOBAL poly8x16_t vld1q_dup_p8(__transfersize(1) poly8_t const * ptr); // VLD1.8 {d0[]}, [r0] +_NEON2SSE_GLOBAL poly16x8_t vld1q_dup_p16(__transfersize(1) poly16_t const * ptr); // VLD1.16 {d0[]}, [r0] +_NEON2SSESTORAGE uint8x8_t vld1_dup_u8(__transfersize(1) uint8_t const * ptr); // VLD1.8 {d0[]}, [r0] +_NEON2SSESTORAGE uint16x4_t vld1_dup_u16(__transfersize(1) uint16_t const * ptr); // VLD1.16 {d0[]}, [r0] +_NEON2SSESTORAGE uint32x2_t vld1_dup_u32(__transfersize(1) uint32_t const * ptr); // VLD1.32 {d0[]}, [r0] +_NEON2SSESTORAGE uint64x1_t vld1_dup_u64(__transfersize(1) uint64_t const * ptr); // VLD1.64 {d0}, [r0] +_NEON2SSE_GLOBAL int8x8_t vld1_dup_s8(__transfersize(1) int8_t const * ptr); // VLD1.8 {d0[]}, [r0] +_NEON2SSE_GLOBAL int16x4_t vld1_dup_s16(__transfersize(1) int16_t const * ptr); // VLD1.16 {d0[]}, [r0] +_NEON2SSE_GLOBAL int32x2_t vld1_dup_s32(__transfersize(1) int32_t const * ptr); // VLD1.32 {d0[]}, [r0] +_NEON2SSE_GLOBAL int64x1_t vld1_dup_s64(__transfersize(1) int64_t const * ptr); // VLD1.64 {d0}, [r0] +_NEON2SSE_GLOBAL float16x4_t vld1_dup_f16(__transfersize(1) __fp16 const * ptr); // VLD1.16 {d0[]}, [r0] +_NEON2SSESTORAGE float32x2_t vld1_dup_f32(__transfersize(1) float32_t const * ptr); // VLD1.32 {d0[]}, [r0] +_NEON2SSE_GLOBAL poly8x8_t vld1_dup_p8(__transfersize(1) poly8_t const * ptr); // VLD1.8 {d0[]}, [r0] +_NEON2SSE_GLOBAL poly16x4_t vld1_dup_p16(__transfersize(1) poly16_t const * ptr); // VLD1.16 {d0[]}, [r0] +//Store a single vector or lane. Stores all lanes or a single lane of a vector. 
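As a brief orientation for the store group that follows (paired with the vld1q loads declared above), here is a minimal usage sketch. Assumptions: the header is included under its upstream name "NEON_2_SSE.h" and the target is x86; on ARM the identical code builds against <arm_neon.h>. The buffer names and values are illustrative only.

```c
/* Minimal sketch: round-trip a whole vector through memory using the
 * vld1q_f32 / vst1q_f32 declarations in this header. Assumes the header
 * is available as "NEON_2_SSE.h" (on ARM, <arm_neon.h> works identically). */
#include <stdio.h>
#include "NEON_2_SSE.h"

int main(void)
{
    float in[4]  = { 1.0f, 2.0f, 3.0f, 4.0f };
    float out[4] = { 0 };

    float32x4_t v = vld1q_f32(in);   /* load 4 floats, matching __transfersize(4) */
    vst1q_f32(out, v);               /* store all 4 lanes back to memory */

    printf("%f %f\n", (double)out[0], (double)out[3]); /* 1.000000 4.000000 */
    return 0;
}
```

The `__transfersize(n)` annotation on each declaration documents how many elements the pointer must provide; it does not change codegen.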
+//Store a single vector into memory +_NEON2SSE_GLOBAL void vst1q_u8(__transfersize(16) uint8_t * ptr, uint8x16_t val); // VST1.8 {d0, d1}, [r0] +_NEON2SSE_GLOBAL void vst1q_u16(__transfersize(8) uint16_t * ptr, uint16x8_t val); // VST1.16 {d0, d1}, [r0] +_NEON2SSE_GLOBAL void vst1q_u32(__transfersize(4) uint32_t * ptr, uint32x4_t val); // VST1.32 {d0, d1}, [r0] +_NEON2SSE_GLOBAL void vst1q_u64(__transfersize(2) uint64_t * ptr, uint64x2_t val); // VST1.64 {d0, d1}, [r0] +_NEON2SSE_GLOBAL void vst1q_s8(__transfersize(16) int8_t * ptr, int8x16_t val); // VST1.8 {d0, d1}, [r0] +_NEON2SSE_GLOBAL void vst1q_s16(__transfersize(8) int16_t * ptr, int16x8_t val); // VST1.16 {d0, d1}, [r0] +_NEON2SSE_GLOBAL void vst1q_s32(__transfersize(4) int32_t * ptr, int32x4_t val); // VST1.32 {d0, d1}, [r0] +_NEON2SSE_GLOBAL void vst1q_s64(__transfersize(2) int64_t * ptr, int64x2_t val); // VST1.64 {d0, d1}, [r0] +_NEON2SSE_GLOBAL void vst1q_f16(__transfersize(8) __fp16 * ptr, float16x8_t val); // VST1.16 {d0, d1}, [r0] +_NEON2SSESTORAGE void vst1q_f32(__transfersize(4) float32_t * ptr, float32x4_t val); // VST1.32 {d0, d1}, [r0] +_NEON2SSE_GLOBAL void vst1q_p8(__transfersize(16) poly8_t * ptr, poly8x16_t val); // VST1.8 {d0, d1}, [r0] +_NEON2SSE_GLOBAL void vst1q_p16(__transfersize(8) poly16_t * ptr, poly16x8_t val); // VST1.16 {d0, d1}, [r0] +_NEON2SSESTORAGE void vst1_u8(__transfersize(8) uint8_t * ptr, uint8x8_t val); // VST1.8 {d0}, [r0] +_NEON2SSESTORAGE void vst1_u16(__transfersize(4) uint16_t * ptr, uint16x4_t val); // VST1.16 {d0}, [r0] +_NEON2SSESTORAGE void vst1_u32(__transfersize(2) uint32_t * ptr, uint32x2_t val); // VST1.32 {d0}, [r0] +_NEON2SSESTORAGE void vst1_u64(__transfersize(1) uint64_t * ptr, uint64x1_t val); // VST1.64 {d0}, [r0] +_NEON2SSE_GLOBAL void vst1_s8(__transfersize(8) int8_t * ptr, int8x8_t val); // VST1.8 {d0}, [r0] +_NEON2SSE_GLOBAL void vst1_s16(__transfersize(4) int16_t * ptr, int16x4_t val); // VST1.16 {d0}, [r0] +_NEON2SSE_GLOBAL void vst1_s32(__transfersize(2) int32_t * ptr, int32x2_t val); // VST1.32 {d0}, [r0] +_NEON2SSE_GLOBAL void vst1_s64(__transfersize(1) int64_t * ptr, int64x1_t val); // VST1.64 {d0}, [r0] +_NEON2SSE_GLOBAL void vst1_f16(__transfersize(4) __fp16 * ptr, float16x4_t val); // VST1.16 {d0}, [r0] +_NEON2SSESTORAGE void vst1_f32(__transfersize(2) float32_t * ptr, float32x2_t val); // VST1.32 {d0}, [r0] +_NEON2SSE_GLOBAL void vst1_p8(__transfersize(8) poly8_t * ptr, poly8x8_t val); // VST1.8 {d0}, [r0] +_NEON2SSE_GLOBAL void vst1_p16(__transfersize(4) poly16_t * ptr, poly16x4_t val); // VST1.16 {d0}, [r0] +//Store a lane of a vector into memory +//Loads of an N-element structure +//Load N-element structure from memory +_NEON2SSESTORAGE uint8x16x2_t vld2q_u8(__transfersize(32) uint8_t const * ptr); // VLD2.8 {d0, d2}, [r0] +_NEON2SSESTORAGE uint16x8x2_t vld2q_u16(__transfersize(16) uint16_t const * ptr); // VLD2.16 {d0, d2}, [r0] +_NEON2SSESTORAGE uint32x4x2_t vld2q_u32(__transfersize(8) uint32_t const * ptr); // VLD2.32 {d0, d2}, [r0] +_NEON2SSE_GLOBAL int8x16x2_t vld2q_s8(__transfersize(32) int8_t const * ptr); // VLD2.8 {d0, d2}, [r0] +_NEON2SSE_GLOBAL int16x8x2_t vld2q_s16(__transfersize(16) int16_t const * ptr); // VLD2.16 {d0, d2}, [r0] +_NEON2SSE_GLOBAL int32x4x2_t vld2q_s32(__transfersize(8) int32_t const * ptr); // VLD2.32 {d0, d2}, [r0] +_NEON2SSE_GLOBAL float16x8x2_t vld2q_f16(__transfersize(16) __fp16 const * ptr); // VLD2.16 {d0, d2}, [r0] +_NEON2SSESTORAGE float32x4x2_t vld2q_f32(__transfersize(8) float32_t const * ptr); // VLD2.32 {d0, d2}, 
[r0] +_NEON2SSE_GLOBAL poly8x16x2_t vld2q_p8(__transfersize(32) poly8_t const * ptr); // VLD2.8 {d0, d2}, [r0] +_NEON2SSE_GLOBAL poly16x8x2_t vld2q_p16(__transfersize(16) poly16_t const * ptr); // VLD2.16 {d0, d2}, [r0] +_NEON2SSESTORAGE uint8x8x2_t vld2_u8(__transfersize(16) uint8_t const * ptr); // VLD2.8 {d0, d1}, [r0] +_NEON2SSESTORAGE uint16x4x2_t vld2_u16(__transfersize(8) uint16_t const * ptr); // VLD2.16 {d0, d1}, [r0] +_NEON2SSESTORAGE uint32x2x2_t vld2_u32(__transfersize(4) uint32_t const * ptr); // VLD2.32 {d0, d1}, [r0] +_NEON2SSESTORAGE uint64x1x2_t vld2_u64(__transfersize(2) uint64_t const * ptr); // VLD1.64 {d0, d1}, [r0] +_NEON2SSE_GLOBAL int8x8x2_t vld2_s8(__transfersize(16) int8_t const * ptr); // VLD2.8 {d0, d1}, [r0] +_NEON2SSE_GLOBAL int16x4x2_t vld2_s16(__transfersize(8) int16_t const * ptr); // VLD2.16 {d0, d1}, [r0] +_NEON2SSE_GLOBAL int32x2x2_t vld2_s32(__transfersize(4) int32_t const * ptr); // VLD2.32 {d0, d1}, [r0] +_NEON2SSE_GLOBAL int64x1x2_t vld2_s64(__transfersize(2) int64_t const * ptr); // VLD1.64 {d0, d1}, [r0] +//float16x4x2_t vld2_f16(__transfersize(8) __fp16 const * ptr); // VLD2.16 {d0, d1}, [r0] +_NEON2SSESTORAGE float32x2x2_t vld2_f32(__transfersize(4) float32_t const * ptr); // VLD2.32 {d0, d1}, [r0] +_NEON2SSE_GLOBAL poly8x8x2_t vld2_p8(__transfersize(16) poly8_t const * ptr); // VLD2.8 {d0, d1}, [r0] +_NEON2SSE_GLOBAL poly16x4x2_t vld2_p16(__transfersize(8) poly16_t const * ptr); // VLD2.16 {d0, d1}, [r0] +_NEON2SSESTORAGE uint8x16x3_t vld3q_u8(__transfersize(48) uint8_t const * ptr); // VLD3.8 {d0, d2, d4}, [r0] +_NEON2SSESTORAGE uint16x8x3_t vld3q_u16(__transfersize(24) uint16_t const * ptr); // VLD3.16 {d0, d2, d4}, [r0] +_NEON2SSESTORAGE uint32x4x3_t vld3q_u32(__transfersize(12) uint32_t const * ptr); // VLD3.32 {d0, d2, d4}, [r0] +_NEON2SSE_GLOBAL int8x16x3_t vld3q_s8(__transfersize(48) int8_t const * ptr); // VLD3.8 {d0, d2, d4}, [r0] +_NEON2SSE_GLOBAL int16x8x3_t vld3q_s16(__transfersize(24) int16_t const * ptr); // VLD3.16 {d0, d2, d4}, [r0] +_NEON2SSE_GLOBAL int32x4x3_t vld3q_s32(__transfersize(12) int32_t const * ptr); // VLD3.32 {d0, d2, d4}, [r0] +_NEON2SSE_GLOBAL float16x8x3_t vld3q_f16(__transfersize(24) __fp16 const * ptr); // VLD3.16 {d0, d2, d4}, [r0] +_NEON2SSESTORAGE float32x4x3_t vld3q_f32(__transfersize(12) float32_t const * ptr); // VLD3.32 {d0, d2, d4}, [r0] +_NEON2SSE_GLOBAL poly8x16x3_t vld3q_p8(__transfersize(48) poly8_t const * ptr); // VLD3.8 {d0, d2, d4}, [r0] +_NEON2SSE_GLOBAL poly16x8x3_t vld3q_p16(__transfersize(24) poly16_t const * ptr); // VLD3.16 {d0, d2, d4}, [r0] +_NEON2SSESTORAGE uint8x8x3_t vld3_u8(__transfersize(24) uint8_t const * ptr); // VLD3.8 {d0, d1, d2}, [r0] +_NEON2SSESTORAGE uint16x4x3_t vld3_u16(__transfersize(12) uint16_t const * ptr); // VLD3.16 {d0, d1, d2}, [r0] +_NEON2SSESTORAGE uint32x2x3_t vld3_u32(__transfersize(6) uint32_t const * ptr); // VLD3.32 {d0, d1, d2}, [r0] +_NEON2SSESTORAGE uint64x1x3_t vld3_u64(__transfersize(3) uint64_t const * ptr); // VLD1.64 {d0, d1, d2}, [r0] +_NEON2SSE_GLOBAL int8x8x3_t vld3_s8(__transfersize(24) int8_t const * ptr); // VLD3.8 {d0, d1, d2}, [r0] +_NEON2SSE_GLOBAL int16x4x3_t vld3_s16(__transfersize(12) int16_t const * ptr); // VLD3.16 {d0, d1, d2}, [r0] +_NEON2SSE_GLOBAL int32x2x3_t vld3_s32(__transfersize(6) int32_t const * ptr); // VLD3.32 {d0, d1, d2}, [r0] +_NEON2SSE_GLOBAL int64x1x3_t vld3_s64(__transfersize(3) int64_t const * ptr); // VLD1.64 {d0, d1, d2}, [r0] +_NEON2SSE_GLOBAL float16x4x3_t vld3_f16(__transfersize(12) __fp16 const * ptr); // VLD3.16 {d0, d1, d2}, 
[r0] +_NEON2SSESTORAGE float32x2x3_t vld3_f32(__transfersize(6) float32_t const * ptr); // VLD3.32 {d0, d1, d2}, [r0] +_NEON2SSE_GLOBAL poly8x8x3_t vld3_p8(__transfersize(24) poly8_t const * ptr); // VLD3.8 {d0, d1, d2}, [r0] +_NEON2SSE_GLOBAL poly16x4x3_t vld3_p16(__transfersize(12) poly16_t const * ptr); // VLD3.16 {d0, d1, d2}, [r0] +_NEON2SSESTORAGE uint8x16x4_t vld4q_u8(__transfersize(64) uint8_t const * ptr); // VLD4.8 {d0, d2, d4, d6}, [r0] +_NEON2SSESTORAGE uint16x8x4_t vld4q_u16(__transfersize(32) uint16_t const * ptr); // VLD4.16 {d0, d2, d4, d6}, [r0] +_NEON2SSESTORAGE uint32x4x4_t vld4q_u32(__transfersize(16) uint32_t const * ptr); // VLD4.32 {d0, d2, d4, d6}, [r0] +_NEON2SSE_GLOBAL int8x16x4_t vld4q_s8(__transfersize(64) int8_t const * ptr); // VLD4.8 {d0, d2, d4, d6}, [r0] +_NEON2SSE_GLOBAL int16x8x4_t vld4q_s16(__transfersize(32) int16_t const * ptr); // VLD4.16 {d0, d2, d4, d6}, [r0] +_NEON2SSE_GLOBAL int32x4x4_t vld4q_s32(__transfersize(16) int32_t const * ptr); // VLD4.32 {d0, d2, d4, d6}, [r0] +_NEON2SSE_GLOBAL float16x8x4_t vld4q_f16(__transfersize(32) __fp16 const * ptr); // VLD4.16 {d0, d2, d4, d6}, [r0] +_NEON2SSESTORAGE float32x4x4_t vld4q_f32(__transfersize(16) float32_t const * ptr); // VLD4.32 {d0, d2, d4, d6}, [r0] +_NEON2SSE_GLOBAL poly8x16x4_t vld4q_p8(__transfersize(64) poly8_t const * ptr); // VLD4.8 {d0, d2, d4, d6}, [r0] +_NEON2SSE_GLOBAL poly16x8x4_t vld4q_p16(__transfersize(32) poly16_t const * ptr); // VLD4.16 {d0, d2, d4, d6}, [r0] +_NEON2SSESTORAGE uint8x8x4_t vld4_u8(__transfersize(32) uint8_t const * ptr); // VLD4.8 {d0, d1, d2, d3}, [r0] +_NEON2SSESTORAGE uint16x4x4_t vld4_u16(__transfersize(16) uint16_t const * ptr); // VLD4.16 {d0, d1, d2, d3}, [r0] +_NEON2SSESTORAGE uint32x2x4_t vld4_u32(__transfersize(8) uint32_t const * ptr); // VLD4.32 {d0, d1, d2, d3}, [r0] +_NEON2SSESTORAGE uint64x1x4_t vld4_u64(__transfersize(4) uint64_t const * ptr); // VLD1.64 {d0, d1, d2, d3}, [r0] +_NEON2SSE_GLOBAL int8x8x4_t vld4_s8(__transfersize(32) int8_t const * ptr); // VLD4.8 {d0, d1, d2, d3}, [r0] +_NEON2SSE_GLOBAL int16x4x4_t vld4_s16(__transfersize(16) int16_t const * ptr); // VLD4.16 {d0, d1, d2, d3}, [r0] +_NEON2SSE_GLOBAL int32x2x4_t vld4_s32(__transfersize(8) int32_t const * ptr); // VLD4.32 {d0, d1, d2, d3}, [r0] +_NEON2SSE_GLOBAL int64x1x4_t vld4_s64(__transfersize(4) int64_t const * ptr); // VLD1.64 {d0, d1, d2, d3}, [r0] +_NEON2SSE_GLOBAL float16x4x4_t vld4_f16(__transfersize(16) __fp16 const * ptr); // VLD4.16 {d0, d1, d2, d3}, [r0] +_NEON2SSESTORAGE float32x2x4_t vld4_f32(__transfersize(8) float32_t const * ptr); // VLD4.32 {d0, d1, d2, d3}, [r0] +_NEON2SSE_GLOBAL poly8x8x4_t vld4_p8(__transfersize(32) poly8_t const * ptr); // VLD4.8 {d0, d1, d2, d3}, [r0] +_NEON2SSE_GLOBAL poly16x4x4_t vld4_p16(__transfersize(16) poly16_t const * ptr); // VLD4.16 {d0, d1, d2, d3}, [r0] +//Load all lanes of N-element structure with same value from memory +_NEON2SSESTORAGE uint8x8x2_t vld2_dup_u8(__transfersize(2) uint8_t const * ptr); // VLD2.8 {d0[], d1[]}, [r0] +_NEON2SSESTORAGE uint16x4x2_t vld2_dup_u16(__transfersize(2) uint16_t const * ptr); // VLD2.16 {d0[], d1[]}, [r0] +_NEON2SSESTORAGE uint32x2x2_t vld2_dup_u32(__transfersize(2) uint32_t const * ptr); // VLD2.32 {d0[], d1[]}, [r0] +_NEON2SSE_GLOBAL uint64x1x2_t vld2_dup_u64(__transfersize(2) uint64_t const * ptr); // VLD1.64 {d0, d1}, [r0] +_NEON2SSE_GLOBAL int8x8x2_t vld2_dup_s8(__transfersize(2) int8_t const * ptr); // VLD2.8 {d0[], d1[]}, [r0] +_NEON2SSE_GLOBAL int16x4x2_t vld2_dup_s16(__transfersize(2) 
int16_t const * ptr); // VLD2.16 {d0[], d1[]}, [r0] +_NEON2SSE_GLOBAL int32x2x2_t vld2_dup_s32(__transfersize(2) int32_t const * ptr); // VLD2.32 {d0[], d1[]}, [r0] +_NEON2SSE_GLOBAL int64x1x2_t vld2_dup_s64(__transfersize(2) int64_t const * ptr); // VLD1.64 {d0, d1}, [r0] +//float16x4x2_t vld2_dup_f16(__transfersize(2) __fp16 const * ptr); // VLD2.16 {d0[], d1[]}, [r0] +_NEON2SSESTORAGE float32x2x2_t vld2_dup_f32(__transfersize(2) float32_t const * ptr); // VLD2.32 {d0[], d1[]}, [r0] +_NEON2SSE_GLOBAL poly8x8x2_t vld2_dup_p8(__transfersize(2) poly8_t const * ptr); // VLD2.8 {d0[], d1[]}, [r0] +_NEON2SSE_GLOBAL poly16x4x2_t vld2_dup_p16(__transfersize(2) poly16_t const * ptr); // VLD2.16 {d0[], d1[]}, [r0] +_NEON2SSESTORAGE uint8x8x3_t vld3_dup_u8(__transfersize(3) uint8_t const * ptr); // VLD3.8 {d0[], d1[], d2[]}, [r0] +_NEON2SSESTORAGE uint16x4x3_t vld3_dup_u16(__transfersize(3) uint16_t const * ptr); // VLD3.16 {d0[], d1[], d2[]}, [r0] +_NEON2SSESTORAGE uint32x2x3_t vld3_dup_u32(__transfersize(3) uint32_t const * ptr); // VLD3.32 {d0[], d1[], d2[]}, [r0] +_NEON2SSESTORAGE uint64x1x3_t vld3_dup_u64(__transfersize(3) uint64_t const * ptr); // VLD1.64 {d0, d1, d2}, [r0] +_NEON2SSE_GLOBAL int8x8x3_t vld3_dup_s8(__transfersize(3) int8_t const * ptr); // VLD3.8 {d0[], d1[], d2[]}, [r0] +_NEON2SSE_GLOBAL int16x4x3_t vld3_dup_s16(__transfersize(3) int16_t const * ptr); // VLD3.16 {d0[], d1[], d2[]}, [r0] +_NEON2SSE_GLOBAL int32x2x3_t vld3_dup_s32(__transfersize(3) int32_t const * ptr); // VLD3.32 {d0[], d1[], d2[]}, [r0] +_NEON2SSE_GLOBAL int64x1x3_t vld3_dup_s64(__transfersize(3) int64_t const * ptr); // VLD1.64 {d0, d1, d2}, [r0] +_NEON2SSE_GLOBAL float16x4x3_t vld3_dup_f16(__transfersize(3) __fp16 const * ptr); // VLD3.16 {d0[], d1[], d2[]}, [r0] +_NEON2SSESTORAGE float32x2x3_t vld3_dup_f32(__transfersize(3) float32_t const * ptr); // VLD3.32 {d0[], d1[], d2[]}, [r0] +_NEON2SSE_GLOBAL poly8x8x3_t vld3_dup_p8(__transfersize(3) poly8_t const * ptr); // VLD3.8 {d0[], d1[], d2[]}, [r0] +_NEON2SSE_GLOBAL poly16x4x3_t vld3_dup_p16(__transfersize(3) poly16_t const * ptr); // VLD3.16 {d0[], d1[], d2[]}, [r0] +_NEON2SSESTORAGE uint8x8x4_t vld4_dup_u8(__transfersize(4) uint8_t const * ptr); // VLD4.8 {d0[], d1[], d2[], d3[]}, [r0] +_NEON2SSESTORAGE uint16x4x4_t vld4_dup_u16(__transfersize(4) uint16_t const * ptr); // VLD4.16 {d0[], d1[], d2[], d3[]}, [r0] +_NEON2SSESTORAGE uint32x2x4_t vld4_dup_u32(__transfersize(4) uint32_t const * ptr); // VLD4.32 {d0[], d1[], d2[], d3[]}, [r0] +_NEON2SSESTORAGE uint64x1x4_t vld4_dup_u64(__transfersize(4) uint64_t const * ptr); // VLD1.64 {d0, d1, d2, d3}, [r0] +_NEON2SSE_GLOBAL int8x8x4_t vld4_dup_s8(__transfersize(4) int8_t const * ptr); // VLD4.8 {d0[], d1[], d2[], d3[]}, [r0] +_NEON2SSE_GLOBAL int16x4x4_t vld4_dup_s16(__transfersize(4) int16_t const * ptr); // VLD4.16 {d0[], d1[], d2[], d3[]}, [r0] +_NEON2SSE_GLOBAL int32x2x4_t vld4_dup_s32(__transfersize(4) int32_t const * ptr); // VLD4.32 {d0[], d1[], d2[], d3[]}, [r0] +_NEON2SSE_GLOBAL int64x1x4_t vld4_dup_s64(__transfersize(4) int64_t const * ptr); // VLD1.64 {d0, d1, d2, d3}, [r0] +_NEON2SSE_GLOBAL float16x4x4_t vld4_dup_f16(__transfersize(4) __fp16 const * ptr); // VLD4.16 {d0[], d1[], d2[], d3[]}, [r0] +_NEON2SSESTORAGE float32x2x4_t vld4_dup_f32(__transfersize(4) float32_t const * ptr); // VLD4.32 {d0[], d1[], d2[], d3[]}, [r0] +_NEON2SSE_GLOBAL poly8x8x4_t vld4_dup_p8(__transfersize(4) poly8_t const * ptr); // VLD4.8 {d0[], d1[], d2[], d3[]}, [r0] +_NEON2SSE_GLOBAL poly16x4x4_t 
vld4_dup_p16(__transfersize(4) poly16_t const * ptr); // VLD4.16 {d0[], d1[], d2[], d3[]}, [r0] +//Load a single lane of N-element structure from memory +//the functions below are modified to deal with the error C2719: 'src': formal parameter with __declspec(align('16')) won't be aligned +_NEON2SSESTORAGE uint16x8x2_t vld2q_lane_u16_ptr(__transfersize(2) uint16_t const * ptr, uint16x8x2_t * src, __constrange(0,7) int lane); // VLD2.16 {d0[0], d2[0]}, [r0] +_NEON2SSESTORAGE uint32x4x2_t vld2q_lane_u32_ptr(__transfersize(2) uint32_t const * ptr, uint32x4x2_t * src, __constrange(0,3) int lane); // VLD2.32 {d0[0], d2[0]}, [r0] +_NEON2SSESTORAGE int16x8x2_t vld2q_lane_s16_ptr(__transfersize(2) int16_t const * ptr, int16x8x2_t * src, __constrange(0,7) int lane); // VLD2.16 {d0[0], d2[0]}, [r0] +_NEON2SSESTORAGE int32x4x2_t vld2q_lane_s32_ptr(__transfersize(2) int32_t const * ptr, int32x4x2_t * src, __constrange(0,3) int lane); // VLD2.32 {d0[0], d2[0]}, [r0] +_NEON2SSE_GLOBAL float16x8x2_t vld2q_lane_f16_ptr(__transfersize(2) __fp16 const * ptr, float16x8x2_t * src, __constrange(0,7) int lane); // VLD2.16 {d0[0], d2[0]}, [r0] +_NEON2SSESTORAGE float32x4x2_t vld2q_lane_f32_ptr(__transfersize(2) float32_t const * ptr, float32x4x2_t * src, __constrange(0,3) int lane); // VLD2.32 {d0[0], d2[0]}, [r0] +_NEON2SSE_GLOBAL poly16x8x2_t vld2q_lane_p16_ptr(__transfersize(2) poly16_t const * ptr, poly16x8x2_t * src, __constrange(0,7) int lane); // VLD2.16 {d0[0], d2[0]}, [r0] +_NEON2SSESTORAGE uint8x8x2_t vld2_lane_u8(__transfersize(2) uint8_t const * ptr, uint8x8x2_t src, __constrange(0,7) int lane); //VLD2.8 {d0[0], d1[0]}, [r0] +_NEON2SSESTORAGE uint16x4x2_t vld2_lane_u16(__transfersize(2) uint16_t const * ptr, uint16x4x2_t src, __constrange(0,3) int lane); // VLD2.16 {d0[0], d1[0]}, [r0] +_NEON2SSESTORAGE uint32x2x2_t vld2_lane_u32(__transfersize(2) uint32_t const * ptr, uint32x2x2_t src, __constrange(0,1) int lane); // VLD2.32 {d0[0], d1[0]}, [r0] +_NEON2SSE_GLOBAL int8x8x2_t vld2_lane_s8(__transfersize(2) int8_t const * ptr, int8x8x2_t src, __constrange(0,7) int lane); //VLD2.8 {d0[0], d1[0]}, [r0] +_NEON2SSE_GLOBAL int16x4x2_t vld2_lane_s16(__transfersize(2) int16_t const * ptr, int16x4x2_t src, __constrange(0,3) int lane); //VLD2.16 {d0[0], d1[0]}, [r0] +_NEON2SSE_GLOBAL int32x2x2_t vld2_lane_s32(__transfersize(2) int32_t const * ptr, int32x2x2_t src, __constrange(0,1) int lane); //VLD2.32 {d0[0], d1[0]}, [r0] +//float16x4x2_t vld2_lane_f16_ptr(__transfersize(2) __fp16 const * ptr, float16x4x2_t * src, __constrange(0,3) int lane); // VLD2.16 {d0[0], d1[0]}, [r0] +_NEON2SSESTORAGE float32x2x2_t vld2_lane_f32(__transfersize(2) float32_t const * ptr, float32x2x2_t src, __constrange(0,1) int lane); // VLD2.32 {d0[0], d1[0]}, [r0] +_NEON2SSE_GLOBAL poly8x8x2_t vld2_lane_p8(__transfersize(2) poly8_t const * ptr, poly8x8x2_t src, __constrange(0,7) int lane); //VLD2.8 {d0[0], d1[0]}, [r0] +_NEON2SSE_GLOBAL poly16x4x2_t vld2_lane_p16(__transfersize(2) poly16_t const * ptr, poly16x4x2_t src, __constrange(0,3) int lane); // VLD2.16 {d0[0], d1[0]}, [r0] +_NEON2SSESTORAGE uint16x8x3_t vld3q_lane_u16_ptr(__transfersize(3) uint16_t const * ptr, uint16x8x3_t * src, __constrange(0,7) int lane); // VLD3.16 {d0[0], d2[0], d4[0]}, [r0] +_NEON2SSESTORAGE uint32x4x3_t vld3q_lane_u32_ptr(__transfersize(3) uint32_t const * ptr, uint32x4x3_t * src, __constrange(0,3) int lane); // VLD3.32 {d0[0], d2[0], d4[0]}, [r0] +_NEON2SSESTORAGE int16x8x3_t vld3q_lane_s16_ptr(__transfersize(3) int16_t const * ptr, int16x8x3_t * src, 
__constrange(0,7) int lane); // VLD3.16 {d0[0], d2[0], d4[0]}, [r0] +_NEON2SSESTORAGE int32x4x3_t vld3q_lane_s32_ptr(__transfersize(3) int32_t const * ptr, int32x4x3_t * src, __constrange(0,3) int lane); // VLD3.32 {d0[0], d2[0], d4[0]}, [r0] +_NEON2SSE_GLOBAL float16x8x3_t vld3q_lane_f16_ptr(__transfersize(3) __fp16 const * ptr, float16x8x3_t * src, __constrange(0,7) int lane); // VLD3.16 {d0[0], d2[0], d4[0]}, [r0] +_NEON2SSESTORAGE float32x4x3_t vld3q_lane_f32_ptr(__transfersize(3) float32_t const * ptr, float32x4x3_t * src, __constrange(0,3) int lane); // VLD3.32 {d0[0], d2[0], d4[0]}, [r0] +_NEON2SSE_GLOBAL poly16x8x3_t vld3q_lane_p16_ptr(__transfersize(3) poly16_t const * ptr, poly16x8x3_t * src, __constrange(0,7) int lane); // VLD3.16 {d0[0], d2[0], d4[0]}, [r0] +_NEON2SSESTORAGE uint8x8x3_t vld3_lane_u8(__transfersize(3) uint8_t const * ptr, uint8x8x3_t src, __constrange(0,7) int lane); //VLD3.8 {d0[0], d1[0], d2[0]}, [r0] +_NEON2SSESTORAGE uint16x4x3_t vld3_lane_u16(__transfersize(3) uint16_t const * ptr, uint16x4x3_t src, __constrange(0,3) int lane); // VLD3.16 {d0[0], d1[0], d2[0]}, [r0] +_NEON2SSESTORAGE uint32x2x3_t vld3_lane_u32(__transfersize(3) uint32_t const * ptr, uint32x2x3_t src, __constrange(0,1) int lane); // VLD3.32 {d0[0], d1[0], d2[0]}, [r0] +_NEON2SSE_GLOBAL int8x8x3_t vld3_lane_s8(__transfersize(3) int8_t const * ptr, int8x8x3_t src, __constrange(0,7) int lane); //VLD3.8 {d0[0], d1[0], d2[0]}, [r0] +_NEON2SSE_GLOBAL int16x4x3_t vld3_lane_s16(__transfersize(3) int16_t const * ptr, int16x4x3_t src, __constrange(0,3) int lane); //VLD3.16 {d0[0], d1[0], d2[0]}, [r0] +_NEON2SSE_GLOBAL int32x2x3_t vld3_lane_s32(__transfersize(3) int32_t const * ptr, int32x2x3_t src, __constrange(0,1) int lane); //VLD3.32 {d0[0], d1[0], d2[0]}, [r0] +_NEON2SSE_GLOBAL float16x4x3_t vld3_lane_f16_ptr(__transfersize(3) __fp16 const * ptr, float16x4x3_t * src, __constrange(0,3) int lane); // VLD3.16 {d0[0], d1[0], d2[0]}, [r0] +_NEON2SSESTORAGE float32x2x3_t vld3_lane_f32(__transfersize(3) float32_t const * ptr, float32x2x3_t src, __constrange(0,1) int lane); // VLD3.32 {d0[0], d1[0], d2[0]}, [r0] +_NEON2SSE_GLOBAL poly8x8x3_t vld3_lane_p8(__transfersize(3) poly8_t const * ptr, poly8x8x3_t src, __constrange(0,7) int lane); //VLD3.8 {d0[0], d1[0], d2[0]}, [r0] +_NEON2SSE_GLOBAL poly16x4x3_t vld3_lane_p16(__transfersize(3) poly16_t const * ptr, poly16x4x3_t src, __constrange(0,3) int lane); // VLD3.16 {d0[0], d1[0], d2[0]}, [r0] +_NEON2SSESTORAGE uint16x8x4_t vld4q_lane_u16_ptr(__transfersize(4) uint16_t const * ptr, uint16x8x4_t * src, __constrange(0,7) int lane); // VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] +_NEON2SSESTORAGE uint32x4x4_t vld4q_lane_u32_ptr(__transfersize(4) uint32_t const * ptr, uint32x4x4_t * src, __constrange(0,3) int lane); // VLD4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0] +_NEON2SSE_GLOBAL int16x8x4_t vld4q_lane_s16_ptr(__transfersize(4) int16_t const * ptr, int16x8x4_t * src, __constrange(0,7) int lane); // VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] +_NEON2SSE_GLOBAL int32x4x4_t vld4q_lane_s32_ptr(__transfersize(4) int32_t const * ptr, int32x4x4_t * src, __constrange(0,3) int lane); // VLD4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0] +_NEON2SSE_GLOBAL float16x8x4_t vld4q_lane_f16_ptr(__transfersize(4) __fp16 const * ptr, float16x8x4_t * src, __constrange(0,7) int lane); // VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] +_NEON2SSESTORAGE float32x4x4_t vld4q_lane_f32_ptr(__transfersize(4) float32_t const * ptr, float32x4x4_t * src, __constrange(0,3) int lane); // VLD4.32 {d0[0], 
d2[0], d4[0], d6[0]}, [r0] +_NEON2SSE_GLOBAL poly16x8x4_t vld4q_lane_p16_ptr(__transfersize(4) poly16_t const * ptr, poly16x8x4_t * src, __constrange(0,7) int lane); // VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] +_NEON2SSESTORAGE uint8x8x4_t vld4_lane_u8(__transfersize(4) uint8_t const * ptr, uint8x8x4_t src, __constrange(0,7) int lane); //VLD4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0] +_NEON2SSESTORAGE uint16x4x4_t vld4_lane_u16(__transfersize(4) uint16_t const * ptr, uint16x4x4_t src, __constrange(0,3) int lane); // VLD4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0] +_NEON2SSESTORAGE uint32x2x4_t vld4_lane_u32(__transfersize(4) uint32_t const * ptr, uint32x2x4_t src, __constrange(0,1) int lane); // VLD4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0] +_NEON2SSE_GLOBAL int8x8x4_t vld4_lane_s8(__transfersize(4) int8_t const * ptr, int8x8x4_t src, __constrange(0,7) int lane); //VLD4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0] +_NEON2SSE_GLOBAL int16x4x4_t vld4_lane_s16(__transfersize(4) int16_t const * ptr, int16x4x4_t src, __constrange(0,3) int lane); //VLD4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0] +_NEON2SSE_GLOBAL int32x2x4_t vld4_lane_s32(__transfersize(4) int32_t const * ptr, int32x2x4_t src, __constrange(0,1) int lane); //VLD4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0] +_NEON2SSE_GLOBAL float16x4x4_t vld4_lane_f16_ptr(__transfersize(4) __fp16 const * ptr, float16x4x4_t * src, __constrange(0,3) int lane); // VLD4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0] +_NEON2SSESTORAGE float32x2x4_t vld4_lane_f32(__transfersize(4) float32_t const * ptr, float32x2x4_t src, __constrange(0,1) int lane); // VLD4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0] +_NEON2SSE_GLOBAL poly8x8x4_t vld4_lane_p8(__transfersize(4) poly8_t const * ptr, poly8x8x4_t src, __constrange(0,7) int lane); //VLD4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0] +_NEON2SSE_GLOBAL poly16x4x4_t vld4_lane_p16(__transfersize(4) poly16_t const * ptr, poly16x4x4_t src, __constrange(0,3) int lane); // VLD4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0] +//Store N-element structure to memory +_NEON2SSESTORAGE void vst2q_u8_ptr(__transfersize(32) uint8_t * ptr, uint8x16x2_t const * val); // VST2.8 {d0, d2}, [r0] +_NEON2SSESTORAGE void vst2q_u16_ptr(__transfersize(16) uint16_t * ptr, uint16x8x2_t const * val); // VST2.16 {d0, d2}, [r0] +_NEON2SSESTORAGE void vst2q_u32_ptr(__transfersize(8) uint32_t * ptr, uint32x4x2_t const * val); // VST2.32 {d0, d2}, [r0] +_NEON2SSE_GLOBAL void vst2q_s8_ptr(__transfersize(32) int8_t * ptr, int8x16x2_t const * val); // VST2.8 {d0, d2}, [r0] +_NEON2SSE_GLOBAL void vst2q_s16_ptr(__transfersize(16) int16_t * ptr, int16x8x2_t const * val); // VST2.16 {d0, d2}, [r0] +_NEON2SSE_GLOBAL void vst2q_s32_ptr(__transfersize(8) int32_t * ptr, int32x4x2_t const * val); // VST2.32 {d0, d2}, [r0] +_NEON2SSE_GLOBAL void vst2q_f16_ptr(__transfersize(16) __fp16 * ptr, float16x8x2_t const * val); // VST2.16 {d0, d2}, [r0] +_NEON2SSESTORAGE void vst2q_f32_ptr(__transfersize(8) float32_t * ptr, float32x4x2_t const * val); // VST2.32 {d0, d2}, [r0] +_NEON2SSE_GLOBAL void vst2q_p8_ptr(__transfersize(32) poly8_t * ptr, poly8x16x2_t const * val); // VST2.8 {d0, d2}, [r0] +_NEON2SSE_GLOBAL void vst2q_p16_ptr(__transfersize(16) poly16_t * ptr, poly16x8x2_t const * val); // VST2.16 {d0, d2}, [r0] +_NEON2SSESTORAGE void vst2_u8(__transfersize(16) uint8_t * ptr, uint8x8x2_t val); // VST2.8 {d0, d1}, [r0] +_NEON2SSESTORAGE void vst2_u16(__transfersize(8) uint16_t * ptr, uint16x4x2_t val); // VST2.16 {d0, d1}, [r0] +_NEON2SSESTORAGE void vst2_u32(__transfersize(4) uint32_t * ptr, uint32x2x2_t val); // 
VST2.32 {d0, d1}, [r0] +_NEON2SSESTORAGE void vst2_u64(__transfersize(2) uint64_t * ptr, uint64x1x2_t val); // VST1.64 {d0, d1}, [r0] +_NEON2SSE_GLOBAL void vst2_s8(__transfersize(16) int8_t * ptr, int8x8x2_t val); // VST2.8 {d0, d1}, [r0] +_NEON2SSE_GLOBAL void vst2_s16(__transfersize(8) int16_t * ptr, int16x4x2_t val); // VST2.16 {d0, d1}, [r0] +_NEON2SSE_GLOBAL void vst2_s32(__transfersize(4) int32_t * ptr, int32x2x2_t val); // VST2.32 {d0, d1}, [r0] +_NEON2SSE_GLOBAL void vst2_s64(__transfersize(2) int64_t * ptr, int64x1x2_t val); // VST1.64 {d0, d1}, [r0] +//void vst2_f16_ptr(__transfersize(8) __fp16 * ptr, float16x4x2_t const * val); // VST2.16 {d0, d1}, [r0] +_NEON2SSE_GLOBAL void vst2_f32_ptr(__transfersize(4) float32_t * ptr, float32x2x2_t const * val); // VST2.32 {d0, d1}, [r0] +_NEON2SSE_GLOBAL void vst2_p8(__transfersize(16) poly8_t * ptr, poly8x8x2_t val); // VST2.8 {d0, d1}, [r0] +_NEON2SSE_GLOBAL void vst2_p16(__transfersize(8) poly16_t * ptr, poly16x4x2_t val); // VST2.16 {d0, d1}, [r0] +_NEON2SSESTORAGE void vst3q_u8_ptr(__transfersize(48) uint8_t * ptr, uint8x16x3_t const * val); // VST3.8 {d0, d2, d4}, [r0] +_NEON2SSESTORAGE void vst3q_u16_ptr(__transfersize(24) uint16_t * ptr, uint16x8x3_t const * val); // VST3.16 {d0, d2, d4}, [r0] +_NEON2SSESTORAGE void vst3q_u32_ptr(__transfersize(12) uint32_t * ptr, uint32x4x3_t const * val); // VST3.32 {d0, d2, d4}, [r0] +_NEON2SSE_GLOBAL void vst3q_s8_ptr(__transfersize(48) int8_t * ptr, int8x16x3_t const * val); // VST3.8 {d0, d2, d4}, [r0] +_NEON2SSE_GLOBAL void vst3q_s16_ptr(__transfersize(24) int16_t * ptr, int16x8x3_t const * val); // VST3.16 {d0, d2, d4}, [r0] +_NEON2SSE_GLOBAL void vst3q_s32_ptr(__transfersize(12) int32_t * ptr, int32x4x3_t const * val); // VST3.32 {d0, d2, d4}, [r0] +_NEON2SSE_GLOBAL void vst3q_f16_ptr(__transfersize(24) __fp16 * ptr, float16x8x3_t const * val); // VST3.16 {d0, d2, d4}, [r0] +_NEON2SSESTORAGE void vst3q_f32_ptr(__transfersize(12) float32_t * ptr, float32x4x3_t const * val); // VST3.32 {d0, d2, d4}, [r0] +_NEON2SSE_GLOBAL void vst3q_p8_ptr(__transfersize(48) poly8_t * ptr, poly8x16x3_t const * val); // VST3.8 {d0, d2, d4}, [r0] +_NEON2SSE_GLOBAL void vst3q_p16_ptr(__transfersize(24) poly16_t * ptr, poly16x8x3_t const * val); // VST3.16 {d0, d2, d4}, [r0] +_NEON2SSESTORAGE void vst3_u8(__transfersize(24) uint8_t * ptr, uint8x8x3_t val); // VST3.8 {d0, d1, d2}, [r0] +_NEON2SSESTORAGE void vst3_u16(__transfersize(12) uint16_t * ptr, uint16x4x3_t val); // VST3.16 {d0, d1, d2}, [r0] +_NEON2SSESTORAGE void vst3_u32(__transfersize(6) uint32_t * ptr, uint32x2x3_t val); // VST3.32 {d0, d1, d2}, [r0] +_NEON2SSESTORAGE void vst3_u64(__transfersize(3) uint64_t * ptr, uint64x1x3_t val); // VST1.64 {d0, d1, d2}, [r0] +_NEON2SSE_GLOBAL void vst3_s8(__transfersize(24) int8_t * ptr, int8x8x3_t val); // VST3.8 {d0, d1, d2}, [r0] +_NEON2SSE_GLOBAL void vst3_s16(__transfersize(12) int16_t * ptr, int16x4x3_t val); // VST3.16 {d0, d1, d2}, [r0] +_NEON2SSE_GLOBAL void vst3_s32(__transfersize(6) int32_t * ptr, int32x2x3_t val); // VST3.32 {d0, d1, d2}, [r0] +_NEON2SSE_GLOBAL void vst3_s64(__transfersize(3) int64_t * ptr, int64x1x3_t val); // VST1.64 {d0, d1, d2}, [r0] +_NEON2SSE_GLOBAL void vst3_f16_ptr(__transfersize(12) __fp16 * ptr, float16x4x3_t const * val); // VST3.16 {d0, d1, d2}, [r0] +_NEON2SSESTORAGE void vst3_f32(__transfersize(6) float32_t * ptr, float32x2x3_t val); // VST3.32 {d0, d1, d2}, [r0] +_NEON2SSE_GLOBAL void vst3_p8(__transfersize(24) poly8_t * ptr, poly8x8x3_t val); // VST3.8 {d0, d1, d2}, 
[r0] +_NEON2SSE_GLOBAL void vst3_p16(__transfersize(12) poly16_t * ptr, poly16x4x3_t val); // VST3.16 {d0, d1, d2}, [r0] +_NEON2SSESTORAGE void vst4q_u8_ptr(__transfersize(64) uint8_t * ptr, uint8x16x4_t const * val); // VST4.8 {d0, d2, d4, d6}, [r0] +_NEON2SSESTORAGE void vst4q_u16_ptr(__transfersize(32) uint16_t * ptr, uint16x8x4_t const * val); // VST4.16 {d0, d2, d4, d6}, [r0] +_NEON2SSESTORAGE void vst4q_u32_ptr(__transfersize(16) uint32_t * ptr, uint32x4x4_t const * val); // VST4.32 {d0, d2, d4, d6}, [r0] +_NEON2SSE_GLOBAL void vst4q_s8_ptr(__transfersize(64) int8_t * ptr, int8x16x4_t const * val); // VST4.8 {d0, d2, d4, d6}, [r0] +_NEON2SSE_GLOBAL void vst4q_s16_ptr(__transfersize(32) int16_t * ptr, int16x8x4_t const * val); // VST4.16 {d0, d2, d4, d6}, [r0] +_NEON2SSE_GLOBAL void vst4q_s32_ptr(__transfersize(16) int32_t * ptr, int32x4x4_t const * val); // VST4.32 {d0, d2, d4, d6}, [r0] +_NEON2SSE_GLOBAL void vst4q_f16_ptr(__transfersize(32) __fp16 * ptr, float16x8x4_t const * val); // VST4.16 {d0, d2, d4, d6}, [r0] +_NEON2SSESTORAGE void vst4q_f32_ptr(__transfersize(16) float32_t * ptr, float32x4x4_t const * val); // VST4.32 {d0, d2, d4, d6}, [r0] +_NEON2SSE_GLOBAL void vst4q_p8_ptr(__transfersize(64) poly8_t * ptr, poly8x16x4_t const * val); // VST4.8 {d0, d2, d4, d6}, [r0] +_NEON2SSE_GLOBAL void vst4q_p16_ptr(__transfersize(32) poly16_t * ptr, poly16x8x4_t const * val); // VST4.16 {d0, d2, d4, d6}, [r0] +_NEON2SSESTORAGE void vst4_u8(__transfersize(32) uint8_t * ptr, uint8x8x4_t val); // VST4.8 {d0, d1, d2, d3}, [r0] +_NEON2SSESTORAGE void vst4_u16(__transfersize(16) uint16_t * ptr, uint16x4x4_t val); // VST4.16 {d0, d1, d2, d3}, [r0] +_NEON2SSESTORAGE void vst4_u32(__transfersize(8) uint32_t * ptr, uint32x2x4_t val); // VST4.32 {d0, d1, d2, d3}, [r0] +_NEON2SSESTORAGE void vst4_u64(__transfersize(4) uint64_t * ptr, uint64x1x4_t val); // VST1.64 {d0, d1, d2, d3}, [r0] +_NEON2SSE_GLOBAL void vst4_s8(__transfersize(32) int8_t * ptr, int8x8x4_t val); // VST4.8 {d0, d1, d2, d3}, [r0] +_NEON2SSE_GLOBAL void vst4_s16(__transfersize(16) int16_t * ptr, int16x4x4_t val); // VST4.16 {d0, d1, d2, d3}, [r0] +_NEON2SSE_GLOBAL void vst4_s32(__transfersize(8) int32_t * ptr, int32x2x4_t val); // VST4.32 {d0, d1, d2, d3}, [r0] +_NEON2SSE_GLOBAL void vst4_s64(__transfersize(4) int64_t * ptr, int64x1x4_t val); // VST1.64 {d0, d1, d2, d3}, [r0] +_NEON2SSE_GLOBAL void vst4_f16_ptr(__transfersize(16) __fp16 * ptr, float16x4x4_t const * val); // VST4.16 {d0, d1, d2, d3}, [r0] +_NEON2SSESTORAGE void vst4_f32(__transfersize(8) float32_t * ptr, float32x2x4_t val); // VST4.32 {d0, d1, d2, d3}, [r0] +_NEON2SSE_GLOBAL void vst4_p8(__transfersize(32) poly8_t * ptr, poly8x8x4_t val); // VST4.8 {d0, d1, d2, d3}, [r0] +_NEON2SSE_GLOBAL void vst4_p16(__transfersize(16) poly16_t * ptr, poly16x4x4_t val); // VST4.16 {d0, d1, d2, d3}, [r0] +//Store a single lane of N-element structure to memory +_NEON2SSESTORAGE void vst2q_lane_u16_ptr(__transfersize(2) uint16_t * ptr, uint16x8x2_t const * val, __constrange(0,7) int lane); // VST2.16{d0[0], d2[0]}, [r0] +_NEON2SSESTORAGE void vst2q_lane_u32_ptr(__transfersize(2) uint32_t * ptr, uint32x4x2_t const * val, __constrange(0,3) int lane); // VST2.32{d0[0], d2[0]}, [r0] +_NEON2SSE_GLOBAL void vst2q_lane_s16_ptr(__transfersize(2) int16_t * ptr, int16x8x2_t const * val, __constrange(0,7) int lane); // VST2.16{d0[0], d2[0]}, [r0] +_NEON2SSE_GLOBAL void vst2q_lane_s32_ptr(__transfersize(2) int32_t * ptr, int32x4x2_t const * val, __constrange(0,3) int lane); // VST2.32{d0[0], 
d2[0]}, [r0] +_NEON2SSE_GLOBAL void vst2q_lane_f16_ptr(__transfersize(2) __fp16 * ptr, float16x8x2_t const * val, __constrange(0,7) int lane); // VST2.16{d0[0], d2[0]}, [r0] +_NEON2SSESTORAGE void vst2q_lane_f32_ptr(__transfersize(2) float32_t * ptr, float32x4x2_t const * val, __constrange(0,3) int lane); //VST2.32 {d0[0], d2[0]}, [r0] +_NEON2SSE_GLOBAL void vst2q_lane_p16_ptr(__transfersize(2) poly16_t * ptr, poly16x8x2_t const * val, __constrange(0,7) int lane); // VST2.16{d0[0], d2[0]}, [r0] +_NEON2SSESTORAGE void vst2_lane_u8(__transfersize(2) uint8_t * ptr, uint8x8x2_t val, __constrange(0,7) int lane); // VST2.8{d0[0], d1[0]}, [r0] +_NEON2SSESTORAGE void vst2_lane_u16(__transfersize(2) uint16_t * ptr, uint16x4x2_t val, __constrange(0,3) int lane); // VST2.16{d0[0], d1[0]}, [r0] +_NEON2SSESTORAGE void vst2_lane_u32(__transfersize(2) uint32_t * ptr, uint32x2x2_t val, __constrange(0,1) int lane); // VST2.32{d0[0], d1[0]}, [r0] +_NEON2SSE_GLOBAL void vst2_lane_s8(__transfersize(2) int8_t * ptr, int8x8x2_t val, __constrange(0,7) int lane); // VST2.8 {d0[0],d1[0]}, [r0] +_NEON2SSE_GLOBAL void vst2_lane_s16(__transfersize(2) int16_t * ptr, int16x4x2_t val, __constrange(0,3) int lane); // VST2.16{d0[0], d1[0]}, [r0] +_NEON2SSE_GLOBAL void vst2_lane_s32(__transfersize(2) int32_t * ptr, int32x2x2_t val, __constrange(0,1) int lane); // VST2.32{d0[0], d1[0]}, [r0] +_NEON2SSE_GLOBAL void vst2_lane_f16_ptr(__transfersize(2) __fp16 * ptr, float16x4x2_t const * val, __constrange(0,3) int lane); // VST2.16{d0[0], d1[0]}, [r0] +_NEON2SSESTORAGE void vst2_lane_f32(__transfersize(2) float32_t * ptr, float32x2x2_t val, __constrange(0,1) int lane); // VST2.32{d0[0], d1[0]}, [r0] +_NEON2SSE_GLOBAL void vst2_lane_p8(__transfersize(2) poly8_t * ptr, poly8x8x2_t val, __constrange(0,7) int lane); // VST2.8{d0[0], d1[0]}, [r0] +_NEON2SSE_GLOBAL void vst2_lane_p16(__transfersize(2) poly16_t * ptr, poly16x4x2_t val, __constrange(0,3) int lane); // VST2.16{d0[0], d1[0]}, [r0] +_NEON2SSESTORAGE void vst3q_lane_u16_ptr(__transfersize(3) uint16_t * ptr, uint16x8x3_t const * val, __constrange(0,7) int lane); // VST3.16{d0[0], d2[0], d4[0]}, [r0] +_NEON2SSESTORAGE void vst3q_lane_u32_ptr(__transfersize(3) uint32_t * ptr, uint32x4x3_t const * val, __constrange(0,3) int lane); // VST3.32{d0[0], d2[0], d4[0]}, [r0] +_NEON2SSE_GLOBAL void vst3q_lane_s16_ptr(__transfersize(3) int16_t * ptr, int16x8x3_t const * val, __constrange(0,7) int lane); // VST3.16{d0[0], d2[0], d4[0]}, [r0] +_NEON2SSE_GLOBAL void vst3q_lane_s32_ptr(__transfersize(3) int32_t * ptr, int32x4x3_t const * val, __constrange(0,3) int lane); // VST3.32{d0[0], d2[0], d4[0]}, [r0] +_NEON2SSE_GLOBAL void vst3q_lane_f16_ptr(__transfersize(3) __fp16 * ptr, float16x8x3_t const * val, __constrange(0,7) int lane); // VST3.16{d0[0], d2[0], d4[0]}, [r0] +_NEON2SSESTORAGE void vst3q_lane_f32_ptr(__transfersize(3) float32_t * ptr, float32x4x3_t const * val, __constrange(0,3) int lane); //VST3.32 {d0[0], d2[0], d4[0]}, [r0] +_NEON2SSE_GLOBAL void vst3q_lane_p16_ptr(__transfersize(3) poly16_t * ptr, poly16x8x3_t const * val, __constrange(0,7) int lane); // VST3.16{d0[0], d2[0], d4[0]}, [r0] +_NEON2SSESTORAGE void vst3_lane_u8(__transfersize(3) uint8_t * ptr, uint8x8x3_t val, __constrange(0,7) int lane); // VST3.8{d0[0], d1[0], d2[0]}, [r0] +_NEON2SSESTORAGE void vst3_lane_u16(__transfersize(3) uint16_t * ptr, uint16x4x3_t val, __constrange(0,3) int lane); // VST3.16{d0[0], d1[0], d2[0]}, [r0] +_NEON2SSESTORAGE void vst3_lane_u32(__transfersize(3) uint32_t * ptr, 
uint32x2x3_t val, __constrange(0,1) int lane); // VST3.32{d0[0], d1[0], d2[0]}, [r0] +_NEON2SSE_GLOBAL void vst3_lane_s8(__transfersize(3) int8_t * ptr, int8x8x3_t val, __constrange(0,7) int lane); // VST3.8 {d0[0],d1[0], d2[0]}, [r0] +_NEON2SSE_GLOBAL void vst3_lane_s16(__transfersize(3) int16_t * ptr, int16x4x3_t val, __constrange(0,3) int lane); // VST3.16{d0[0], d1[0], d2[0]}, [r0] +_NEON2SSE_GLOBAL void vst3_lane_s32(__transfersize(3) int32_t * ptr, int32x2x3_t val, __constrange(0,1) int lane); // VST3.32{d0[0], d1[0], d2[0]}, [r0] +_NEON2SSE_GLOBAL void vst3_lane_f16_ptr(__transfersize(3) __fp16 * ptr, float16x4x3_t const * val, __constrange(0,3) int lane); // VST3.16{d0[0], d1[0], d2[0]}, [r0] +_NEON2SSESTORAGE void vst3_lane_f32(__transfersize(3) float32_t * ptr, float32x2x3_t val, __constrange(0,1) int lane); // VST3.32{d0[0], d1[0], d2[0]}, [r0] +_NEON2SSE_GLOBAL void vst3_lane_p8(__transfersize(3) poly8_t * ptr, poly8x8x3_t val, __constrange(0,7) int lane); // VST3.8{d0[0], d1[0], d2[0]}, [r0] +_NEON2SSE_GLOBAL void vst3_lane_p16(__transfersize(3) poly16_t * ptr, poly16x4x3_t val, __constrange(0,3) int lane); // VST3.16{d0[0], d1[0], d2[0]}, [r0] +_NEON2SSESTORAGE void vst4q_lane_u16_ptr(__transfersize(4) uint16_t * ptr, uint16x8x4_t const * val, __constrange(0,7) int lane); // VST4.16{d0[0], d2[0], d4[0], d6[0]}, [r0] +_NEON2SSESTORAGE void vst4q_lane_u32_ptr(__transfersize(4) uint32_t * ptr, uint32x4x4_t const * val, __constrange(0,3) int lane); // VST4.32{d0[0], d2[0], d4[0], d6[0]}, [r0] +_NEON2SSE_GLOBAL void vst4q_lane_s16_ptr(__transfersize(4) int16_t * ptr, int16x8x4_t const * val, __constrange(0,7) int lane); // VST4.16{d0[0], d2[0], d4[0], d6[0]}, [r0] +_NEON2SSE_GLOBAL void vst4q_lane_s32_ptr(__transfersize(4) int32_t * ptr, int32x4x4_t const * val, __constrange(0,3) int lane); // VST4.32{d0[0], d2[0], d4[0], d6[0]}, [r0] +_NEON2SSE_GLOBAL void vst4q_lane_f16_ptr(__transfersize(4) __fp16 * ptr, float16x8x4_t const * val, __constrange(0,7) int lane); // VST4.16{d0[0], d2[0], d4[0], d6[0]}, [r0] +_NEON2SSESTORAGE void vst4q_lane_f32_ptr(__transfersize(4) float32_t * ptr, float32x4x4_t const * val, __constrange(0,3) int lane); //VST4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0] +_NEON2SSE_GLOBAL void vst4q_lane_p16_ptr(__transfersize(4) poly16_t * ptr, poly16x8x4_t const * val, __constrange(0,7) int lane); // VST4.16{d0[0], d2[0], d4[0], d6[0]}, [r0] +_NEON2SSESTORAGE void vst4_lane_u8(__transfersize(4) uint8_t * ptr, uint8x8x4_t val, __constrange(0,7) int lane); // VST4.8{d0[0], d1[0], d2[0], d3[0]}, [r0] +_NEON2SSESTORAGE void vst4_lane_u16(__transfersize(4) uint16_t * ptr, uint16x4x4_t val, __constrange(0,3) int lane); // VST4.16{d0[0], d1[0], d2[0], d3[0]}, [r0] +_NEON2SSESTORAGE void vst4_lane_u32(__transfersize(4) uint32_t * ptr, uint32x2x4_t val, __constrange(0,1) int lane); // VST4.32{d0[0], d1[0], d2[0], d3[0]}, [r0] +_NEON2SSE_GLOBAL void vst4_lane_s8(__transfersize(4) int8_t * ptr, int8x8x4_t val, __constrange(0,7) int lane); // VST4.8 {d0[0],d1[0], d2[0], d3[0]}, [r0] +_NEON2SSE_GLOBAL void vst4_lane_s16(__transfersize(4) int16_t * ptr, int16x4x4_t val, __constrange(0,3) int lane); // VST4.16{d0[0], d1[0], d2[0], d3[0]}, [r0] +_NEON2SSE_GLOBAL void vst4_lane_s32(__transfersize(4) int32_t * ptr, int32x2x4_t val, __constrange(0,1) int lane); // VST4.32{d0[0], d1[0], d2[0], d3[0]}, [r0] +_NEON2SSE_GLOBAL void vst4_lane_f16_ptr(__transfersize(4) __fp16 * ptr, float16x4x4_t const * val, __constrange(0,3) int lane); // VST4.16{d0[0], d1[0], d2[0], d3[0]}, [r0] 
+_NEON2SSESTORAGE void vst4_lane_f32(__transfersize(4) float32_t * ptr, float32x2x4_t val, __constrange(0,1) int lane); // VST4.32{d0[0], d1[0], d2[0], d3[0]}, [r0] +_NEON2SSE_GLOBAL void vst4_lane_p8(__transfersize(4) poly8_t * ptr, poly8x8x4_t val, __constrange(0,7) int lane); // VST4.8{d0[0], d1[0], d2[0], d3[0]}, [r0] +_NEON2SSE_GLOBAL void vst4_lane_p16(__transfersize(4) poly16_t * ptr, poly16x4x4_t val, __constrange(0,3) int lane); // VST4.16{d0[0], d1[0], d2[0], d3[0]}, [r0] +//Extract lanes from a vector and put into a register. These intrinsics extract a single lane (element) from a vector. +_NEON2SSE_GLOBAL uint8_t vget_lane_u8(uint8x8_t vec, __constrange(0,7) int lane); // VMOV.U8 r0, d0[0] +_NEON2SSE_GLOBAL uint16_t vget_lane_u16(uint16x4_t vec, __constrange(0,3) int lane); // VMOV.U16 r0, d0[0] +_NEON2SSE_GLOBAL uint32_t vget_lane_u32(uint32x2_t vec, __constrange(0,1) int lane); // VMOV.32 r0, d0[0] +_NEON2SSE_GLOBAL int8_t vget_lane_s8(int8x8_t vec, __constrange(0,7) int lane); // VMOV.S8 r0, d0[0] +_NEON2SSE_GLOBAL int16_t vget_lane_s16(int16x4_t vec, __constrange(0,3) int lane); // VMOV.S16 r0, d0[0] +_NEON2SSE_GLOBAL int32_t vget_lane_s32(int32x2_t vec, __constrange(0,1) int lane); // VMOV.32 r0, d0[0] +_NEON2SSE_GLOBAL poly8_t vget_lane_p8(poly8x8_t vec, __constrange(0,7) int lane); // VMOV.U8 r0, d0[0] +_NEON2SSE_GLOBAL poly16_t vget_lane_p16(poly16x4_t vec, __constrange(0,3) int lane); // VMOV.U16 r0, d0[0] +_NEON2SSE_GLOBAL float32_t vget_lane_f32(float32x2_t vec, __constrange(0,1) int lane); // VMOV.32 r0, d0[0] +_NEON2SSE_GLOBAL uint8_t vgetq_lane_u8(uint8x16_t vec, __constrange(0,15) int lane); // VMOV.U8 r0, d0[0] +_NEON2SSE_GLOBAL uint16_t vgetq_lane_u16(uint16x8_t vec, __constrange(0,7) int lane); // VMOV.U16 r0, d0[0] +_NEON2SSE_GLOBAL uint32_t vgetq_lane_u32(uint32x4_t vec, __constrange(0,3) int lane); // VMOV.32 r0, d0[0] +_NEON2SSE_GLOBAL int8_t vgetq_lane_s8(int8x16_t vec, __constrange(0,15) int lane); // VMOV.S8 r0, d0[0] +_NEON2SSE_GLOBAL int16_t vgetq_lane_s16(int16x8_t vec, __constrange(0,7) int lane); // VMOV.S16 r0, d0[0] +_NEON2SSE_GLOBAL int32_t vgetq_lane_s32(int32x4_t vec, __constrange(0,3) int lane); // VMOV.32 r0, d0[0] +_NEON2SSE_GLOBAL poly8_t vgetq_lane_p8(poly8x16_t vec, __constrange(0,15) int lane); // VMOV.U8 r0, d0[0] +_NEON2SSE_GLOBAL poly16_t vgetq_lane_p16(poly16x8_t vec, __constrange(0,7) int lane); // VMOV.U16 r0, d0[0] +_NEON2SSESTORAGE float32_t vgetq_lane_f32(float32x4_t vec, __constrange(0,3) int lane); // VMOV.32 r0, d0[0] +_NEON2SSE_GLOBAL int64_t vget_lane_s64(int64x1_t vec, __constrange(0,0) int lane); // VMOV r0,r0,d0 +_NEON2SSE_GLOBAL uint64_t vget_lane_u64(uint64x1_t vec, __constrange(0,0) int lane); // VMOV r0,r0,d0 +_NEON2SSE_GLOBAL int64_t vgetq_lane_s64(int64x2_t vec, __constrange(0,1) int lane); // VMOV r0,r0,d0 +_NEON2SSE_GLOBAL uint64_t vgetq_lane_u64(uint64x2_t vec, __constrange(0,1) int lane); // VMOV r0,r0,d0 +//Load a single lane of a vector from a literal. These intrinsics set a single lane (element) within a vector. 
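+/* Usage sketch (illustrative only; the helper name is hypothetical, only the
+   intrinsics are from this API). vset_lane_* does not modify its input: it
+   returns a copy with one element replaced, and the lane index must be a
+   compile-time constant, as the __constrange annotations above indicate.
+
+       #include "NEON_2_SSE.h"              // or <arm_neon.h> on ARM builds
+
+       static uint16x4_t poke_lane0(uint16x4_t v, uint16_t value)
+       {
+           uint16_t old = vget_lane_u16(v, 0);  // read lane 0 into a scalar
+           (void)old;                           // shown only for the read side
+           return vset_lane_u16(value, v, 0);   // write lane 0 of a copy
+       }
+*/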
+_NEON2SSESTORAGE uint8x8_t vset_lane_u8(uint8_t value, uint8x8_t vec, __constrange(0,7) int lane); // VMOV.8 d0[0],r0 +_NEON2SSESTORAGE uint16x4_t vset_lane_u16(uint16_t value, uint16x4_t vec, __constrange(0,3) int lane); // VMOV.16 d0[0],r0 +_NEON2SSESTORAGE uint32x2_t vset_lane_u32(uint32_t value, uint32x2_t vec, __constrange(0,1) int lane); // VMOV.32 d0[0],r0 +_NEON2SSESTORAGE int8x8_t vset_lane_s8(int8_t value, int8x8_t vec, __constrange(0,7) int lane); // VMOV.8 d0[0],r0 +_NEON2SSESTORAGE int16x4_t vset_lane_s16(int16_t value, int16x4_t vec, __constrange(0,3) int lane); // VMOV.16 d0[0],r0 +_NEON2SSESTORAGE int32x2_t vset_lane_s32(int32_t value, int32x2_t vec, __constrange(0,1) int lane); // VMOV.32 d0[0],r0 +_NEON2SSE_GLOBAL poly8x8_t vset_lane_p8(poly8_t value, poly8x8_t vec, __constrange(0,7) int lane); // VMOV.8 d0[0],r0 +_NEON2SSE_GLOBAL poly16x4_t vset_lane_p16(poly16_t value, poly16x4_t vec, __constrange(0,3) int lane); // VMOV.16 d0[0],r0 +_NEON2SSESTORAGE float32x2_t vset_lane_f32(float32_t value, float32x2_t vec, __constrange(0,1) int lane); // VMOV.32 d0[0],r0 +_NEON2SSESTORAGE uint8x16_t vsetq_lane_u8(uint8_t value, uint8x16_t vec, __constrange(0,15) int lane); // VMOV.8 d0[0],r0 +_NEON2SSESTORAGE uint16x8_t vsetq_lane_u16(uint16_t value, uint16x8_t vec, __constrange(0,7) int lane); // VMOV.16 d0[0],r0 +_NEON2SSESTORAGE uint32x4_t vsetq_lane_u32(uint32_t value, uint32x4_t vec, __constrange(0,3) int lane); // VMOV.32 d0[0],r0 +_NEON2SSESTORAGE int8x16_t vsetq_lane_s8(int8_t value, int8x16_t vec, __constrange(0,15) int lane); // VMOV.8 d0[0],r0 +_NEON2SSESTORAGE int16x8_t vsetq_lane_s16(int16_t value, int16x8_t vec, __constrange(0,7) int lane); // VMOV.16 d0[0],r0 +_NEON2SSESTORAGE int32x4_t vsetq_lane_s32(int32_t value, int32x4_t vec, __constrange(0,3) int lane); // VMOV.32 d0[0],r0 +_NEON2SSE_GLOBAL poly8x16_t vsetq_lane_p8(poly8_t value, poly8x16_t vec, __constrange(0,15) int lane); // VMOV.8 d0[0],r0 +_NEON2SSE_GLOBAL poly16x8_t vsetq_lane_p16(poly16_t value, poly16x8_t vec, __constrange(0,7) int lane); // VMOV.16 d0[0],r0 +_NEON2SSESTORAGE float32x4_t vsetq_lane_f32(float32_t value, float32x4_t vec, __constrange(0,3) int lane); // VMOV.32 d0[0],r0 +_NEON2SSESTORAGE int64x1_t vset_lane_s64(int64_t value, int64x1_t vec, __constrange(0,0) int lane); // VMOV d0,r0,r0 +_NEON2SSESTORAGE uint64x1_t vset_lane_u64(uint64_t value, uint64x1_t vec, __constrange(0,0) int lane); // VMOV d0,r0,r0 +_NEON2SSESTORAGE int64x2_t vsetq_lane_s64(int64_t value, int64x2_t vec, __constrange(0,1) int lane); // VMOV d0,r0,r0 +_NEON2SSE_GLOBAL uint64x2_t vsetq_lane_u64(uint64_t value, uint64x2_t vec, __constrange(0,1) int lane); // VMOV d0,r0,r0 +//Initialize a vector from a literal bit pattern. 
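+/* Usage sketch (illustrative only; the helper name is hypothetical).
+   vcreate_* reinterprets a 64-bit integer as a 64-bit vector; lane 0 is
+   taken from the least significant bits of the literal.
+
+       static uint16x4_t make_0123(void)
+       {
+           return vcreate_u16(0x0003000200010000ULL);  // lanes {0, 1, 2, 3}
+       }
+*/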
+_NEON2SSESTORAGE int8x8_t vcreate_s8(uint64_t a); // VMOV d0,r0,r0 +_NEON2SSE_GLOBAL int16x4_t vcreate_s16(uint64_t a); // VMOV d0,r0,r0 +_NEON2SSE_GLOBAL int32x2_t vcreate_s32(uint64_t a); // VMOV d0,r0,r0 +_NEON2SSE_GLOBAL float16x4_t vcreate_f16(uint64_t a); // VMOV d0,r0,r0 +_NEON2SSESTORAGE float32x2_t vcreate_f32(uint64_t a); // VMOV d0,r0,r0 +_NEON2SSE_GLOBAL uint8x8_t vcreate_u8(uint64_t a); // VMOV d0,r0,r0 +_NEON2SSE_GLOBAL uint16x4_t vcreate_u16(uint64_t a); // VMOV d0,r0,r0 +_NEON2SSE_GLOBAL uint32x2_t vcreate_u32(uint64_t a); // VMOV d0,r0,r0 +_NEON2SSE_GLOBAL uint64x1_t vcreate_u64(uint64_t a); // VMOV d0,r0,r0 +_NEON2SSE_GLOBAL poly8x8_t vcreate_p8(uint64_t a); // VMOV d0,r0,r0 +_NEON2SSE_GLOBAL poly16x4_t vcreate_p16(uint64_t a); // VMOV d0,r0,r0 +_NEON2SSE_GLOBAL int64x1_t vcreate_s64(uint64_t a); // VMOV d0,r0,r0 +//Set all lanes to same value +//Load all lanes of vector to the same literal value +_NEON2SSESTORAGE uint8x8_t vdup_n_u8(uint8_t value); // VDUP.8 d0,r0 +_NEON2SSESTORAGE uint16x4_t vdup_n_u16(uint16_t value); // VDUP.16 d0,r0 +_NEON2SSESTORAGE uint32x2_t vdup_n_u32(uint32_t value); // VDUP.32 d0,r0 +_NEON2SSESTORAGE int8x8_t vdup_n_s8(int8_t value); // VDUP.8 d0,r0 +_NEON2SSESTORAGE int16x4_t vdup_n_s16(int16_t value); // VDUP.16 d0,r0 +_NEON2SSESTORAGE int32x2_t vdup_n_s32(int32_t value); // VDUP.32 d0,r0 +_NEON2SSE_GLOBAL poly8x8_t vdup_n_p8(poly8_t value); // VDUP.8 d0,r0 +_NEON2SSE_GLOBAL poly16x4_t vdup_n_p16(poly16_t value); // VDUP.16 d0,r0 +_NEON2SSESTORAGE float32x2_t vdup_n_f32(float32_t value); // VDUP.32 d0,r0 +_NEON2SSE_GLOBAL uint8x16_t vdupq_n_u8(uint8_t value); // VDUP.8 q0,r0 +_NEON2SSE_GLOBAL uint16x8_t vdupq_n_u16(uint16_t value); // VDUP.16 q0,r0 +_NEON2SSE_GLOBAL uint32x4_t vdupq_n_u32(uint32_t value); // VDUP.32 q0,r0 +_NEON2SSE_GLOBAL int8x16_t vdupq_n_s8(int8_t value); // VDUP.8 q0,r0 +_NEON2SSE_GLOBAL int16x8_t vdupq_n_s16(int16_t value); // VDUP.16 q0,r0 +_NEON2SSE_GLOBAL int32x4_t vdupq_n_s32(int32_t value); // VDUP.32 q0,r0 +_NEON2SSE_GLOBAL poly8x16_t vdupq_n_p8(poly8_t value); // VDUP.8 q0,r0 +_NEON2SSE_GLOBAL poly16x8_t vdupq_n_p16(poly16_t value); // VDUP.16 q0,r0 +_NEON2SSE_GLOBAL float32x4_t vdupq_n_f32(float32_t value); // VDUP.32 q0,r0 +_NEON2SSESTORAGE int64x1_t vdup_n_s64(int64_t value); // VMOV d0,r0,r0 +_NEON2SSESTORAGE uint64x1_t vdup_n_u64(uint64_t value); // VMOV d0,r0,r0 +_NEON2SSESTORAGE int64x2_t vdupq_n_s64(int64_t value); // VMOV d0,r0,r0 +_NEON2SSESTORAGE uint64x2_t vdupq_n_u64(uint64_t value); // VMOV d0,r0,r0 +_NEON2SSE_GLOBAL uint8x8_t vmov_n_u8(uint8_t value); // VDUP.8 d0,r0 +_NEON2SSE_GLOBAL uint16x4_t vmov_n_u16(uint16_t value); // VDUP.16 d0,r0 +_NEON2SSE_GLOBAL uint32x2_t vmov_n_u32(uint32_t value); // VDUP.32 d0,r0 +_NEON2SSE_GLOBAL int8x8_t vmov_n_s8(int8_t value); // VDUP.8 d0,r0 +_NEON2SSE_GLOBAL int16x4_t vmov_n_s16(int16_t value); // VDUP.16 d0,r0 +_NEON2SSE_GLOBAL int32x2_t vmov_n_s32(int32_t value); // VDUP.32 d0,r0 +_NEON2SSE_GLOBAL poly8x8_t vmov_n_p8(poly8_t value); // VDUP.8 d0,r0 +_NEON2SSE_GLOBAL poly16x4_t vmov_n_p16(poly16_t value); // VDUP.16 d0,r0 +_NEON2SSE_GLOBAL float32x2_t vmov_n_f32(float32_t value); // VDUP.32 d0,r0 +_NEON2SSE_GLOBAL uint8x16_t vmovq_n_u8(uint8_t value); // VDUP.8 q0,r0 +_NEON2SSE_GLOBAL uint16x8_t vmovq_n_u16(uint16_t value); // VDUP.16 q0,r0 +_NEON2SSE_GLOBAL uint32x4_t vmovq_n_u32(uint32_t value); // VDUP.32 q0,r0 +_NEON2SSE_GLOBAL int8x16_t vmovq_n_s8(int8_t value); // VDUP.8 q0,r0 +_NEON2SSE_GLOBAL int16x8_t vmovq_n_s16(int16_t value); // VDUP.16 q0,r0 
+_NEON2SSE_GLOBAL int32x4_t vmovq_n_s32(int32_t value); // VDUP.32 q0,r0 +_NEON2SSE_GLOBAL poly8x16_t vmovq_n_p8(poly8_t value); // VDUP.8 q0,r0 +_NEON2SSE_GLOBAL poly16x8_t vmovq_n_p16(poly16_t value); // VDUP.16 q0,r0 +_NEON2SSE_GLOBAL float32x4_t vmovq_n_f32(float32_t value); // VDUP.32 q0,r0 +_NEON2SSE_GLOBAL int64x1_t vmov_n_s64(int64_t value); // VMOV d0,r0,r0 +_NEON2SSE_GLOBAL uint64x1_t vmov_n_u64(uint64_t value); // VMOV d0,r0,r0 +_NEON2SSE_GLOBAL int64x2_t vmovq_n_s64(int64_t value); // VMOV d0,r0,r0 +_NEON2SSE_GLOBAL uint64x2_t vmovq_n_u64(uint64_t value); // VMOV d0,r0,r0 +//Load all lanes of the vector to the value of a lane of a vector +_NEON2SSESTORAGE uint8x8_t vdup_lane_u8(uint8x8_t vec, __constrange(0,7) int lane); // VDUP.8 d0,d0[0] +_NEON2SSESTORAGE uint16x4_t vdup_lane_u16(uint16x4_t vec, __constrange(0,3) int lane); // VDUP.16 d0,d0[0] +_NEON2SSESTORAGE uint32x2_t vdup_lane_u32(uint32x2_t vec, __constrange(0,1) int lane); // VDUP.32 d0,d0[0] +_NEON2SSE_GLOBAL int8x8_t vdup_lane_s8(int8x8_t vec, __constrange(0,7) int lane); // VDUP.8 d0,d0[0] +_NEON2SSE_GLOBAL int16x4_t vdup_lane_s16(int16x4_t vec, __constrange(0,3) int lane); // VDUP.16 d0,d0[0] +_NEON2SSE_GLOBAL int32x2_t vdup_lane_s32(int32x2_t vec, __constrange(0,1) int lane); // VDUP.32 d0,d0[0] +_NEON2SSE_GLOBAL poly8x8_t vdup_lane_p8(poly8x8_t vec, __constrange(0,7) int lane); // VDUP.8 d0,d0[0] +_NEON2SSE_GLOBAL poly16x4_t vdup_lane_p16(poly16x4_t vec, __constrange(0,3) int lane); // VDUP.16 d0,d0[0] +_NEON2SSESTORAGE float32x2_t vdup_lane_f32(float32x2_t vec, __constrange(0,1) int lane); // VDUP.32 d0,d0[0] +_NEON2SSESTORAGE uint8x16_t vdupq_lane_u8(uint8x8_t vec, __constrange(0,7) int lane); // VDUP.8 q0,d0[0] +_NEON2SSESTORAGE uint16x8_t vdupq_lane_u16(uint16x4_t vec, __constrange(0,3) int lane); // VDUP.16 q0,d0[0] +_NEON2SSESTORAGE uint32x4_t vdupq_lane_u32(uint32x2_t vec, __constrange(0,1) int lane); // VDUP.32 q0,d0[0] +_NEON2SSE_GLOBAL int8x16_t vdupq_lane_s8(int8x8_t vec, __constrange(0,7) int lane); // VDUP.8 q0,d0[0] +_NEON2SSE_GLOBAL int16x8_t vdupq_lane_s16(int16x4_t vec, __constrange(0,3) int lane); // VDUP.16 q0,d0[0] +_NEON2SSE_GLOBAL int32x4_t vdupq_lane_s32(int32x2_t vec, __constrange(0,1) int lane); // VDUP.32 q0,d0[0] +_NEON2SSE_GLOBAL poly8x16_t vdupq_lane_p8(poly8x8_t vec, __constrange(0,7) int lane); // VDUP.8 q0,d0[0] +_NEON2SSE_GLOBAL poly16x8_t vdupq_lane_p16(poly16x4_t vec, __constrange(0,3) int lane); // VDUP.16 q0,d0[0] +_NEON2SSE_GLOBAL float32x4_t vdupq_lane_f32(float32x2_t vec, __constrange(0,1) int lane); // VDUP.32 q0,d0[0] +_NEON2SSE_GLOBAL int64x1_t vdup_lane_s64(int64x1_t vec, __constrange(0,0) int lane); // VMOV d0,d0 +_NEON2SSE_GLOBAL uint64x1_t vdup_lane_u64(uint64x1_t vec, __constrange(0,0) int lane); // VMOV d0,d0 +_NEON2SSESTORAGE int64x2_t vdupq_lane_s64(int64x1_t vec, __constrange(0,0) int lane); // VMOV q0,q0 +_NEON2SSE_GLOBAL uint64x2_t vdupq_lane_u64(uint64x1_t vec, __constrange(0,0) int lane); // VMOV q0,q0 +//Combining vectors. These intrinsics join two 64 bit vectors into a single 128bit vector. 
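+/* Usage sketch (illustrative only; the helper name is hypothetical).
+   vcombine_* concatenates two 64-bit vectors: `low` supplies the
+   lower-numbered lanes, `high` the upper ones; the vget_low_*/vget_high_*
+   intrinsics further below invert the operation.
+
+       static uint8x16_t widen_pair(uint8x8_t lo, uint8x8_t hi)
+       {
+           return vcombine_u8(lo, hi);  // lo -> lanes 0..7, hi -> lanes 8..15
+       }
+*/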
+_NEON2SSESTORAGE int8x16_t vcombine_s8(int8x8_t low, int8x8_t high); // VMOV d0,d0 +_NEON2SSE_GLOBAL int16x8_t vcombine_s16(int16x4_t low, int16x4_t high); // VMOV d0,d0 +_NEON2SSE_GLOBAL int32x4_t vcombine_s32(int32x2_t low, int32x2_t high); // VMOV d0,d0 +_NEON2SSE_GLOBAL int64x2_t vcombine_s64(int64x1_t low, int64x1_t high); // VMOV d0,d0 +_NEON2SSE_GLOBAL float16x8_t vcombine_f16(float16x4_t low, float16x4_t high); // VMOV d0,d0 +_NEON2SSESTORAGE float32x4_t vcombine_f32(float32x2_t low, float32x2_t high); // VMOV d0,d0 +_NEON2SSE_GLOBAL uint8x16_t vcombine_u8(uint8x8_t low, uint8x8_t high); // VMOV d0,d0 +_NEON2SSE_GLOBAL uint16x8_t vcombine_u16(uint16x4_t low, uint16x4_t high); // VMOV d0,d0 +_NEON2SSE_GLOBAL uint32x4_t vcombine_u32(uint32x2_t low, uint32x2_t high); // VMOV d0,d0 +_NEON2SSE_GLOBAL uint64x2_t vcombine_u64(uint64x1_t low, uint64x1_t high); // VMOV d0,d0 +_NEON2SSE_GLOBAL poly8x16_t vcombine_p8(poly8x8_t low, poly8x8_t high); // VMOV d0,d0 +_NEON2SSE_GLOBAL poly16x8_t vcombine_p16(poly16x4_t low, poly16x4_t high); // VMOV d0,d0 +//Splitting vectors. These intrinsics split a 128 bit vector into 2 component 64 bit vectors +_NEON2SSESTORAGE int8x8_t vget_high_s8(int8x16_t a); // VMOV d0,d0 +_NEON2SSESTORAGE int16x4_t vget_high_s16(int16x8_t a); // VMOV d0,d0 +_NEON2SSESTORAGE int32x2_t vget_high_s32(int32x4_t a); // VMOV d0,d0 +_NEON2SSESTORAGE int64x1_t vget_high_s64(int64x2_t a); // VMOV d0,d0 +_NEON2SSE_GLOBAL float16x4_t vget_high_f16(float16x8_t a); // VMOV d0,d0 +_NEON2SSESTORAGE float32x2_t vget_high_f32(float32x4_t a); // VMOV d0,d0 +_NEON2SSE_GLOBAL uint8x8_t vget_high_u8(uint8x16_t a); // VMOV d0,d0 +_NEON2SSE_GLOBAL uint16x4_t vget_high_u16(uint16x8_t a); // VMOV d0,d0 +_NEON2SSE_GLOBAL uint32x2_t vget_high_u32(uint32x4_t a); // VMOV d0,d0 +_NEON2SSE_GLOBAL uint64x1_t vget_high_u64(uint64x2_t a); // VMOV d0,d0 +_NEON2SSE_GLOBAL poly8x8_t vget_high_p8(poly8x16_t a); // VMOV d0,d0 +_NEON2SSE_GLOBAL poly16x4_t vget_high_p16(poly16x8_t a); // VMOV d0,d0 +_NEON2SSESTORAGE int8x8_t vget_low_s8(int8x16_t a); // VMOV d0,d0 +_NEON2SSESTORAGE int16x4_t vget_low_s16(int16x8_t a); // VMOV d0,d0 +_NEON2SSESTORAGE int32x2_t vget_low_s32(int32x4_t a); // VMOV d0,d0 +_NEON2SSESTORAGE int64x1_t vget_low_s64(int64x2_t a); // VMOV d0,d0 +_NEON2SSE_GLOBAL float16x4_t vget_low_f16(float16x8_t a); // VMOV d0,d0 +_NEON2SSESTORAGE float32x2_t vget_low_f32(float32x4_t a); // VMOV d0,d0 +_NEON2SSE_GLOBAL uint8x8_t vget_low_u8(uint8x16_t a); // VMOV d0,d0 +_NEON2SSE_GLOBAL uint16x4_t vget_low_u16(uint16x8_t a); // VMOV d0,d0 +_NEON2SSE_GLOBAL uint32x2_t vget_low_u32(uint32x4_t a); // VMOV d0,d0 +_NEON2SSE_GLOBAL uint64x1_t vget_low_u64(uint64x2_t a); // VMOV d0,d0 +_NEON2SSE_GLOBAL poly8x8_t vget_low_p8(poly8x16_t a); // VMOV d0,d0 +_NEON2SSE_GLOBAL poly16x4_t vget_low_p16(poly16x8_t a); // VMOV d0,d0 +//Converting vectors. These intrinsics are used to convert vectors. 
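+/* Usage sketch (illustrative only; the helper name is hypothetical). The
+   plain float->integer conversions truncate toward zero, matching ARM VCVT;
+   the _n_ variants scale by 2^b before truncating, which gives a fixed-point
+   conversion.
+
+       static int32x4_t to_q16_16(float32x4_t f)
+       {
+           return vcvtq_n_s32_f32(f, 16);  // multiply by 2^16, then truncate
+       }
+*/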
+//Convert from float +_NEON2SSESTORAGE int32x2_t vcvt_s32_f32(float32x2_t a); // VCVT.S32.F32 d0, d0 +_NEON2SSESTORAGE uint32x2_t vcvt_u32_f32(float32x2_t a); // VCVT.U32.F32 d0, d0 +_NEON2SSESTORAGE int32x4_t vcvtq_s32_f32(float32x4_t a); // VCVT.S32.F32 q0, q0 +_NEON2SSESTORAGE uint32x4_t vcvtq_u32_f32(float32x4_t a); // VCVT.U32.F32 q0, q0 +_NEON2SSESTORAGE int32x2_t vcvt_n_s32_f32(float32x2_t a, __constrange(1,32) int b); // VCVT.S32.F32 d0, d0, #32 +_NEON2SSESTORAGE uint32x2_t vcvt_n_u32_f32(float32x2_t a, __constrange(1,32) int b); // VCVT.U32.F32 d0, d0, #32 +_NEON2SSESTORAGE int32x4_t vcvtq_n_s32_f32(float32x4_t a, __constrange(1,32) int b); // VCVT.S32.F32 q0, q0, #32 +_NEON2SSESTORAGE uint32x4_t vcvtq_n_u32_f32(float32x4_t a, __constrange(1,32) int b); // VCVT.U32.F32 q0, q0, #32 +_NEON2SSESTORAGE int32x4_t vcvtnq_s32_f32(float32x4_t a); // VCVTN.S32.F32 q0, q0 +//Convert to float +_NEON2SSESTORAGE float32x2_t vcvt_f32_s32(int32x2_t a); // VCVT.F32.S32 d0, d0 +_NEON2SSESTORAGE float32x2_t vcvt_f32_u32(uint32x2_t a); // VCVT.F32.U32 d0, d0 +_NEON2SSE_GLOBAL float32x4_t vcvtq_f32_s32(int32x4_t a); // VCVT.F32.S32 q0, q0 +_NEON2SSESTORAGE float32x4_t vcvtq_f32_u32(uint32x4_t a); // VCVT.F32.U32 q0, q0 +_NEON2SSESTORAGE float32x2_t vcvt_n_f32_s32(int32x2_t a, __constrange(1,32) int b); // VCVT.F32.S32 d0, d0, #32 +_NEON2SSESTORAGE float32x2_t vcvt_n_f32_u32(uint32x2_t a, __constrange(1,32) int b); // VCVT.F32.U32 d0, d0, #32 +_NEON2SSESTORAGE float32x4_t vcvtq_n_f32_s32(int32x4_t a, __constrange(1,32) int b); // VCVT.F32.S32 q0, q0, #32 +_NEON2SSESTORAGE float32x4_t vcvtq_n_f32_u32(uint32x4_t a, __constrange(1,32) int b); // VCVT.F32.U32 q0, q0, #32 +//Convert between floats +_NEON2SSE_GLOBAL float16x4_t vcvt_f16_f32(float32x4_t a); // VCVT.F16.F32 d0, q0 +_NEON2SSE_GLOBAL float32x4_t vcvt_f32_f16(float16x4_t a); // VCVT.F32.F16 q0, d0 +//Vector narrow integer +_NEON2SSESTORAGE int8x8_t vmovn_s16(int16x8_t a); // VMOVN.I16 d0,q0 +_NEON2SSESTORAGE int16x4_t vmovn_s32(int32x4_t a); // VMOVN.I32 d0,q0 +_NEON2SSESTORAGE int32x2_t vmovn_s64(int64x2_t a); // VMOVN.I64 d0,q0 +_NEON2SSE_GLOBAL uint8x8_t vmovn_u16(uint16x8_t a); // VMOVN.I16 d0,q0 +_NEON2SSE_GLOBAL uint16x4_t vmovn_u32(uint32x4_t a); // VMOVN.I32 d0,q0 +_NEON2SSE_GLOBAL uint32x2_t vmovn_u64(uint64x2_t a); // VMOVN.I64 d0,q0 +//Vector long move +_NEON2SSESTORAGE int16x8_t vmovl_s8(int8x8_t a); // VMOVL.S8 q0,d0 +_NEON2SSESTORAGE int32x4_t vmovl_s16(int16x4_t a); // VMOVL.S16 q0,d0 +_NEON2SSESTORAGE int64x2_t vmovl_s32(int32x2_t a); // VMOVL.S32 q0,d0 +_NEON2SSESTORAGE uint16x8_t vmovl_u8(uint8x8_t a); // VMOVL.U8 q0,d0 +_NEON2SSESTORAGE uint32x4_t vmovl_u16(uint16x4_t a); // VMOVL.U16 q0,d0 +_NEON2SSESTORAGE uint64x2_t vmovl_u32(uint32x2_t a); // VMOVL.U32 q0,d0 +//Vector saturating narrow integer +_NEON2SSESTORAGE int8x8_t vqmovn_s16(int16x8_t a); // VQMOVN.S16 d0,q0 +_NEON2SSESTORAGE int16x4_t vqmovn_s32(int32x4_t a); // VQMOVN.S32 d0,q0 +_NEON2SSESTORAGE int32x2_t vqmovn_s64(int64x2_t a); // VQMOVN.S64 d0,q0 +_NEON2SSESTORAGE uint8x8_t vqmovn_u16(uint16x8_t a); // VQMOVN.U16 d0,q0 +_NEON2SSESTORAGE uint16x4_t vqmovn_u32(uint32x4_t a); // VQMOVN.U32 d0,q0 +_NEON2SSESTORAGE uint32x2_t vqmovn_u64(uint64x2_t a); // VQMOVN.U64 d0,q0 +//Vector saturating narrow integer signed->unsigned +_NEON2SSESTORAGE uint8x8_t vqmovun_s16(int16x8_t a); // VQMOVUN.S16 d0,q0 +_NEON2SSESTORAGE uint16x4_t vqmovun_s32(int32x4_t a); // VQMOVUN.S32 d0,q0 +_NEON2SSESTORAGE uint32x2_t vqmovun_s64(int64x2_t a); // VQMOVUN.S64 d0,q0 +//Table look up 
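+/* Usage sketch (illustrative only; the helper name is hypothetical).
+   vtbl1_u8 uses each byte of the second operand as an index into the first;
+   indexes outside 0..7 produce 0, while the vtbx* variants leave the
+   corresponding destination byte unchanged instead.
+
+       static uint8x8_t reverse_bytes(uint8x8_t v)
+       {
+           const uint8x8_t idx = vcreate_u8(0x0001020304050607ULL); // 7,6,...,0
+           return vtbl1_u8(v, idx);
+       }
+*/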
+_NEON2SSESTORAGE uint8x8_t vtbl1_u8(uint8x8_t a, uint8x8_t b); // VTBL.8 d0, {d0}, d0 +_NEON2SSE_GLOBAL int8x8_t vtbl1_s8(int8x8_t a, int8x8_t b); // VTBL.8 d0, {d0}, d0 +_NEON2SSE_GLOBAL poly8x8_t vtbl1_p8(poly8x8_t a, uint8x8_t b); // VTBL.8 d0, {d0}, d0 +//Extended table look up intrinsics +_NEON2SSESTORAGE uint8x8_t vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c); // VTBX.8 d0, {d0}, d0 +_NEON2SSE_GLOBAL int8x8_t vtbx1_s8(int8x8_t a, int8x8_t b, int8x8_t c); // VTBX.8 d0, {d0}, d0 +_NEON2SSE_GLOBAL poly8x8_t vtbx1_p8(poly8x8_t a, poly8x8_t b, uint8x8_t c); // VTBX.8 d0, {d0}, d0 +_NEON2SSESTORAGE uint8x8_t vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c); // VTBX.8 d0, {d0, d1}, d0 +_NEON2SSE_GLOBAL int8x8_t vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c); // VTBX.8 d0, {d0, d1}, d0 +_NEON2SSE_GLOBAL poly8x8_t vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c); // VTBX.8 d0, {d0, d1}, d0 +_NEON2SSESTORAGE uint8x8_t vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c); // VTBX.8 d0, {d0, d1, d2}, d0 +_NEON2SSE_GLOBAL int8x8_t vtbx3_s8(int8x8_t a, int8x8x3_t b, int8x8_t c); // VTBX.8 d0, {d0, d1, d2}, d0 +_NEON2SSE_GLOBAL poly8x8_t vtbx3_p8(poly8x8_t a, poly8x8x3_t b, uint8x8_t c); // VTBX.8 d0, {d0, d1, d2}, d0 +_NEON2SSESTORAGE uint8x8_t vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c); // VTBX.8 d0, {d0, d1, d2, d3}, d0 +_NEON2SSE_GLOBAL int8x8_t vtbx4_s8(int8x8_t a, int8x8x4_t b, int8x8_t c); // VTBX.8 d0, {d0, d1, d2, d3}, d0 +_NEON2SSE_GLOBAL poly8x8_t vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c); // VTBX.8 d0, {d0, d1, d2, d3}, d0 +//Operations with a scalar value +//Vector multiply accumulate with scalar +_NEON2SSESTORAGE int16x4_t vmla_lane_s16(int16x4_t a, int16x4_t b, int16x4_t v, __constrange(0,3) int l); // VMLA.I16 d0, d0,d0[0] +_NEON2SSESTORAGE int32x2_t vmla_lane_s32(int32x2_t a, int32x2_t b, int32x2_t v, __constrange(0,1) int l); // VMLA.I32 d0, d0,d0[0] +_NEON2SSE_GLOBAL uint16x4_t vmla_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t v, __constrange(0,3) int l); // VMLA.I16 d0, d0,d0[0] +_NEON2SSE_GLOBAL uint32x2_t vmla_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t v, __constrange(0,1) int l); // VMLA.I32 d0, d0,d0[0] +_NEON2SSESTORAGE float32x2_t vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v, __constrange(0,1) int l); // VMLA.F32 d0,d0, d0[0] +_NEON2SSESTORAGE int16x8_t vmlaq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t v, __constrange(0,3) int l); // VMLA.I16 q0, q0,d0[0] +_NEON2SSESTORAGE int32x4_t vmlaq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v, __constrange(0,1) int l); // VMLA.I32 q0, q0,d0[0] +_NEON2SSE_GLOBAL uint16x8_t vmlaq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t v, __constrange(0,3) int l); // VMLA.I16 q0,q0, d0[0] +_NEON2SSE_GLOBAL uint32x4_t vmlaq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t v, __constrange(0,1) int l); // VMLA.I32 q0,q0, d0[0] +_NEON2SSESTORAGE float32x4_t vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v, __constrange(0,1) int l); // VMLA.F32 q0,q0, d0[0] +//Vector widening multiply accumulate with scalar +_NEON2SSESTORAGE int32x4_t vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v, __constrange(0,3) int l); //VMLAL.S16 q0, d0,d0[0] +_NEON2SSESTORAGE int64x2_t vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v, __constrange(0,1) int l); //VMLAL.S32 q0, d0,d0[0] +_NEON2SSESTORAGE uint32x4_t vmlal_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t v, __constrange(0,3) int l); // VMLAL.U16 q0,d0, d0[0] +_NEON2SSESTORAGE uint64x2_t vmlal_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t v, 
__constrange(0,1) int l); // VMLAL.U32 q0,d0, d0[0] +//Vector widening saturating doubling multiply accumulate with scalar +_NEON2SSESTORAGE int32x4_t vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v, __constrange(0,3) int l); // VQDMLAL.S16 q0,d0, d0[0] +_NEON2SSESTORAGE int64x2_t vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v, __constrange(0,1) int l); // VQDMLAL.S32 q0,d0, d0[0] +//Vector multiply subtract with scalar +_NEON2SSESTORAGE int16x4_t vmls_lane_s16(int16x4_t a, int16x4_t b, int16x4_t v, __constrange(0,3) int l); // VMLS.I16 d0, d0,d0[0] +_NEON2SSESTORAGE int32x2_t vmls_lane_s32(int32x2_t a, int32x2_t b, int32x2_t v, __constrange(0,1) int l); // VMLS.I32 d0, d0,d0[0] +_NEON2SSESTORAGE uint16x4_t vmls_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t v, __constrange(0,3) int l); // VMLS.I16 d0, d0,d0[0] +_NEON2SSESTORAGE uint32x2_t vmls_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t v, __constrange(0,1) int l); // VMLS.I32 d0, d0,d0[0] +_NEON2SSESTORAGE float32x2_t vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v, __constrange(0,1) int l); // VMLS.F32 d0,d0, d0[0] +_NEON2SSESTORAGE int16x8_t vmlsq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t v, __constrange(0,3) int l); // VMLS.I16 q0, q0,d0[0] +_NEON2SSESTORAGE int32x4_t vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v, __constrange(0,1) int l); // VMLS.I32 q0, q0,d0[0] +_NEON2SSESTORAGE uint16x8_t vmlsq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t v, __constrange(0,3) int l); // VMLS.I16 q0,q0, d0[0] +_NEON2SSESTORAGE uint32x4_t vmlsq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t v, __constrange(0,1) int l); // VMLS.I32 q0,q0, d0[0] +_NEON2SSESTORAGE float32x4_t vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v, __constrange(0,1) int l); // VMLS.F32 q0,q0, d0[0] +//Vector widening multiply subtract with scalar +_NEON2SSESTORAGE int32x4_t vmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v, __constrange(0,3) int l); // VMLSL.S16 q0, d0,d0[0] +_NEON2SSESTORAGE int64x2_t vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v, __constrange(0,1) int l); // VMLSL.S32 q0, d0,d0[0] +_NEON2SSESTORAGE uint32x4_t vmlsl_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t v, __constrange(0,3) int l); // VMLSL.U16 q0,d0, d0[0] +_NEON2SSESTORAGE uint64x2_t vmlsl_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t v, __constrange(0,1) int l); // VMLSL.U32 q0,d0, d0[0] +//Vector widening saturating doubling multiply subtract with scalar +_NEON2SSESTORAGE int32x4_t vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v, __constrange(0,3) int l); // VQDMLSL.S16 q0,d0, d0[0] +_NEON2SSESTORAGE int64x2_t vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v, __constrange(0,1) int l); // VQDMLSL.S32 q0,d0, d0[0] +//Vector multiply by scalar +_NEON2SSESTORAGE int16x4_t vmul_n_s16(int16x4_t a, int16_t b); // VMUL.I16 d0,d0,d0[0] +_NEON2SSESTORAGE int32x2_t vmul_n_s32(int32x2_t a, int32_t b); // VMUL.I32 d0,d0,d0[0] +_NEON2SSESTORAGE float32x2_t vmul_n_f32(float32x2_t a, float32_t b); // VMUL.F32 d0,d0,d0[0] +_NEON2SSESTORAGE uint16x4_t vmul_n_u16(uint16x4_t a, uint16_t b); // VMUL.I16 d0,d0,d0[0] +_NEON2SSESTORAGE uint32x2_t vmul_n_u32(uint32x2_t a, uint32_t b); // VMUL.I32 d0,d0,d0[0] +_NEON2SSESTORAGE int16x8_t vmulq_n_s16(int16x8_t a, int16_t b); // VMUL.I16 q0,q0,d0[0] +_NEON2SSESTORAGE int32x4_t vmulq_n_s32(int32x4_t a, int32_t b); // VMUL.I32 q0,q0,d0[0] +_NEON2SSESTORAGE float32x4_t vmulq_n_f32(float32x4_t a, float32_t b); // VMUL.F32 q0,q0,d0[0] +_NEON2SSESTORAGE uint16x8_t vmulq_n_u16(uint16x8_t a, uint16_t 
b); // VMUL.I16 q0,q0,d0[0] +_NEON2SSESTORAGE uint32x4_t vmulq_n_u32(uint32x4_t a, uint32_t b); // VMUL.I32 q0,q0,d0[0] +//Vector long multiply with scalar +_NEON2SSESTORAGE int32x4_t vmull_n_s16(int16x4_t vec1, int16_t val2); // VMULL.S16 q0,d0,d0[0] +_NEON2SSESTORAGE int64x2_t vmull_n_s32(int32x2_t vec1, int32_t val2); // VMULL.S32 q0,d0,d0[0] +_NEON2SSESTORAGE uint32x4_t vmull_n_u16(uint16x4_t vec1, uint16_t val2); // VMULL.U16 q0,d0,d0[0] +_NEON2SSESTORAGE uint64x2_t vmull_n_u32(uint32x2_t vec1, uint32_t val2); // VMULL.U32 q0,d0,d0[0] +//Vector long multiply by scalar +_NEON2SSESTORAGE int32x4_t vmull_lane_s16(int16x4_t vec1, int16x4_t val2, __constrange(0, 3) int val3); // VMULL.S16 q0,d0,d0[0] +_NEON2SSESTORAGE int64x2_t vmull_lane_s32(int32x2_t vec1, int32x2_t val2, __constrange(0, 1) int val3); // VMULL.S32 q0,d0,d0[0] +_NEON2SSESTORAGE uint32x4_t vmull_lane_u16(uint16x4_t vec1, uint16x4_t val2, __constrange(0, 3) int val3); // VMULL.U16 q0,d0,d0[0] +_NEON2SSESTORAGE uint64x2_t vmull_lane_u32(uint32x2_t vec1, uint32x2_t val2, __constrange(0, 1) int val3); // VMULL.U32 q0,d0,d0[0] +//Vector saturating doubling long multiply with scalar +_NEON2SSESTORAGE int32x4_t vqdmull_n_s16(int16x4_t vec1, int16_t val2); // VQDMULL.S16 q0,d0,d0[0] +_NEON2SSESTORAGE int64x2_t vqdmull_n_s32(int32x2_t vec1, int32_t val2); // VQDMULL.S32 q0,d0,d0[0] +//Vector saturating doubling long multiply by scalar +_NEON2SSESTORAGE int32x4_t vqdmull_lane_s16(int16x4_t vec1, int16x4_t val2, __constrange(0, 3) int val3); // VQDMULL.S16 q0,d0,d0[0] +_NEON2SSESTORAGE int64x2_t vqdmull_lane_s32(int32x2_t vec1, int32x2_t val2, __constrange(0, 1) int val3); // VQDMULL.S32 q0,d0,d0[0] +//Vector saturating doubling multiply high with scalar +_NEON2SSESTORAGE int16x4_t vqdmulh_n_s16(int16x4_t vec1, int16_t val2); // VQDMULH.S16 d0,d0,d0[0] +_NEON2SSESTORAGE int32x2_t vqdmulh_n_s32(int32x2_t vec1, int32_t val2); // VQDMULH.S32 d0,d0,d0[0] +_NEON2SSESTORAGE int16x8_t vqdmulhq_n_s16(int16x8_t vec1, int16_t val2); // VQDMULH.S16 q0,q0,d0[0] +_NEON2SSESTORAGE int32x4_t vqdmulhq_n_s32(int32x4_t vec1, int32_t val2); // VQDMULH.S32 q0,q0,d0[0] +//Vector saturating doubling multiply high by scalar +_NEON2SSESTORAGE int16x4_t vqdmulh_lane_s16(int16x4_t vec1, int16x4_t val2, __constrange(0, 3) int val3); // VQDMULH.S16 d0,d0,d0[0] +_NEON2SSESTORAGE int32x2_t vqdmulh_lane_s32(int32x2_t vec1, int32x2_t val2, __constrange(0, 1) int val3); // VQDMULH.S32 d0,d0,d0[0] +_NEON2SSESTORAGE int16x8_t vqdmulhq_lane_s16(int16x8_t vec1, int16x4_t val2, __constrange(0, 3) int val3); // VQDMULH.S16 q0,q0,d0[0] +_NEON2SSESTORAGE int32x4_t vqdmulhq_lane_s32(int32x4_t vec1, int32x2_t val2, __constrange(0, 1) int val3); // VQDMULH.S32 q0,q0,d0[0] +//Vector saturating rounding doubling multiply high with scalar +_NEON2SSESTORAGE int16x4_t vqrdmulh_n_s16(int16x4_t vec1, int16_t val2); // VQRDMULH.S16 d0,d0,d0[0] +_NEON2SSESTORAGE int32x2_t vqrdmulh_n_s32(int32x2_t vec1, int32_t val2); // VQRDMULH.S32 d0,d0,d0[0] +_NEON2SSESTORAGE int16x8_t vqrdmulhq_n_s16(int16x8_t vec1, int16_t val2); // VQRDMULH.S16 q0,q0,d0[0] +_NEON2SSESTORAGE int32x4_t vqrdmulhq_n_s32(int32x4_t vec1, int32_t val2); // VQRDMULH.S32 q0,q0,d0[0] +//Vector rounding saturating doubling multiply high by scalar +_NEON2SSESTORAGE int16x4_t vqrdmulh_lane_s16(int16x4_t vec1, int16x4_t val2, __constrange(0, 3) int val3); // VQRDMULH.S16 d0,d0,d0[0] +_NEON2SSESTORAGE int32x2_t vqrdmulh_lane_s32(int32x2_t vec1, int32x2_t val2, __constrange(0, 1) int val3); // VQRDMULH.S32 d0,d0,d0[0] 
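+/* Usage sketch (illustrative only; the helper name is hypothetical). The
+   saturating doubling multiply-high family implements Q15/Q31 fixed-point
+   products: vqrdmulhq_n_s16 computes saturate((2*a[i]*b + 0x8000) >> 16)
+   per lane, i.e. a rounded Q15 multiply by a scalar gain.
+
+       static int16x8_t scale_q15(int16x8_t samples, int16_t gain_q15)
+       {
+           return vqrdmulhq_n_s16(samples, gain_q15);
+       }
+*/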
+_NEON2SSESTORAGE int16x8_t vqrdmulhq_lane_s16(int16x8_t vec1, int16x4_t val2, __constrange(0, 3) int val3); // VQRDMULH.S16 q0,q0,d0[0] +_NEON2SSESTORAGE int32x4_t vqrdmulhq_lane_s32(int32x4_t vec1, int32x2_t val2, __constrange(0, 1) int val3); // VQRDMULH.S32 q0,q0,d0[0] +//Vector multiply accumulate with scalar +_NEON2SSESTORAGE int16x4_t vmla_n_s16(int16x4_t a, int16x4_t b, int16_t c); // VMLA.I16 d0, d0, d0[0] +_NEON2SSESTORAGE int32x2_t vmla_n_s32(int32x2_t a, int32x2_t b, int32_t c); // VMLA.I32 d0, d0, d0[0] +_NEON2SSE_GLOBAL uint16x4_t vmla_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c); // VMLA.I16 d0, d0, d0[0] +_NEON2SSE_GLOBAL uint32x2_t vmla_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c); // VMLA.I32 d0, d0, d0[0] +_NEON2SSESTORAGE float32x2_t vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c); // VMLA.F32 d0, d0, d0[0] +_NEON2SSESTORAGE int16x8_t vmlaq_n_s16(int16x8_t a, int16x8_t b, int16_t c); // VMLA.I16 q0, q0, d0[0] +_NEON2SSESTORAGE int32x4_t vmlaq_n_s32(int32x4_t a, int32x4_t b, int32_t c); // VMLA.I32 q0, q0, d0[0] +_NEON2SSE_GLOBAL uint16x8_t vmlaq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c); // VMLA.I16 q0, q0, d0[0] +_NEON2SSE_GLOBAL uint32x4_t vmlaq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c); // VMLA.I32 q0, q0, d0[0] +_NEON2SSESTORAGE float32x4_t vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c); // VMLA.F32 q0, q0, d0[0] +//Vector widening multiply accumulate with scalar +_NEON2SSESTORAGE int32x4_t vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c); // VMLAL.S16 q0, d0, d0[0] +_NEON2SSESTORAGE int64x2_t vmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c); // VMLAL.S32 q0, d0, d0[0] +_NEON2SSESTORAGE uint32x4_t vmlal_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c); // VMLAL.U16 q0, d0, d0[0] +_NEON2SSESTORAGE uint64x2_t vmlal_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c); // VMLAL.U32 q0, d0, d0[0] +//Vector widening saturating doubling multiply accumulate with scalar +_NEON2SSESTORAGE int32x4_t vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c); // VQDMLAL.S16 q0, d0, d0[0] +_NEON2SSESTORAGE int64x2_t vqdmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c); // VQDMLAL.S32 q0, d0, d0[0] +//Vector multiply subtract with scalar +_NEON2SSESTORAGE int16x4_t vmls_n_s16(int16x4_t a, int16x4_t b, int16_t c); // VMLS.I16 d0, d0, d0[0] +_NEON2SSESTORAGE int32x2_t vmls_n_s32(int32x2_t a, int32x2_t b, int32_t c); // VMLS.I32 d0, d0, d0[0] +_NEON2SSESTORAGE uint16x4_t vmls_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c); // VMLS.I16 d0, d0, d0[0] +_NEON2SSESTORAGE uint32x2_t vmls_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c); // VMLS.I32 d0, d0, d0[0] +_NEON2SSESTORAGE float32x2_t vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c); // VMLS.F32 d0, d0, d0[0] +_NEON2SSESTORAGE int16x8_t vmlsq_n_s16(int16x8_t a, int16x8_t b, int16_t c); // VMLS.I16 q0, q0, d0[0] +_NEON2SSESTORAGE int32x4_t vmlsq_n_s32(int32x4_t a, int32x4_t b, int32_t c); // VMLS.I32 q0, q0, d0[0] +_NEON2SSESTORAGE uint16x8_t vmlsq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c); // VMLS.I16 q0, q0, d0[0] +_NEON2SSESTORAGE uint32x4_t vmlsq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c); // VMLS.I32 q0, q0, d0[0] +_NEON2SSESTORAGE float32x4_t vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c); // VMLS.F32 q0, q0, d0[0] +//Vector widening multiply subtract with scalar +_NEON2SSESTORAGE int32x4_t vmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c); // VMLSL.S16 q0, d0, d0[0] +_NEON2SSESTORAGE int64x2_t vmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c); // VMLSL.S32 q0, d0, d0[0] +_NEON2SSESTORAGE uint32x4_t 
vmlsl_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c); // VMLSL.U16 q0, d0, d0[0] +_NEON2SSESTORAGE uint64x2_t vmlsl_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c); // VMLSL.U32 q0, d0, d0[0] +//Vector widening saturating doubling multiply subtract with scalar +_NEON2SSESTORAGE int32x4_t vqdmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c); // VQDMLSL.S16 q0, d0, d0[0] +_NEON2SSESTORAGE int64x2_t vqdmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c); // VQDMLSL.S32 q0, d0, d0[0] +//Vector extract +_NEON2SSESTORAGE int8x8_t vext_s8(int8x8_t a, int8x8_t b, __constrange(0,7) int c); // VEXT.8 d0,d0,d0,#0 +_NEON2SSE_GLOBAL uint8x8_t vext_u8(uint8x8_t a, uint8x8_t b, __constrange(0,7) int c); // VEXT.8 d0,d0,d0,#0 +_NEON2SSE_GLOBAL poly8x8_t vext_p8(poly8x8_t a, poly8x8_t b, __constrange(0,7) int c); // VEXT.8 d0,d0,d0,#0 +_NEON2SSESTORAGE int16x4_t vext_s16(int16x4_t a, int16x4_t b, __constrange(0,3) int c); // VEXT.16 d0,d0,d0,#0 +_NEON2SSE_GLOBAL uint16x4_t vext_u16(uint16x4_t a, uint16x4_t b, __constrange(0,3) int c); // VEXT.16 d0,d0,d0,#0 +_NEON2SSE_GLOBAL poly16x4_t vext_p16(poly16x4_t a, poly16x4_t b, __constrange(0,3) int c); // VEXT.16 d0,d0,d0,#0 +_NEON2SSESTORAGE int32x2_t vext_s32(int32x2_t a, int32x2_t b, __constrange(0,1) int c); // VEXT.32 d0,d0,d0,#0 +_NEON2SSE_GLOBAL uint32x2_t vext_u32(uint32x2_t a, uint32x2_t b, __constrange(0,1) int c); // VEXT.32 d0,d0,d0,#0 +_NEON2SSE_GLOBAL int64x1_t vext_s64(int64x1_t a, int64x1_t b, __constrange(0,0) int c); // VEXT.64 d0,d0,d0,#0 +_NEON2SSE_GLOBAL uint64x1_t vext_u64(uint64x1_t a, uint64x1_t b, __constrange(0,0) int c); // VEXT.64 d0,d0,d0,#0 +_NEON2SSESTORAGE float32x2_t vext_f32(float32x2_t a, float32x2_t b, __constrange(0,1) int c); // VEXT.32 d0,d0,d0,#0 +_NEON2SSE_GLOBAL int8x16_t vextq_s8(int8x16_t a, int8x16_t b, __constrange(0,15) int c); // VEXT.8 q0,q0,q0,#0 +_NEON2SSE_GLOBAL uint8x16_t vextq_u8(uint8x16_t a, uint8x16_t b, __constrange(0,15) int c); // VEXT.8 q0,q0,q0,#0 +_NEON2SSE_GLOBAL poly8x16_t vextq_p8(poly8x16_t a, poly8x16_t b, __constrange(0,15) int c); // VEXT.8 q0,q0,q0,#0 +_NEON2SSE_GLOBAL int16x8_t vextq_s16(int16x8_t a, int16x8_t b, __constrange(0,7) int c); // VEXT.16 q0,q0,q0,#0 +_NEON2SSE_GLOBAL uint16x8_t vextq_u16(uint16x8_t a, uint16x8_t b, __constrange(0,7) int c); // VEXT.16 q0,q0,q0,#0 +_NEON2SSE_GLOBAL poly16x8_t vextq_p16(poly16x8_t a, poly16x8_t b, __constrange(0,7) int c); // VEXT.16 q0,q0,q0,#0 +_NEON2SSE_GLOBAL int32x4_t vextq_s32(int32x4_t a, int32x4_t b, __constrange(0,3) int c); // VEXT.32 q0,q0,q0,#0 +_NEON2SSE_GLOBAL uint32x4_t vextq_u32(uint32x4_t a, uint32x4_t b, __constrange(0,3) int c); // VEXT.32 q0,q0,q0,#0 +_NEON2SSE_GLOBAL int64x2_t vextq_s64(int64x2_t a, int64x2_t b, __constrange(0,1) int c); // VEXT.64 q0,q0,q0,#0 +_NEON2SSE_GLOBAL uint64x2_t vextq_u64(uint64x2_t a, uint64x2_t b, __constrange(0,1) int c); // VEXT.64 q0,q0,q0,#0 +_NEON2SSE_GLOBAL float32x4_t vextq_f32(float32x4_t a, float32x4_t b, __constrange(0,3) int c); // VEXT.32 q0,q0,q0,#0 +//Reverse vector elements (swap endianness). VREVn.m reverses the order of the m-bit lanes within a set that is n bits wide.
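+/* Usage sketch (illustrative only; the helper name is hypothetical).
+   VREV32.8 reverses the byte lanes inside every 32-bit group, i.e. a packed
+   byte swap, which is the usual way to fix the endianness of four
+   big-endian words at once.
+
+       static uint8x16_t bswap32_lanes(uint8x16_t bytes)
+       {
+           return vrev32q_u8(bytes);
+       }
+*/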
+_NEON2SSESTORAGE int8x8_t vrev64_s8(int8x8_t vec); // VREV64.8 d0,d0 +_NEON2SSESTORAGE int16x4_t vrev64_s16(int16x4_t vec); // VREV64.16 d0,d0 +_NEON2SSESTORAGE int32x2_t vrev64_s32(int32x2_t vec); // VREV64.32 d0,d0 +_NEON2SSE_GLOBAL uint8x8_t vrev64_u8(uint8x8_t vec); // VREV64.8 d0,d0 +_NEON2SSE_GLOBAL uint16x4_t vrev64_u16(uint16x4_t vec); // VREV64.16 d0,d0 +_NEON2SSE_GLOBAL uint32x2_t vrev64_u32(uint32x2_t vec); // VREV64.32 d0,d0 +_NEON2SSE_GLOBAL poly8x8_t vrev64_p8(poly8x8_t vec); // VREV64.8 d0,d0 +_NEON2SSE_GLOBAL poly16x4_t vrev64_p16(poly16x4_t vec); // VREV64.16 d0,d0 +_NEON2SSESTORAGE float32x2_t vrev64_f32(float32x2_t vec); // VREV64.32 d0,d0 +_NEON2SSESTORAGE int8x16_t vrev64q_s8(int8x16_t vec); // VREV64.8 q0,q0 +_NEON2SSESTORAGE int16x8_t vrev64q_s16(int16x8_t vec); // VREV64.16 q0,q0 +_NEON2SSESTORAGE int32x4_t vrev64q_s32(int32x4_t vec); // VREV64.32 q0,q0 +_NEON2SSE_GLOBAL uint8x16_t vrev64q_u8(uint8x16_t vec); // VREV64.8 q0,q0 +_NEON2SSE_GLOBAL uint16x8_t vrev64q_u16(uint16x8_t vec); // VREV64.16 q0,q0 +_NEON2SSE_GLOBAL uint32x4_t vrev64q_u32(uint32x4_t vec); // VREV64.32 q0,q0 +_NEON2SSE_GLOBAL poly8x16_t vrev64q_p8(poly8x16_t vec); // VREV64.8 q0,q0 +_NEON2SSE_GLOBAL poly16x8_t vrev64q_p16(poly16x8_t vec); // VREV64.16 q0,q0 +_NEON2SSE_GLOBAL float32x4_t vrev64q_f32(float32x4_t vec); // VREV64.32 q0,q0 +_NEON2SSESTORAGE int8x8_t vrev32_s8(int8x8_t vec); // VREV32.8 d0,d0 +_NEON2SSESTORAGE int16x4_t vrev32_s16(int16x4_t vec); // VREV32.16 d0,d0 +_NEON2SSE_GLOBAL uint8x8_t vrev32_u8(uint8x8_t vec); // VREV32.8 d0,d0 +_NEON2SSE_GLOBAL uint16x4_t vrev32_u16(uint16x4_t vec); // VREV32.16 d0,d0 +_NEON2SSE_GLOBAL poly8x8_t vrev32_p8(poly8x8_t vec); // VREV32.8 d0,d0 +_NEON2SSE_GLOBAL poly16x4_t vrev32_p16(poly16x4_t vec); // VREV32.16 d0,d0 +_NEON2SSESTORAGE int8x16_t vrev32q_s8(int8x16_t vec); // VREV32.8 q0,q0 +_NEON2SSESTORAGE int16x8_t vrev32q_s16(int16x8_t vec); // VREV32.16 q0,q0 +_NEON2SSE_GLOBAL uint8x16_t vrev32q_u8(uint8x16_t vec); // VREV32.8 q0,q0 +_NEON2SSE_GLOBAL uint16x8_t vrev32q_u16(uint16x8_t vec); // VREV32.16 q0,q0 +_NEON2SSE_GLOBAL poly8x16_t vrev32q_p8(poly8x16_t vec); // VREV32.8 q0,q0 +_NEON2SSE_GLOBAL poly16x8_t vrev32q_p16(poly16x8_t vec); // VREV32.16 q0,q0 +_NEON2SSESTORAGE int8x8_t vrev16_s8(int8x8_t vec); // VREV16.8 d0,d0 +_NEON2SSE_GLOBAL uint8x8_t vrev16_u8(uint8x8_t vec); // VREV16.8 d0,d0 +_NEON2SSE_GLOBAL poly8x8_t vrev16_p8(poly8x8_t vec); // VREV16.8 d0,d0 +_NEON2SSESTORAGE int8x16_t vrev16q_s8(int8x16_t vec); // VREV16.8 q0,q0 +_NEON2SSE_GLOBAL uint8x16_t vrev16q_u8(uint8x16_t vec); // VREV16.8 q0,q0 +_NEON2SSE_GLOBAL poly8x16_t vrev16q_p8(poly8x16_t vec); // VREV16.8 q0,q0 +//Other single operand arithmetic +//Absolute: Vd[i] = |Va[i]| +_NEON2SSESTORAGE int8x8_t vabs_s8(int8x8_t a); // VABS.S8 d0,d0 +_NEON2SSESTORAGE int16x4_t vabs_s16(int16x4_t a); // VABS.S16 d0,d0 +_NEON2SSESTORAGE int32x2_t vabs_s32(int32x2_t a); // VABS.S32 d0,d0 +_NEON2SSESTORAGE float32x2_t vabs_f32(float32x2_t a); // VABS.F32 d0,d0 +_NEON2SSE_GLOBAL int8x16_t vabsq_s8(int8x16_t a); // VABS.S8 q0,q0 +_NEON2SSE_GLOBAL int16x8_t vabsq_s16(int16x8_t a); // VABS.S16 q0,q0 +_NEON2SSE_GLOBAL int32x4_t vabsq_s32(int32x4_t a); // VABS.S32 q0,q0 +_NEON2SSESTORAGE float32x4_t vabsq_f32(float32x4_t a); // VABS.F32 q0,q0 + +#ifdef _NEON2SSE_64BIT +_NEON2SSESTORAGE int64x2_t vabsq_s64(int64x2_t a); // VABS.S64 q0,q0 +_NEON2SSESTORAGE float64x2_t vabsq_f64(float64x2_t a); // VABS.F64 q0,q0 +#endif + +//Saturating absolute: Vd[i] = sat(|Va[i]|) +_NEON2SSESTORAGE int8x8_t 
vqabs_s8(int8x8_t a); // VQABS.S8 d0,d0 +_NEON2SSESTORAGE int16x4_t vqabs_s16(int16x4_t a); // VQABS.S16 d0,d0 +_NEON2SSESTORAGE int32x2_t vqabs_s32(int32x2_t a); // VQABS.S32 d0,d0 +_NEON2SSESTORAGE int8x16_t vqabsq_s8(int8x16_t a); // VQABS.S8 q0,q0 +_NEON2SSESTORAGE int16x8_t vqabsq_s16(int16x8_t a); // VQABS.S16 q0,q0 +_NEON2SSESTORAGE int32x4_t vqabsq_s32(int32x4_t a); // VQABS.S32 q0,q0 +//Negate: Vd[i] = - Va[i] +_NEON2SSESTORAGE int8x8_t vneg_s8(int8x8_t a); // VNEG.S8 d0,d0 +_NEON2SSESTORAGE int16x4_t vneg_s16(int16x4_t a); // VNEG.S16 d0,d0 +_NEON2SSESTORAGE int32x2_t vneg_s32(int32x2_t a); // VNEG.S32 d0,d0 +_NEON2SSESTORAGE float32x2_t vneg_f32(float32x2_t a); // VNEG.F32 d0,d0 +_NEON2SSESTORAGE int8x16_t vnegq_s8(int8x16_t a); // VNEG.S8 q0,q0 +_NEON2SSESTORAGE int16x8_t vnegq_s16(int16x8_t a); // VNEG.S16 q0,q0 +_NEON2SSESTORAGE int32x4_t vnegq_s32(int32x4_t a); // VNEG.S32 q0,q0 +_NEON2SSESTORAGE float32x4_t vnegq_f32(float32x4_t a); // VNEG.F32 q0,q0 +//Saturating Negate: sat(Vd[i] = - Va[i]) +_NEON2SSESTORAGE int8x8_t vqneg_s8(int8x8_t a); // VQNEG.S8 d0,d0 +_NEON2SSESTORAGE int16x4_t vqneg_s16(int16x4_t a); // VQNEG.S16 d0,d0 +_NEON2SSESTORAGE int32x2_t vqneg_s32(int32x2_t a); // VQNEG.S32 d0,d0 +_NEON2SSESTORAGE int8x16_t vqnegq_s8(int8x16_t a); // VQNEG.S8 q0,q0 +_NEON2SSESTORAGE int16x8_t vqnegq_s16(int16x8_t a); // VQNEG.S16 q0,q0 +_NEON2SSESTORAGE int32x4_t vqnegq_s32(int32x4_t a); // VQNEG.S32 q0,q0 +//Count leading sign bits +_NEON2SSESTORAGE int8x8_t vcls_s8(int8x8_t a); // VCLS.S8 d0,d0 +_NEON2SSESTORAGE int16x4_t vcls_s16(int16x4_t a); // VCLS.S16 d0,d0 +_NEON2SSESTORAGE int32x2_t vcls_s32(int32x2_t a); // VCLS.S32 d0,d0 +_NEON2SSESTORAGE int8x16_t vclsq_s8(int8x16_t a); // VCLS.S8 q0,q0 +_NEON2SSESTORAGE int16x8_t vclsq_s16(int16x8_t a); // VCLS.S16 q0,q0 +_NEON2SSESTORAGE int32x4_t vclsq_s32(int32x4_t a); // VCLS.S32 q0,q0 +//Count leading zeros +_NEON2SSESTORAGE int8x8_t vclz_s8(int8x8_t a); // VCLZ.I8 d0,d0 +_NEON2SSESTORAGE int16x4_t vclz_s16(int16x4_t a); // VCLZ.I16 d0,d0 +_NEON2SSESTORAGE int32x2_t vclz_s32(int32x2_t a); // VCLZ.I32 d0,d0 +_NEON2SSE_GLOBAL uint8x8_t vclz_u8(uint8x8_t a); // VCLZ.I8 d0,d0 +_NEON2SSE_GLOBAL uint16x4_t vclz_u16(uint16x4_t a); // VCLZ.I16 d0,d0 +_NEON2SSE_GLOBAL uint32x2_t vclz_u32(uint32x2_t a); // VCLZ.I32 d0,d0 +_NEON2SSESTORAGE int8x16_t vclzq_s8(int8x16_t a); // VCLZ.I8 q0,q0 +_NEON2SSESTORAGE int16x8_t vclzq_s16(int16x8_t a); // VCLZ.I16 q0,q0 +_NEON2SSESTORAGE int32x4_t vclzq_s32(int32x4_t a); // VCLZ.I32 q0,q0 +_NEON2SSE_GLOBAL uint8x16_t vclzq_u8(uint8x16_t a); // VCLZ.I8 q0,q0 +_NEON2SSE_GLOBAL uint16x8_t vclzq_u16(uint16x8_t a); // VCLZ.I16 q0,q0 +_NEON2SSESTORAGE uint32x4_t vclzq_u32(uint32x4_t a); // VCLZ.I32 q0,q0 +//Count number of set bits +_NEON2SSESTORAGE uint8x8_t vcnt_u8(uint8x8_t a); // VCNT.8 d0,d0 +_NEON2SSE_GLOBAL int8x8_t vcnt_s8(int8x8_t a); // VCNT.8 d0,d0 +_NEON2SSE_GLOBAL poly8x8_t vcnt_p8(poly8x8_t a); // VCNT.8 d0,d0 +_NEON2SSESTORAGE uint8x16_t vcntq_u8(uint8x16_t a); // VCNT.8 q0,q0 +_NEON2SSE_GLOBAL int8x16_t vcntq_s8(int8x16_t a); // VCNT.8 q0,q0 +_NEON2SSE_GLOBAL poly8x16_t vcntq_p8(poly8x16_t a); // VCNT.8 q0,q0 +//Reciprocal estimate +_NEON2SSESTORAGE float32x2_t vrecpe_f32(float32x2_t a); // VRECPE.F32 d0,d0 +_NEON2SSESTORAGE uint32x2_t vrecpe_u32(uint32x2_t a); // VRECPE.U32 d0,d0 +_NEON2SSE_GLOBAL float32x4_t vrecpeq_f32(float32x4_t a); // VRECPE.F32 q0,q0 +_NEON2SSESTORAGE uint32x4_t vrecpeq_u32(uint32x4_t a); // VRECPE.U32 q0,q0 +//Reciprocal square root estimate +_NEON2SSESTORAGE float32x2_t vrsqrte_f32(float32x2_t a); //
VRSQRTE.F32 d0,d0 +_NEON2SSESTORAGE uint32x2_t vrsqrte_u32(uint32x2_t a); // VRSQRTE.U32 d0,d0 +_NEON2SSE_GLOBAL float32x4_t vrsqrteq_f32(float32x4_t a); // VRSQRTE.F32 q0,q0 +_NEON2SSESTORAGE uint32x4_t vrsqrteq_u32(uint32x4_t a); // VRSQRTE.U32 q0,q0 +//Logical operations +//Bitwise not +_NEON2SSESTORAGE int8x8_t vmvn_s8(int8x8_t a); // VMVN d0,d0 +_NEON2SSESTORAGE int16x4_t vmvn_s16(int16x4_t a); // VMVN d0,d0 +_NEON2SSESTORAGE int32x2_t vmvn_s32(int32x2_t a); // VMVN d0,d0 +_NEON2SSE_GLOBAL uint8x8_t vmvn_u8(uint8x8_t a); // VMVN d0,d0 +_NEON2SSE_GLOBAL uint16x4_t vmvn_u16(uint16x4_t a); // VMVN d0,d0 +_NEON2SSE_GLOBAL uint32x2_t vmvn_u32(uint32x2_t a); // VMVN d0,d0 +_NEON2SSE_GLOBAL poly8x8_t vmvn_p8(poly8x8_t a); // VMVN d0,d0 +_NEON2SSESTORAGE int8x16_t vmvnq_s8(int8x16_t a); // VMVN q0,q0 +_NEON2SSESTORAGE int16x8_t vmvnq_s16(int16x8_t a); // VMVN q0,q0 +_NEON2SSESTORAGE int32x4_t vmvnq_s32(int32x4_t a); // VMVN q0,q0 +_NEON2SSE_GLOBAL uint8x16_t vmvnq_u8(uint8x16_t a); // VMVN q0,q0 +_NEON2SSE_GLOBAL uint16x8_t vmvnq_u16(uint16x8_t a); // VMVN q0,q0 +_NEON2SSE_GLOBAL uint32x4_t vmvnq_u32(uint32x4_t a); // VMVN q0,q0 +_NEON2SSE_GLOBAL poly8x16_t vmvnq_p8(poly8x16_t a); // VMVN q0,q0 +//Bitwise and +_NEON2SSESTORAGE int8x8_t vand_s8(int8x8_t a, int8x8_t b); // VAND d0,d0,d0 +_NEON2SSESTORAGE int16x4_t vand_s16(int16x4_t a, int16x4_t b); // VAND d0,d0,d0 +_NEON2SSESTORAGE int32x2_t vand_s32(int32x2_t a, int32x2_t b); // VAND d0,d0,d0 +_NEON2SSESTORAGE int64x1_t vand_s64(int64x1_t a, int64x1_t b); // VAND d0,d0,d0 +_NEON2SSE_GLOBAL uint8x8_t vand_u8(uint8x8_t a, uint8x8_t b); // VAND d0,d0,d0 +_NEON2SSE_GLOBAL uint16x4_t vand_u16(uint16x4_t a, uint16x4_t b); // VAND d0,d0,d0 +_NEON2SSE_GLOBAL uint32x2_t vand_u32(uint32x2_t a, uint32x2_t b); // VAND d0,d0,d0 +_NEON2SSE_GLOBAL uint64x1_t vand_u64(uint64x1_t a, uint64x1_t b); // VAND d0,d0,d0 +_NEON2SSE_GLOBAL int8x16_t vandq_s8(int8x16_t a, int8x16_t b); // VAND q0,q0,q0 +_NEON2SSE_GLOBAL int16x8_t vandq_s16(int16x8_t a, int16x8_t b); // VAND q0,q0,q0 +_NEON2SSE_GLOBAL int32x4_t vandq_s32(int32x4_t a, int32x4_t b); // VAND q0,q0,q0 +_NEON2SSE_GLOBAL int64x2_t vandq_s64(int64x2_t a, int64x2_t b); // VAND q0,q0,q0 +_NEON2SSE_GLOBAL uint8x16_t vandq_u8(uint8x16_t a, uint8x16_t b); // VAND q0,q0,q0 +_NEON2SSE_GLOBAL uint16x8_t vandq_u16(uint16x8_t a, uint16x8_t b); // VAND q0,q0,q0 +_NEON2SSE_GLOBAL uint32x4_t vandq_u32(uint32x4_t a, uint32x4_t b); // VAND q0,q0,q0 +_NEON2SSE_GLOBAL uint64x2_t vandq_u64(uint64x2_t a, uint64x2_t b); // VAND q0,q0,q0 +//Bitwise or +_NEON2SSESTORAGE int8x8_t vorr_s8(int8x8_t a, int8x8_t b); // VORR d0,d0,d0 +_NEON2SSESTORAGE int16x4_t vorr_s16(int16x4_t a, int16x4_t b); // VORR d0,d0,d0 +_NEON2SSESTORAGE int32x2_t vorr_s32(int32x2_t a, int32x2_t b); // VORR d0,d0,d0 +_NEON2SSESTORAGE int64x1_t vorr_s64(int64x1_t a, int64x1_t b); // VORR d0,d0,d0 +_NEON2SSE_GLOBAL uint8x8_t vorr_u8(uint8x8_t a, uint8x8_t b); // VORR d0,d0,d0 +_NEON2SSE_GLOBAL uint16x4_t vorr_u16(uint16x4_t a, uint16x4_t b); // VORR d0,d0,d0 +_NEON2SSE_GLOBAL uint32x2_t vorr_u32(uint32x2_t a, uint32x2_t b); // VORR d0,d0,d0 +_NEON2SSE_GLOBAL uint64x1_t vorr_u64(uint64x1_t a, uint64x1_t b); // VORR d0,d0,d0 +_NEON2SSE_GLOBAL int8x16_t vorrq_s8(int8x16_t a, int8x16_t b); // VORR q0,q0,q0 +_NEON2SSE_GLOBAL int16x8_t vorrq_s16(int16x8_t a, int16x8_t b); // VORR q0,q0,q0 +_NEON2SSE_GLOBAL int32x4_t vorrq_s32(int32x4_t a, int32x4_t b); // VORR q0,q0,q0 +_NEON2SSE_GLOBAL int64x2_t vorrq_s64(int64x2_t a, int64x2_t b); // VORR q0,q0,q0 +_NEON2SSE_GLOBAL 
uint8x16_t vorrq_u8(uint8x16_t a, uint8x16_t b); // VORR q0,q0,q0 +_NEON2SSE_GLOBAL uint16x8_t vorrq_u16(uint16x8_t a, uint16x8_t b); // VORR q0,q0,q0 +_NEON2SSE_GLOBAL uint32x4_t vorrq_u32(uint32x4_t a, uint32x4_t b); // VORR q0,q0,q0 +_NEON2SSE_GLOBAL uint64x2_t vorrq_u64(uint64x2_t a, uint64x2_t b); // VORR q0,q0,q0 +//Bitwise exclusive or (EOR or XOR) +_NEON2SSESTORAGE int8x8_t veor_s8(int8x8_t a, int8x8_t b); // VEOR d0,d0,d0 +_NEON2SSE_GLOBAL int16x4_t veor_s16(int16x4_t a, int16x4_t b); // VEOR d0,d0,d0 +_NEON2SSE_GLOBAL int32x2_t veor_s32(int32x2_t a, int32x2_t b); // VEOR d0,d0,d0 +_NEON2SSESTORAGE int64x1_t veor_s64(int64x1_t a, int64x1_t b); // VEOR d0,d0,d0 +_NEON2SSE_GLOBAL uint8x8_t veor_u8(uint8x8_t a, uint8x8_t b); // VEOR d0,d0,d0 +_NEON2SSE_GLOBAL uint16x4_t veor_u16(uint16x4_t a, uint16x4_t b); // VEOR d0,d0,d0 +_NEON2SSE_GLOBAL uint32x2_t veor_u32(uint32x2_t a, uint32x2_t b); // VEOR d0,d0,d0 +_NEON2SSE_GLOBAL uint64x1_t veor_u64(uint64x1_t a, uint64x1_t b); // VEOR d0,d0,d0 +_NEON2SSE_GLOBAL int8x16_t veorq_s8(int8x16_t a, int8x16_t b); // VEOR q0,q0,q0 +_NEON2SSE_GLOBAL int16x8_t veorq_s16(int16x8_t a, int16x8_t b); // VEOR q0,q0,q0 +_NEON2SSE_GLOBAL int32x4_t veorq_s32(int32x4_t a, int32x4_t b); // VEOR q0,q0,q0 +_NEON2SSE_GLOBAL int64x2_t veorq_s64(int64x2_t a, int64x2_t b); // VEOR q0,q0,q0 +_NEON2SSE_GLOBAL uint8x16_t veorq_u8(uint8x16_t a, uint8x16_t b); // VEOR q0,q0,q0 +_NEON2SSE_GLOBAL uint16x8_t veorq_u16(uint16x8_t a, uint16x8_t b); // VEOR q0,q0,q0 +_NEON2SSE_GLOBAL uint32x4_t veorq_u32(uint32x4_t a, uint32x4_t b); // VEOR q0,q0,q0 +_NEON2SSE_GLOBAL uint64x2_t veorq_u64(uint64x2_t a, uint64x2_t b); // VEOR q0,q0,q0 +//Bit Clear +_NEON2SSESTORAGE int8x8_t vbic_s8(int8x8_t a, int8x8_t b); // VBIC d0,d0,d0 +_NEON2SSE_GLOBAL int16x4_t vbic_s16(int16x4_t a, int16x4_t b); // VBIC d0,d0,d0 +_NEON2SSE_GLOBAL int32x2_t vbic_s32(int32x2_t a, int32x2_t b); // VBIC d0,d0,d0 +_NEON2SSESTORAGE int64x1_t vbic_s64(int64x1_t a, int64x1_t b); // VBIC d0,d0,d0 +_NEON2SSE_GLOBAL uint8x8_t vbic_u8(uint8x8_t a, uint8x8_t b); // VBIC d0,d0,d0 +_NEON2SSE_GLOBAL uint16x4_t vbic_u16(uint16x4_t a, uint16x4_t b); // VBIC d0,d0,d0 +_NEON2SSE_GLOBAL uint32x2_t vbic_u32(uint32x2_t a, uint32x2_t b); // VBIC d0,d0,d0 +_NEON2SSE_GLOBAL uint64x1_t vbic_u64(uint64x1_t a, uint64x1_t b); // VBIC d0,d0,d0 +_NEON2SSE_GLOBAL int8x16_t vbicq_s8(int8x16_t a, int8x16_t b); // VBIC q0,q0,q0 +_NEON2SSE_GLOBAL int16x8_t vbicq_s16(int16x8_t a, int16x8_t b); // VBIC q0,q0,q0 +_NEON2SSE_GLOBAL int32x4_t vbicq_s32(int32x4_t a, int32x4_t b); // VBIC q0,q0,q0 +_NEON2SSE_GLOBAL int64x2_t vbicq_s64(int64x2_t a, int64x2_t b); // VBIC q0,q0,q0 +_NEON2SSE_GLOBAL uint8x16_t vbicq_u8(uint8x16_t a, uint8x16_t b); // VBIC q0,q0,q0 +_NEON2SSE_GLOBAL uint16x8_t vbicq_u16(uint16x8_t a, uint16x8_t b); // VBIC q0,q0,q0 +_NEON2SSE_GLOBAL uint32x4_t vbicq_u32(uint32x4_t a, uint32x4_t b); // VBIC q0,q0,q0 +_NEON2SSE_GLOBAL uint64x2_t vbicq_u64(uint64x2_t a, uint64x2_t b); // VBIC q0,q0,q0 +//Bitwise OR complement +_NEON2SSESTORAGE int8x8_t vorn_s8(int8x8_t a, int8x8_t b); // VORN d0,d0,d0 +_NEON2SSESTORAGE int16x4_t vorn_s16(int16x4_t a, int16x4_t b); // VORN d0,d0,d0 +_NEON2SSESTORAGE int32x2_t vorn_s32(int32x2_t a, int32x2_t b); // VORN d0,d0,d0 +_NEON2SSESTORAGE int64x1_t vorn_s64(int64x1_t a, int64x1_t b); // VORN d0,d0,d0 +_NEON2SSE_GLOBAL uint8x8_t vorn_u8(uint8x8_t a, uint8x8_t b); // VORN d0,d0,d0 +_NEON2SSE_GLOBAL uint16x4_t vorn_u16(uint16x4_t a, uint16x4_t b); // VORN d0,d0,d0 +_NEON2SSE_GLOBAL uint32x2_t 
vorn_u32(uint32x2_t a, uint32x2_t b); // VORN d0,d0,d0 +_NEON2SSE_GLOBAL uint64x1_t vorn_u64(uint64x1_t a, uint64x1_t b); // VORN d0,d0,d0 +_NEON2SSESTORAGE int8x16_t vornq_s8(int8x16_t a, int8x16_t b); // VORN q0,q0,q0 +_NEON2SSESTORAGE int16x8_t vornq_s16(int16x8_t a, int16x8_t b); // VORN q0,q0,q0 +_NEON2SSESTORAGE int32x4_t vornq_s32(int32x4_t a, int32x4_t b); // VORN q0,q0,q0 +_NEON2SSESTORAGE int64x2_t vornq_s64(int64x2_t a, int64x2_t b); // VORN q0,q0,q0 +_NEON2SSESTORAGE uint8x16_t vornq_u8(uint8x16_t a, uint8x16_t b); // VORN q0,q0,q0 +_NEON2SSESTORAGE uint16x8_t vornq_u16(uint16x8_t a, uint16x8_t b); // VORN q0,q0,q0 +_NEON2SSESTORAGE uint32x4_t vornq_u32(uint32x4_t a, uint32x4_t b); // VORN q0,q0,q0 +_NEON2SSE_GLOBAL uint64x2_t vornq_u64(uint64x2_t a, uint64x2_t b); // VORN q0,q0,q0 +//Bitwise Select +_NEON2SSESTORAGE int8x8_t vbsl_s8(uint8x8_t a, int8x8_t b, int8x8_t c); // VBSL d0,d0,d0 +_NEON2SSE_GLOBAL int16x4_t vbsl_s16(uint16x4_t a, int16x4_t b, int16x4_t c); // VBSL d0,d0,d0 +_NEON2SSE_GLOBAL int32x2_t vbsl_s32(uint32x2_t a, int32x2_t b, int32x2_t c); // VBSL d0,d0,d0 +_NEON2SSESTORAGE int64x1_t vbsl_s64(uint64x1_t a, int64x1_t b, int64x1_t c); // VBSL d0,d0,d0 +_NEON2SSE_GLOBAL uint8x8_t vbsl_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c); // VBSL d0,d0,d0 +_NEON2SSE_GLOBAL uint16x4_t vbsl_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c); // VBSL d0,d0,d0 +_NEON2SSE_GLOBAL uint32x2_t vbsl_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c); // VBSL d0,d0,d0 +_NEON2SSE_GLOBAL uint64x1_t vbsl_u64(uint64x1_t a, uint64x1_t b, uint64x1_t c); // VBSL d0,d0,d0 +_NEON2SSESTORAGE float32x2_t vbsl_f32(uint32x2_t a, float32x2_t b, float32x2_t c); // VBSL d0,d0,d0 +_NEON2SSE_GLOBAL poly8x8_t vbsl_p8(uint8x8_t a, poly8x8_t b, poly8x8_t c); // VBSL d0,d0,d0 +_NEON2SSE_GLOBAL poly16x4_t vbsl_p16(uint16x4_t a, poly16x4_t b, poly16x4_t c); // VBSL d0,d0,d0 +_NEON2SSESTORAGE int8x16_t vbslq_s8(uint8x16_t a, int8x16_t b, int8x16_t c); // VBSL q0,q0,q0 +_NEON2SSE_GLOBAL int16x8_t vbslq_s16(uint16x8_t a, int16x8_t b, int16x8_t c); // VBSL q0,q0,q0 +_NEON2SSE_GLOBAL int32x4_t vbslq_s32(uint32x4_t a, int32x4_t b, int32x4_t c); // VBSL q0,q0,q0 +_NEON2SSE_GLOBAL int64x2_t vbslq_s64(uint64x2_t a, int64x2_t b, int64x2_t c); // VBSL q0,q0,q0 +_NEON2SSE_GLOBAL uint8x16_t vbslq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c); // VBSL q0,q0,q0 +_NEON2SSE_GLOBAL uint16x8_t vbslq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c); // VBSL q0,q0,q0 +_NEON2SSE_GLOBAL uint32x4_t vbslq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c); // VBSL q0,q0,q0 +_NEON2SSE_GLOBAL uint64x2_t vbslq_u64(uint64x2_t a, uint64x2_t b, uint64x2_t c); // VBSL q0,q0,q0 +_NEON2SSESTORAGE float32x4_t vbslq_f32(uint32x4_t a, float32x4_t b, float32x4_t c); // VBSL q0,q0,q0 +_NEON2SSE_GLOBAL poly8x16_t vbslq_p8(uint8x16_t a, poly8x16_t b, poly8x16_t c); // VBSL q0,q0,q0 +_NEON2SSE_GLOBAL poly16x8_t vbslq_p16(uint16x8_t a, poly16x8_t b, poly16x8_t c); // VBSL q0,q0,q0 +//Transposition operations +//Transpose elements +_NEON2SSESTORAGE int8x8x2_t vtrn_s8(int8x8_t a, int8x8_t b); // VTRN.8 d0,d0 +_NEON2SSESTORAGE int16x4x2_t vtrn_s16(int16x4_t a, int16x4_t b); // VTRN.16 d0,d0 +_NEON2SSESTORAGE int32x2x2_t vtrn_s32(int32x2_t a, int32x2_t b); // VTRN.32 d0,d0 +_NEON2SSE_GLOBAL uint8x8x2_t vtrn_u8(uint8x8_t a, uint8x8_t b); // VTRN.8 d0,d0 +_NEON2SSE_GLOBAL uint16x4x2_t vtrn_u16(uint16x4_t a, uint16x4_t b); // VTRN.16 d0,d0 +_NEON2SSE_GLOBAL uint32x2x2_t vtrn_u32(uint32x2_t a, uint32x2_t b); // VTRN.32 d0,d0 +_NEON2SSESTORAGE float32x2x2_t vtrn_f32(float32x2_t 
a, float32x2_t b); // VTRN.32 d0,d0 +_NEON2SSE_GLOBAL poly8x8x2_t vtrn_p8(poly8x8_t a, poly8x8_t b); // VTRN.8 d0,d0 +_NEON2SSE_GLOBAL poly16x4x2_t vtrn_p16(poly16x4_t a, poly16x4_t b); // VTRN.16 d0,d0 +_NEON2SSESTORAGE int8x16x2_t vtrnq_s8(int8x16_t a, int8x16_t b); // VTRN.8 q0,q0 +_NEON2SSESTORAGE int16x8x2_t vtrnq_s16(int16x8_t a, int16x8_t b); // VTRN.16 q0,q0 +_NEON2SSESTORAGE int32x4x2_t vtrnq_s32(int32x4_t a, int32x4_t b); // VTRN.32 q0,q0 +_NEON2SSE_GLOBAL uint8x16x2_t vtrnq_u8(uint8x16_t a, uint8x16_t b); // VTRN.8 q0,q0 +_NEON2SSE_GLOBAL uint16x8x2_t vtrnq_u16(uint16x8_t a, uint16x8_t b); // VTRN.16 q0,q0 +_NEON2SSE_GLOBAL uint32x4x2_t vtrnq_u32(uint32x4_t a, uint32x4_t b); // VTRN.32 q0,q0 +_NEON2SSESTORAGE float32x4x2_t vtrnq_f32(float32x4_t a, float32x4_t b); // VTRN.32 q0,q0 +_NEON2SSE_GLOBAL poly8x16x2_t vtrnq_p8(poly8x16_t a, poly8x16_t b); // VTRN.8 q0,q0 +_NEON2SSE_GLOBAL poly16x8x2_t vtrnq_p16(poly16x8_t a, poly16x8_t b); // VTRN.16 q0,q0 +//Interleave elements +_NEON2SSESTORAGE int8x8x2_t vzip_s8(int8x8_t a, int8x8_t b); // VZIP.8 d0,d0 +_NEON2SSESTORAGE int16x4x2_t vzip_s16(int16x4_t a, int16x4_t b); // VZIP.16 d0,d0 +_NEON2SSE_GLOBAL int32x2x2_t vzip_s32(int32x2_t a, int32x2_t b); // VZIP.32 d0,d0 +_NEON2SSE_GLOBAL uint8x8x2_t vzip_u8(uint8x8_t a, uint8x8_t b); // VZIP.8 d0,d0 +_NEON2SSE_GLOBAL uint16x4x2_t vzip_u16(uint16x4_t a, uint16x4_t b); // VZIP.16 d0,d0 +_NEON2SSE_GLOBAL uint32x2x2_t vzip_u32(uint32x2_t a, uint32x2_t b); // VZIP.32 d0,d0 +_NEON2SSE_GLOBAL float32x2x2_t vzip_f32(float32x2_t a, float32x2_t b); // VZIP.32 d0,d0 +_NEON2SSE_GLOBAL poly8x8x2_t vzip_p8(poly8x8_t a, poly8x8_t b); // VZIP.8 d0,d0 +_NEON2SSE_GLOBAL poly16x4x2_t vzip_p16(poly16x4_t a, poly16x4_t b); // VZIP.16 d0,d0 +_NEON2SSESTORAGE int8x16x2_t vzipq_s8(int8x16_t a, int8x16_t b); // VZIP.8 q0,q0 +_NEON2SSESTORAGE int16x8x2_t vzipq_s16(int16x8_t a, int16x8_t b); // VZIP.16 q0,q0 +_NEON2SSESTORAGE int32x4x2_t vzipq_s32(int32x4_t a, int32x4_t b); // VZIP.32 q0,q0 +_NEON2SSE_GLOBAL uint8x16x2_t vzipq_u8(uint8x16_t a, uint8x16_t b); // VZIP.8 q0,q0 +_NEON2SSE_GLOBAL uint16x8x2_t vzipq_u16(uint16x8_t a, uint16x8_t b); // VZIP.16 q0,q0 +_NEON2SSE_GLOBAL uint32x4x2_t vzipq_u32(uint32x4_t a, uint32x4_t b); // VZIP.32 q0,q0 +_NEON2SSESTORAGE float32x4x2_t vzipq_f32(float32x4_t a, float32x4_t b); // VZIP.32 q0,q0 +_NEON2SSE_GLOBAL poly8x16x2_t vzipq_p8(poly8x16_t a, poly8x16_t b); // VZIP.8 q0,q0 +_NEON2SSE_GLOBAL poly16x8x2_t vzipq_p16(poly16x8_t a, poly16x8_t b); // VZIP.16 q0,q0 +//De-Interleave elements +_NEON2SSESTORAGE int8x8x2_t vuzp_s8(int8x8_t a, int8x8_t b); // VUZP.8 d0,d0 +_NEON2SSESTORAGE int16x4x2_t vuzp_s16(int16x4_t a, int16x4_t b); // VUZP.16 d0,d0 +_NEON2SSESTORAGE int32x2x2_t vuzp_s32(int32x2_t a, int32x2_t b); // VUZP.32 d0,d0 +_NEON2SSE_GLOBAL uint8x8x2_t vuzp_u8(uint8x8_t a, uint8x8_t b); // VUZP.8 d0,d0 +_NEON2SSE_GLOBAL uint16x4x2_t vuzp_u16(uint16x4_t a, uint16x4_t b); // VUZP.16 d0,d0 +_NEON2SSE_GLOBAL uint32x2x2_t vuzp_u32(uint32x2_t a, uint32x2_t b); // VUZP.32 d0,d0 +_NEON2SSE_GLOBAL float32x2x2_t vuzp_f32(float32x2_t a, float32x2_t b); // VUZP.32 d0,d0 +_NEON2SSE_GLOBAL poly8x8x2_t vuzp_p8(poly8x8_t a, poly8x8_t b); // VUZP.8 d0,d0 +_NEON2SSE_GLOBAL poly16x4x2_t vuzp_p16(poly16x4_t a, poly16x4_t b); // VUZP.16 d0,d0 +_NEON2SSESTORAGE int8x16x2_t vuzpq_s8(int8x16_t a, int8x16_t b); // VUZP.8 q0,q0 +_NEON2SSESTORAGE int16x8x2_t vuzpq_s16(int16x8_t a, int16x8_t b); // VUZP.16 q0,q0 +_NEON2SSESTORAGE int32x4x2_t vuzpq_s32(int32x4_t a, int32x4_t b); // VUZP.32 q0,q0 
+_NEON2SSE_GLOBAL uint8x16x2_t vuzpq_u8(uint8x16_t a, uint8x16_t b); // VUZP.8 q0,q0 +_NEON2SSE_GLOBAL uint16x8x2_t vuzpq_u16(uint16x8_t a, uint16x8_t b); // VUZP.16 q0,q0 +_NEON2SSE_GLOBAL uint32x4x2_t vuzpq_u32(uint32x4_t a, uint32x4_t b); // VUZP.32 q0,q0 +_NEON2SSESTORAGE float32x4x2_t vuzpq_f32(float32x4_t a, float32x4_t b); // VUZP.32 q0,q0 +_NEON2SSE_GLOBAL poly8x16x2_t vuzpq_p8(poly8x16_t a, poly8x16_t b); // VUZP.8 q0,q0 +_NEON2SSE_GLOBAL poly16x8x2_t vuzpq_p16(poly16x8_t a, poly16x8_t b); // VUZP.16 q0,q0 + +_NEON2SSESTORAGE float32x4_t vrndnq_f32(float32x4_t a); // VRND.F32 q0,q0 +_NEON2SSESTORAGE float64x2_t vrndnq_f64(float64x2_t a); // VRND.F64 q0,q0 + +//Sqrt +_NEON2SSE_GLOBAL float32x4_t vsqrtq_f32(float32x4_t a); // VSQRT.F32 q0,q0 +_NEON2SSE_GLOBAL float64x2_t vsqrtq_f64(float64x2_t a); // VSQRT.F64 q0,q0 + + +//^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +// the following macros solve the problem of the "immediate parameters requirement" for some x86 intrinsics. +// we need it to compile the code unless the "Intrinsic parameter must be an immediate value" error is our goal +// +#if ( defined (__INTEL_COMPILER) && !defined(__llvm__) ) +# define _MM_ALIGNR_EPI8 _mm_alignr_epi8 +# define _MM_EXTRACT_EPI16 (int16_t) _mm_extract_epi16 +# define _MM_INSERT_EPI16 _mm_insert_epi16 +# ifdef USE_SSE4 +# define _MM_EXTRACT_EPI8 _mm_extract_epi8 +# define _MM_EXTRACT_EPI32 _mm_extract_epi32 +# define _MM_EXTRACT_PS _mm_extract_ps +# define _MM_INSERT_EPI8 _mm_insert_epi8 +# define _MM_INSERT_EPI32 _mm_insert_epi32 +# define _MM_INSERT_PS _mm_insert_ps +# ifdef _NEON2SSE_64BIT +# define _MM_INSERT_EPI64 _mm_insert_epi64 +# define _MM_EXTRACT_EPI64 _mm_extract_epi64 +# endif +# endif //SSE4 +#else +# define _NEON2SSE_COMMA , +# define _NEON2SSE_SWITCH16(NAME, a, b, LANE) \ + switch(LANE) \ + { \ + case 0: return NAME(a b, 0); \ + case 1: return NAME(a b, 1); \ + case 2: return NAME(a b, 2); \ + case 3: return NAME(a b, 3); \ + case 4: return NAME(a b, 4); \ + case 5: return NAME(a b, 5); \ + case 6: return NAME(a b, 6); \ + case 7: return NAME(a b, 7); \ + case 8: return NAME(a b, 8); \ + case 9: return NAME(a b, 9); \ + case 10: return NAME(a b, 10); \ + case 11: return NAME(a b, 11); \ + case 12: return NAME(a b, 12); \ + case 13: return NAME(a b, 13); \ + case 14: return NAME(a b, 14); \ + case 15: return NAME(a b, 15); \ + default: return NAME(a b, 0); \ + } + +# define _NEON2SSE_SWITCH8(NAME, vec, LANE, p) \ + switch(LANE) \ + { \ + case 0: return NAME(vec p,0); \ + case 1: return NAME(vec p,1); \ + case 2: return NAME(vec p,2); \ + case 3: return NAME(vec p,3); \ + case 4: return NAME(vec p,4); \ + case 5: return NAME(vec p,5); \ + case 6: return NAME(vec p,6); \ + case 7: return NAME(vec p,7); \ + default: return NAME(vec p,0); \ + } + +# define _NEON2SSE_SWITCH4(NAME, case0, case1, case2, case3, vec, LANE, p) \ + switch(LANE) \ + { \ + case case0: return NAME(vec p,case0); \ + case case1: return NAME(vec p,case1); \ + case case2: return NAME(vec p,case2); \ + case case3: return NAME(vec p,case3); \ + default: return NAME(vec p,case0); \ + } + + _NEON2SSE_INLINE __m128i _MM_ALIGNR_EPI8(__m128i a, __m128i b, int LANE) + { + _NEON2SSE_SWITCH16(_mm_alignr_epi8, a, _NEON2SSE_COMMA b, LANE) + } + + _NEON2SSE_INLINE __m128i _MM_INSERT_EPI16(__m128i vec, int p, const int LANE) + { + _NEON2SSE_SWITCH8(_mm_insert_epi16, vec, LANE, _NEON2SSE_COMMA p) + } + + _NEON2SSE_INLINE int16_t _MM_EXTRACT_EPI16(__m128i 
vec, const int LANE) + { + _NEON2SSE_SWITCH8((int16_t)_mm_extract_epi16, vec, LANE,) + } + +#ifdef USE_SSE4 + _NEON2SSE_INLINE int _MM_EXTRACT_EPI32(__m128i vec, const int LANE) + { + _NEON2SSE_SWITCH4(_mm_extract_epi32, 0,1,2,3, vec, LANE,) + } + + _NEON2SSE_INLINE int _MM_EXTRACT_PS(__m128 vec, const int LANE) + { + _NEON2SSE_SWITCH4(_mm_extract_ps, 0,1,2,3, vec, LANE,) + } + + _NEON2SSE_INLINE int _MM_EXTRACT_EPI8(__m128i vec, const int LANE) + { + _NEON2SSE_SWITCH16(_mm_extract_epi8, vec, , LANE) + } + + _NEON2SSE_INLINE __m128i _MM_INSERT_EPI32(__m128i vec, int p, const int LANE) + { + _NEON2SSE_SWITCH4(_mm_insert_epi32, 0, 1, 2, 3, vec, LANE, _NEON2SSE_COMMA p) + } + + _NEON2SSE_INLINE __m128i _MM_INSERT_EPI8(__m128i vec, int p, const int LANE) + { + _NEON2SSE_SWITCH16(_mm_insert_epi8, vec, _NEON2SSE_COMMA p, LANE) + } + +#ifdef _NEON2SSE_64BIT + //the special case of functions available only for SSE4 and 64-bit build. + _NEON2SSE_INLINE __m128i _MM_INSERT_EPI64(__m128i vec, int64_t p, const int LANE) + { + switch(LANE) { + case 0: + return _mm_insert_epi64(vec, p, 0); + case 1: + return _mm_insert_epi64(vec, p, 1); + default: + return _mm_insert_epi64(vec, p, 0); + } + } + + _NEON2SSE_INLINE int64_t _MM_EXTRACT_EPI64(__m128i val, const int LANE) + { + if (LANE ==0) return _mm_extract_epi64(val, 0); + else return _mm_extract_epi64(val, 1); + } +#endif + + _NEON2SSE_INLINE __m128 _MM_INSERT_PS(__m128 vec, __m128 p, const int LANE) + { + _NEON2SSE_SWITCH4(_mm_insert_ps, 0, 16, 32, 48, vec, LANE, _NEON2SSE_COMMA p) + } + +#endif //USE_SSE4 + +#endif // defined(__INTEL_COMPILER) && !defined(__llvm__) + +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +// Below are some helper functions used either to "emulate" SSE4 intrinsics on SSSE3-limited devices +// or to implement some specific commonly used operations missing in SSE +#ifdef USE_SSE4 +# define _MM_CVTEPU8_EPI16 _mm_cvtepu8_epi16 +# define _MM_CVTEPU16_EPI32 _mm_cvtepu16_epi32 +# define _MM_CVTEPU32_EPI64 _mm_cvtepu32_epi64 + +# define _MM_CVTEPI8_EPI16 _mm_cvtepi8_epi16 +# define _MM_CVTEPI16_EPI32 _mm_cvtepi16_epi32 +# define _MM_CVTEPI32_EPI64 _mm_cvtepi32_epi64 + +# define _MM_MAX_EPI8 _mm_max_epi8 +# define _MM_MAX_EPI32 _mm_max_epi32 +# define _MM_MAX_EPU16 _mm_max_epu16 +# define _MM_MAX_EPU32 _mm_max_epu32 + +# define _MM_MIN_EPI8 _mm_min_epi8 +# define _MM_MIN_EPI32 _mm_min_epi32 +# define _MM_MIN_EPU16 _mm_min_epu16 +# define _MM_MIN_EPU32 _mm_min_epu32 + +# define _MM_BLENDV_EPI8 _mm_blendv_epi8 +# define _MM_PACKUS_EPI32 _mm_packus_epi32 +# define _MM_PACKUS1_EPI32(a) _mm_packus_epi32(a, a) + +# define _MM_MULLO_EPI32 _mm_mullo_epi32 +# define _MM_MUL_EPI32 _mm_mul_epi32 + +# define _MM_CMPEQ_EPI64 _mm_cmpeq_epi64 +#else //no SSE4 !!!!!!
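+ //The emulations below need only SSE2/SSSE3. Sign extension, for example, interleaves the source
+ //with its computed sign mask: e.g. the byte 0x80 (-128) is paired with the 0xff sign byte,
+ //forming the little-endian int16 0xff80 (-128) - the same result _mm_cvtepi8_epi16 gives on SSE4.1.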
+ _NEON2SSE_INLINE __m128i _MM_CVTEPU8_EPI16(__m128i a) + { + __m128i zero = _mm_setzero_si128(); + return _mm_unpacklo_epi8(a, zero); + } + + _NEON2SSE_INLINE __m128i _MM_CVTEPU16_EPI32(__m128i a) + { + __m128i zero = _mm_setzero_si128(); + return _mm_unpacklo_epi16(a, zero); + } + + _NEON2SSE_INLINE __m128i _MM_CVTEPU32_EPI64(__m128i a) + { + __m128i zero = _mm_setzero_si128(); + return _mm_unpacklo_epi32(a, zero); + } + + _NEON2SSE_INLINE __m128i _MM_CVTEPI8_EPI16(__m128i a) + { + __m128i zero = _mm_setzero_si128(); + __m128i sign = _mm_cmpgt_epi8(zero, a); + return _mm_unpacklo_epi8(a, sign); + } + + _NEON2SSE_INLINE __m128i _MM_CVTEPI16_EPI32(__m128i a) + { + __m128i zero = _mm_setzero_si128(); + __m128i sign = _mm_cmpgt_epi16(zero, a); + return _mm_unpacklo_epi16(a, sign); + } + + _NEON2SSE_INLINE __m128i _MM_CVTEPI32_EPI64(__m128i a) + { + __m128i zero = _mm_setzero_si128(); + __m128i sign = _mm_cmpgt_epi32(zero, a); + return _mm_unpacklo_epi32(a, sign); + } + + _NEON2SSE_INLINE int _MM_EXTRACT_EPI32(__m128i vec, const int LANE) + { + _NEON2SSE_ALIGN_16 int32_t tmp[4]; + _mm_store_si128((__m128i*)tmp, vec); + return tmp[LANE]; + } + + _NEON2SSE_INLINE int _MM_EXTRACT_EPI8(__m128i vec, const int LANE) + { + _NEON2SSE_ALIGN_16 int8_t tmp[16]; + _mm_store_si128((__m128i*)tmp, vec); + return (int)tmp[LANE]; + } + + _NEON2SSE_INLINE int _MM_EXTRACT_PS(__m128 vec, const int LANE) + { + _NEON2SSE_ALIGN_16 int32_t tmp[4]; + _mm_store_si128((__m128i*)tmp, _M128i(vec)); + return tmp[LANE]; + } + + _NEON2SSE_INLINE __m128i _MM_INSERT_EPI32(__m128i vec, int p, const int LANE) + { + _NEON2SSE_ALIGN_16 int32_t pvec[4] = {0,0,0,0}; + _NEON2SSE_ALIGN_16 uint32_t mask[4] = {0xffffffff,0xffffffff,0xffffffff,0xffffffff}; + __m128i vec_masked, p_masked; + pvec[LANE] = p; + mask[LANE] = 0x0; + vec_masked = _mm_and_si128 (*(__m128i*)mask,vec); //ready for p + p_masked = _mm_andnot_si128 (*(__m128i*)mask,*(__m128i*)pvec); //ready for vec + return _mm_or_si128(vec_masked, p_masked); + } + + _NEON2SSE_INLINE __m128i _MM_INSERT_EPI8(__m128i vec, int p, const int LANE) + { + _NEON2SSE_ALIGN_16 int8_t pvec[16] = {0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0}; + _NEON2SSE_ALIGN_16 uint8_t mask[16] = {0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff}; + __m128i vec_masked, p_masked; + pvec[LANE] = (int8_t)p; + mask[LANE] = 0x0; + vec_masked = _mm_and_si128 (*(__m128i*)mask,vec); //ready for p + p_masked = _mm_andnot_si128 (*(__m128i*)mask,*(__m128i*)pvec); //ready for vec + return _mm_or_si128(vec_masked, p_masked); + } + + _NEON2SSE_INLINE __m128 _MM_INSERT_PS(__m128 vec, __m128 p, const int LANE) + { + _NEON2SSE_ALIGN_16 uint32_t mask[4] = {0xffffffff,0xffffffff,0xffffffff,0xffffffff}; + __m128 tmp, vec_masked, p_masked; + mask[LANE >> 4] = 0x0; //LANE here is the _mm_insert_ps immediate, with the target lane index in bits 4-5, not the raw lane number + vec_masked = _mm_and_ps (*(__m128*)mask,vec); //ready for p + p_masked = _mm_andnot_ps (*(__m128*)mask, p); //ready for vec + tmp = _mm_or_ps(vec_masked, p_masked); + return tmp; + } + + _NEON2SSE_INLINE __m128i _MM_MAX_EPI8(__m128i a, __m128i b) + { + __m128i cmp, resa, resb; + cmp = _mm_cmpgt_epi8 (a, b); + resa = _mm_and_si128 (cmp, a); + resb = _mm_andnot_si128 (cmp,b); + return _mm_or_si128(resa, resb); + } + + _NEON2SSE_INLINE __m128i _MM_MAX_EPI32(__m128i a, __m128i b) + { + __m128i cmp, resa, resb; + cmp = _mm_cmpgt_epi32(a, b); + resa = _mm_and_si128 (cmp, a); + resb = _mm_andnot_si128 (cmp,b); + return _mm_or_si128(resa, resb); + } + + _NEON2SSE_INLINE __m128i
_MM_MAX_EPU16(__m128i a, __m128i b) + { + __m128i c8000, b_s, a_s, cmp; + c8000 = _mm_cmpeq_epi16 (a,a); //0xffff + c8000 = _mm_slli_epi16 (c8000, 15); //0x8000 + b_s = _mm_sub_epi16 (b, c8000); + a_s = _mm_sub_epi16 (a, c8000); + cmp = _mm_cmpgt_epi16 (a_s, b_s); //no unsigned comparison, need to go to signed + a_s = _mm_and_si128 (cmp,a); + b_s = _mm_andnot_si128 (cmp,b); + return _mm_or_si128(a_s, b_s); + } + + _NEON2SSE_INLINE __m128i _MM_MAX_EPU32(__m128i a, __m128i b) + { + __m128i c80000000, b_s, a_s, cmp; + c80000000 = _mm_cmpeq_epi32 (a,a); //0xffffffff + c80000000 = _mm_slli_epi32 (c80000000, 31); //0x80000000 + b_s = _mm_sub_epi32 (b, c80000000); + a_s = _mm_sub_epi32 (a, c80000000); + cmp = _mm_cmpgt_epi32 (a_s, b_s); //no unsigned comparison, need to go to signed + a_s = _mm_and_si128 (cmp,a); + b_s = _mm_andnot_si128 (cmp,b); + return _mm_or_si128(a_s, b_s); + } + + _NEON2SSE_INLINE __m128i _MM_MIN_EPI8(__m128i a, __m128i b) + { + __m128i cmp, resa, resb; + cmp = _mm_cmpgt_epi8 (b, a); + resa = _mm_and_si128 (cmp, a); + resb = _mm_andnot_si128 (cmp,b); + return _mm_or_si128(resa, resb); + } + + _NEON2SSE_INLINE __m128i _MM_MIN_EPI32(__m128i a, __m128i b) + { + __m128i cmp, resa, resb; + cmp = _mm_cmpgt_epi32(b, a); + resa = _mm_and_si128 (cmp, a); + resb = _mm_andnot_si128 (cmp,b); + return _mm_or_si128(resa, resb); + } + + _NEON2SSE_INLINE __m128i _MM_MIN_EPU16(__m128i a, __m128i b) + { + __m128i c8000, b_s, a_s, cmp; + c8000 = _mm_cmpeq_epi16 (a,a); //0xffff + c8000 = _mm_slli_epi16 (c8000, 15); //0x8000 + b_s = _mm_sub_epi16 (b, c8000); + a_s = _mm_sub_epi16 (a, c8000); + cmp = _mm_cmpgt_epi16 (b_s, a_s); //no unsigned comparison, need to go to signed + a_s = _mm_and_si128 (cmp,a); + b_s = _mm_andnot_si128 (cmp,b); + return _mm_or_si128(a_s, b_s); + } + + _NEON2SSE_INLINE __m128i _MM_MIN_EPU32(__m128i a, __m128i b) + { + __m128i c80000000, b_s, a_s, cmp; + c80000000 = _mm_cmpeq_epi32 (a,a); //0xffffffff + c80000000 = _mm_slli_epi32 (c80000000, 31); //0x80000000 + b_s = _mm_sub_epi32 (b, c80000000); + a_s = _mm_sub_epi32 (a, c80000000); + cmp = _mm_cmpgt_epi32 (b_s, a_s); //no unsigned comparison, need to go to signed + a_s = _mm_and_si128 (cmp,a); + b_s = _mm_andnot_si128 (cmp,b); + return _mm_or_si128(a_s, b_s); + } + + _NEON2SSE_INLINE __m128i _MM_BLENDV_EPI8(__m128i a, __m128i b, __m128i mask) //this is NOT exact implementation of _mm_blendv_epi8 !!!!! - please see below + { + //it assumes mask is either 0xff or 0 always (like in all usecases below) while for the original _mm_blendv_epi8 only MSB mask byte matters. 
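+ //e.g. for a mask byte of 0x80 the genuine SSE4.1 instruction takes the whole byte from b,
+ //while this AND/ANDNOT emulation yields (b & 0x80) | (a & 0x7f) - hence the 0/0xff restriction above.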
+ __m128i a_masked, b_masked; + b_masked = _mm_and_si128 (mask,b); //use b if mask 0xff + a_masked = _mm_andnot_si128 (mask,a); + return _mm_or_si128(a_masked, b_masked); + } + + _NEON2SSE_INLINE __m128i _MM_PACKUS_EPI32(__m128i a, __m128i b) + { + __m128i a16, b16, res, reshi,cmp, zero; + zero = _mm_setzero_si128(); + a16 = _mm_shuffle_epi8 (a, *(__m128i*) mask8_32_even_odd); + b16 = _mm_shuffle_epi8 (b, *(__m128i*) mask8_32_even_odd); + res = _mm_unpacklo_epi64(a16, b16); //result without saturation + reshi = _mm_unpackhi_epi64(a16, b16); //hi part of result used for saturation + cmp = _mm_cmpgt_epi16(zero, reshi); //if cmp<0 the result should be zero + res = _mm_andnot_si128(cmp,res); //if cmp zero - do nothing, otherwise cmp <0 and the result is 0 + cmp = _mm_cmpgt_epi16(reshi,zero); //if cmp positive + return _mm_or_si128(res, cmp); //if cmp positive we are out of 16 bits, need to saturate to 0xffff + } + + _NEON2SSE_INLINE __m128i _MM_PACKUS1_EPI32(__m128i a) + { + __m128i a16, res, reshi,cmp, zero; + zero = _mm_setzero_si128(); + a16 = _mm_shuffle_epi8 (a, *(__m128i*)mask8_32_even_odd); + reshi = _mm_unpackhi_epi64(a16, a16); //hi part of result used for saturation + cmp = _mm_cmpgt_epi16(zero, reshi); //if cmp<0 the result should be zero + res = _mm_andnot_si128(cmp, a16); //if cmp zero - do nothing, otherwise cmp <0 and the result is 0 + cmp = _mm_cmpgt_epi16(reshi,zero); //if cmp positive + return _mm_or_si128(res, cmp); //if cmp positive we are out of 16 bits, need to saturate to 0xffff + } + + // method used by GCC with generic vector extensions + _NEON2SSE_INLINE __m128i _MM_MULLO_EPI32(__m128i a, __m128i b) + { + __m128i a_high = _mm_srli_epi64(a, 32); + __m128i low = _mm_mul_epu32(a, b); + __m128i b_high = _mm_srli_epi64(b, 32); + __m128i high = _mm_mul_epu32(a_high, b_high); + low = _mm_shuffle_epi32(low, _MM_SHUFFLE(0, 0, 2, 0)); + high = _mm_shuffle_epi32(high, _MM_SHUFFLE(0, 0, 2, 0)); + return _mm_unpacklo_epi32(low, high); + } + + _NEON2SSE_INLINE __m128i _MM_MUL_EPI32(__m128i a, __m128i b) + { + __m128i sign, zero, mul_us, a_neg, b_neg, mul_us_neg; + sign = _mm_xor_si128 (a, b); + sign = _mm_srai_epi32 (sign, 31); //promote sign bit to all fields, all fff if negative and all 0 if positive + sign = _mm_shuffle_epi32(sign, _MM_SHUFFLE(2, 2, 0, 0)); //promote sign bit to 3 and 1st data lanes + zero = _mm_setzero_si128(); + a_neg = _mm_abs_epi32 (a); //absolute value of a + b_neg = _mm_abs_epi32 (b); //absolute value of b + mul_us = _mm_mul_epu32 (a_neg, b_neg); //uses 0 and 2nd data lanes, (abs), the multiplication gives 64 bit result + mul_us_neg = _mm_sub_epi64(zero, mul_us); + mul_us_neg = _mm_and_si128(sign, mul_us_neg); + mul_us = _mm_andnot_si128(sign, mul_us); + return _mm_or_si128 (mul_us, mul_us_neg); + } + + _NEON2SSE_INLINE __m128i _MM_CMPEQ_EPI64(__m128i a, __m128i b) + { + __m128i res; + res = _mm_cmpeq_epi32 (a, b); + return _mm_shuffle_epi32 (res, 1 | (1 << 2) | (3 << 4) | (3 << 6)); //copy the information from hi to low part of the 64 bit data + } +#endif //SSE4 + +//the special case of functions working only for 32 bits, no SSE4 +_NEON2SSE_INLINE __m128i _MM_INSERT_EPI64_32(__m128i vec, int64_t p, const int LANE) +{ + _NEON2SSE_ALIGN_16 uint64_t pvec[2] = {0,0}; + _NEON2SSE_ALIGN_16 uint64_t mask[2] = {0xffffffffffffffff, 0xffffffffffffffff}; + __m128i vec_masked, p_masked; + pvec[LANE] = p; + mask[LANE] = 0x0; + vec_masked = _mm_and_si128 (*(__m128i*)mask,vec); //ready for p + p_masked = _mm_andnot_si128 (*(__m128i*)mask,*(__m128i*)pvec); //ready for vec + return
_mm_or_si128(vec_masked, p_masked); +} + +_NEON2SSE_INLINE int64_t _MM_EXTRACT_EPI64_32(__m128i val, const int LANE) +{ + _NEON2SSE_ALIGN_16 int64_t tmp[2]; + _mm_store_si128((__m128i*)tmp, val); + return tmp[LANE]; +} + +#ifndef _NEON2SSE_64BIT_SSE4 +# define _MM_INSERT_EPI64 _MM_INSERT_EPI64_32 +# define _MM_EXTRACT_EPI64 _MM_EXTRACT_EPI64_32 +#endif + +_NEON2SSESTORAGE int32x4_t vqd_s32(int32x4_t a); //Doubling saturation for signed ints +_NEON2SSE_INLINE int32x4_t vqd_s32(int32x4_t a) +{ + //Overflow happens only if a and sum have the opposite signs + __m128i c7fffffff, res, res_sat, res_xor_a; + c7fffffff = _mm_set1_epi32(0x7fffffff); + res = _mm_slli_epi32 (a, 1); // res = a*2 + res_sat = _mm_srli_epi32(a, 31); + res_sat = _mm_add_epi32(res_sat, c7fffffff); + res_xor_a = _mm_xor_si128(res, a); + res_xor_a = _mm_srai_epi32(res_xor_a,31); //propagate the sign bit: all ones if <0, all zeros otherwise + res_sat = _mm_and_si128(res_xor_a, res_sat); + res = _mm_andnot_si128(res_xor_a, res); + return _mm_or_si128(res, res_sat); +} + + +//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +//************************************************************************* +//************************************************************************* +//***************** Functions redefinition\implementation starts here ***** +//************************************************************************* +//************************************************************************* +//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +/*If a unified intrinsics solution is necessary, please define your SSE intrinsics wrap here like in the following sample: +#ifdef ARM +#define vector_addq_s32 vaddq_s32 +#else //if we have IA +#define vector_addq_s32 _mm_add_epi32 +#endif + +******************************************************************************************** +Functions below are organised in the following way: + +Each NEON intrinsic function has one of the following options: +1. its x86 full equivalent SSE intrinsic - in this case the x86 version just follows the NEON one under the corresponding #define statement +2. an x86 implementation using more than one x86 intrinsic. In this case it is shaped as an inlined C function with a return statement +3. a reference to a NEON function returning the same result and implemented in x86 as above. In this case it is shaped as a matching NEON function definition +4. for about 5% of functions, where the corresponding x86 SIMD support is unavailable or too inefficient, +a serial implementation is provided along with the corresponding compiler warning. If these functions are on your app's critical path +- please consider removing them from your code.
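+
+For example, in the code below: vaddq_s32 is option 1 (a plain #define to _mm_add_epi32),
+vhaddq_s32 is option 2 (an inlined combination of several SSE intrinsics), vadd_u8 is
+option 3 (a #define to vadd_s8, which produces the same result), and vqadd_s64 is option 4
+(a serial implementation marked with _NEON2SSE_PERFORMANCE_WARNING).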
+*/ + +//*********************************************************************** +//************************ Vector add ***************************** +//*********************************************************************** +_NEON2SSESTORAGE int8x8_t vadd_s8(int8x8_t a, int8x8_t b); // VADD.I8 d0,d0,d0 +_NEON2SSE_INLINE int8x8_t vadd_s8(int8x8_t a, int8x8_t b) +{ + int8x8_t res64; + return64(_mm_add_epi8(_pM128i(a),_pM128i(b))); +} + + +_NEON2SSESTORAGE int16x4_t vadd_s16(int16x4_t a, int16x4_t b); // VADD.I16 d0,d0,d0 +_NEON2SSE_INLINE int16x4_t vadd_s16(int16x4_t a, int16x4_t b) +{ + int16x4_t res64; + return64(_mm_add_epi16(_pM128i(a),_pM128i(b))); +} + + +_NEON2SSESTORAGE int32x2_t vadd_s32(int32x2_t a, int32x2_t b); // VADD.I32 d0,d0,d0 +_NEON2SSE_INLINE int32x2_t vadd_s32(int32x2_t a, int32x2_t b) +{ + int32x2_t res64; + return64(_mm_add_epi32(_pM128i(a),_pM128i(b))); +} + + +_NEON2SSESTORAGE int64x1_t vadd_s64(int64x1_t a, int64x1_t b); // VADD.I64 d0,d0,d0 +_NEON2SSE_INLINE int64x1_t vadd_s64(int64x1_t a, int64x1_t b) +{ + int64x1_t res64; + res64.m64_i64[0] = a.m64_i64[0] + b.m64_i64[0]; + return res64; +} + + +_NEON2SSESTORAGE float32x2_t vadd_f32(float32x2_t a, float32x2_t b); // VADD.F32 d0,d0,d0 +_NEON2SSE_INLINE float32x2_t vadd_f32(float32x2_t a, float32x2_t b) +{ + __m128 res; + __m64_128 res64; + res = _mm_add_ps(_pM128(a),_pM128(b)); //SSE, use only low 64 bits + _M64f(res64, res); + return res64; +} + +_NEON2SSE_GLOBAL uint8x8_t vadd_u8(uint8x8_t a, uint8x8_t b); // VADD.I8 d0,d0,d0 +#define vadd_u8 vadd_s8 + +_NEON2SSE_GLOBAL uint16x4_t vadd_u16(uint16x4_t a, uint16x4_t b); // VADD.I16 d0,d0,d0 +#define vadd_u16 vadd_s16 + +_NEON2SSE_GLOBAL uint32x2_t vadd_u32(uint32x2_t a, uint32x2_t b); // VADD.I32 d0,d0,d0 +#define vadd_u32 vadd_s32 + +_NEON2SSESTORAGE uint64x1_t vadd_u64(uint64x1_t a, uint64x1_t b); // VADD.I64 d0,d0,d0 +_NEON2SSE_INLINE uint64x1_t vadd_u64(uint64x1_t a, uint64x1_t b) +{ + uint64x1_t res64; + res64.m64_u64[0] = a.m64_u64[0] + b.m64_u64[0]; + return res64; +} + + +_NEON2SSE_GLOBAL int8x16_t vaddq_s8(int8x16_t a, int8x16_t b); // VADD.I8 q0,q0,q0 +#define vaddq_s8 _mm_add_epi8 + +_NEON2SSE_GLOBAL int16x8_t vaddq_s16(int16x8_t a, int16x8_t b); // VADD.I16 q0,q0,q0 +#define vaddq_s16 _mm_add_epi16 + +_NEON2SSE_GLOBAL int32x4_t vaddq_s32(int32x4_t a, int32x4_t b); // VADD.I32 q0,q0,q0 +#define vaddq_s32 _mm_add_epi32 + +_NEON2SSE_GLOBAL int64x2_t vaddq_s64(int64x2_t a, int64x2_t b); // VADD.I64 q0,q0,q0 +#define vaddq_s64 _mm_add_epi64 + +_NEON2SSE_GLOBAL float32x4_t vaddq_f32(float32x4_t a, float32x4_t b); // VADD.F32 q0,q0,q0 +#define vaddq_f32 _mm_add_ps + +_NEON2SSE_GLOBAL uint8x16_t vaddq_u8(uint8x16_t a, uint8x16_t b); // VADD.I8 q0,q0,q0 +#define vaddq_u8 _mm_add_epi8 + +_NEON2SSE_GLOBAL uint16x8_t vaddq_u16(uint16x8_t a, uint16x8_t b); // VADD.I16 q0,q0,q0 +#define vaddq_u16 _mm_add_epi16 + +_NEON2SSE_GLOBAL uint32x4_t vaddq_u32(uint32x4_t a, uint32x4_t b); // VADD.I32 q0,q0,q0 +#define vaddq_u32 _mm_add_epi32 + +_NEON2SSE_GLOBAL uint64x2_t vaddq_u64(uint64x2_t a, uint64x2_t b); // VADD.I64 q0,q0,q0 +#define vaddq_u64 _mm_add_epi64 + +//**************************** Vector long add *****************************: +//*********************************************************************** +//Va, Vb have equal lane sizes, result is a 128 bit vector of lanes that are twice the width. 
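+//e.g. vaddl_s8 on inputs whose lane 0 values are both 100 returns an int16x8_t with lane 0 = 200,
+//where the ordinary vadd_s8 would wrap the int8 result around to -56.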
+_NEON2SSESTORAGE int16x8_t vaddl_s8(int8x8_t a, int8x8_t b); // VADDL.S8 q0,d0,d0 +_NEON2SSE_INLINE int16x8_t vaddl_s8(int8x8_t a, int8x8_t b) // VADDL.S8 q0,d0,d0 +{ + __m128i a16, b16; + a16 = _MM_CVTEPI8_EPI16 (_pM128i(a)); //SSE4.1, + b16 = _MM_CVTEPI8_EPI16 (_pM128i(b)); //SSE4.1, + return _mm_add_epi16 (a16, b16); +} + +_NEON2SSESTORAGE int32x4_t vaddl_s16(int16x4_t a, int16x4_t b); // VADDL.S16 q0,d0,d0 +_NEON2SSE_INLINE int32x4_t vaddl_s16(int16x4_t a, int16x4_t b) // VADDL.S16 q0,d0,d0 +{ + __m128i a32, b32; + a32 = _MM_CVTEPI16_EPI32 (_pM128i(a)); //SSE4.1 + b32 = _MM_CVTEPI16_EPI32 (_pM128i(b)); //SSE4.1 + return _mm_add_epi32 (a32, b32); +} + +_NEON2SSESTORAGE int64x2_t vaddl_s32(int32x2_t a, int32x2_t b); // VADDL.S32 q0,d0,d0 +_NEON2SSE_INLINE int64x2_t vaddl_s32(int32x2_t a, int32x2_t b) // VADDL.S32 q0,d0,d0 +{ + //may be not optimal + __m128i a64, b64; + a64 = _MM_CVTEPI32_EPI64 (_pM128i(a)); //SSE4.1 + b64 = _MM_CVTEPI32_EPI64 (_pM128i(b)); //SSE4.1 + return _mm_add_epi64 ( a64, b64); +} + +_NEON2SSESTORAGE uint16x8_t vaddl_u8(uint8x8_t a, uint8x8_t b); // VADDL.U8 q0,d0,d0 +_NEON2SSE_INLINE uint16x8_t vaddl_u8(uint8x8_t a, uint8x8_t b) // VADDL.U8 q0,d0,d0 +{ + __m128i a16, b16; + a16 = _MM_CVTEPU8_EPI16 (_pM128i(a)); //SSE4.1 + b16 = _MM_CVTEPU8_EPI16 (_pM128i(b)); //SSE4.1 + return _mm_add_epi16 (a16, b16); +} + +_NEON2SSESTORAGE uint32x4_t vaddl_u16(uint16x4_t a, uint16x4_t b); // VADDL.s16 q0,d0,d0 +_NEON2SSE_INLINE uint32x4_t vaddl_u16(uint16x4_t a, uint16x4_t b) // VADDL.s16 q0,d0,d0 +{ + __m128i a32, b32; + a32 = _MM_CVTEPU16_EPI32 (_pM128i(a)); //SSE4.1 + b32 = _MM_CVTEPU16_EPI32 (_pM128i(b)); //SSE4.1 + return _mm_add_epi32 (a32, b32); +} + +_NEON2SSESTORAGE uint64x2_t vaddl_u32(uint32x2_t a, uint32x2_t b); // VADDL.U32 q0,d0,d0 +_NEON2SSE_INLINE uint64x2_t vaddl_u32(uint32x2_t a, uint32x2_t b) // VADDL.U32 q0,d0,d0 +{ + //may be not optimal + __m128i a64, b64; + a64 = _MM_CVTEPU32_EPI64 (_pM128i(a)); //SSE4.1 + b64 = _MM_CVTEPU32_EPI64 (_pM128i(b)); //SSE4.1 + return _mm_add_epi64 (a64, b64); +} + +//*************** Vector wide add: vaddw_. 
Vr[i]:=Va[i]+Vb[i] ****************** +//*************** ********************************************************************* +_NEON2SSESTORAGE int16x8_t vaddw_s8(int16x8_t a, int8x8_t b); // VADDW.S8 q0,q0,d0 +_NEON2SSE_INLINE int16x8_t vaddw_s8(int16x8_t a, int8x8_t b) // VADDW.S8 q0,q0,d0 +{ + __m128i b16; + b16 = _MM_CVTEPI8_EPI16 (_pM128i(b)); //SSE4.1, + return _mm_add_epi16 (a, b16); +} + +_NEON2SSESTORAGE int32x4_t vaddw_s16(int32x4_t a, int16x4_t b); // VADDW.S16 q0,q0,d0 +_NEON2SSE_INLINE int32x4_t vaddw_s16(int32x4_t a, int16x4_t b) // VADDW.S16 q0,q0,d0 +{ + __m128i b32; + b32 = _MM_CVTEPI16_EPI32(_pM128i(b)); //SSE4.1, + return _mm_add_epi32 (a, b32); +} + +_NEON2SSESTORAGE int64x2_t vaddw_s32(int64x2_t a, int32x2_t b); // VADDW.S32 q0,q0,d0 +_NEON2SSE_INLINE int64x2_t vaddw_s32(int64x2_t a, int32x2_t b) // VADDW.S32 q0,q0,d0 +{ + __m128i b64; + b64 = _MM_CVTEPI32_EPI64 (_pM128i(b)); //SSE4.1 + return _mm_add_epi64 (a, b64); +} + +_NEON2SSESTORAGE uint16x8_t vaddw_u8(uint16x8_t a, uint8x8_t b); // VADDW.U8 q0,q0,d0 +_NEON2SSE_INLINE uint16x8_t vaddw_u8(uint16x8_t a, uint8x8_t b) // VADDW.U8 q0,q0,d0 +{ + __m128i b16; + b16 = _MM_CVTEPU8_EPI16 (_pM128i(b)); //SSE4.1 + return _mm_add_epi16 (a, b16); +} + +_NEON2SSESTORAGE uint32x4_t vaddw_u16(uint32x4_t a, uint16x4_t b); // VADDW.s16 q0,q0,d0 +_NEON2SSE_INLINE uint32x4_t vaddw_u16(uint32x4_t a, uint16x4_t b) // VADDW.s16 q0,q0,d0 +{ + __m128i b32; + b32 = _MM_CVTEPU16_EPI32 (_pM128i(b)); //SSE4.1 + return _mm_add_epi32 (a, b32); +} + +_NEON2SSESTORAGE uint64x2_t vaddw_u32(uint64x2_t a, uint32x2_t b); // VADDW.U32 q0,q0,d0 +_NEON2SSE_INLINE uint64x2_t vaddw_u32(uint64x2_t a, uint32x2_t b) // VADDW.U32 q0,q0,d0 +{ + __m128i b64; + b64 = _MM_CVTEPU32_EPI64 (_pM128i(b)); //SSE4.1 + return _mm_add_epi64 (a, b64); +} + +//******************************Vector halving add: vhadd -> Vr[i]:=(Va[i]+Vb[i])>>1 , result truncated ******************************* +//************************************************************************************************************************* +_NEON2SSESTORAGE int8x8_t vhadd_s8(int8x8_t a, int8x8_t b); // VHADD.S8 d0,d0,d0 +_NEON2SSE_INLINE int8x8_t vhadd_s8(int8x8_t a, int8x8_t b) +{ + int8x8_t res64; + return64(vhaddq_s8(_pM128i(a), _pM128i(b))); +} + + +_NEON2SSESTORAGE int16x4_t vhadd_s16(int16x4_t a, int16x4_t b); // VHADD.S16 d0,d0,d0 +_NEON2SSE_INLINE int16x4_t vhadd_s16(int16x4_t a, int16x4_t b) +{ + int16x4_t res64; + return64( vhaddq_s16(_pM128i(a), _pM128i(b))); +} + + +_NEON2SSESTORAGE int32x2_t vhadd_s32(int32x2_t a, int32x2_t b); // VHADD.S32 d0,d0,d0 +_NEON2SSE_INLINE int32x2_t vhadd_s32(int32x2_t a, int32x2_t b) +{ + int32x2_t res64; + return64( vhaddq_s32(_pM128i(a), _pM128i(b))); +} + + +_NEON2SSESTORAGE uint8x8_t vhadd_u8(uint8x8_t a, uint8x8_t b); // VHADD.w d0,d0,d0 +_NEON2SSE_INLINE uint8x8_t vhadd_u8(uint8x8_t a, uint8x8_t b) +{ + uint8x8_t res64; + return64( vhaddq_u8(_pM128i(a), _pM128i(b))); +} + + +_NEON2SSESTORAGE uint16x4_t vhadd_u16(uint16x4_t a, uint16x4_t b); // VHADD.s16 d0,d0,d0 +_NEON2SSE_INLINE uint16x4_t vhadd_u16(uint16x4_t a, uint16x4_t b) +{ + uint16x4_t res64; + return64( vhaddq_u16(_pM128i(a), _pM128i(b))); +} + + +_NEON2SSESTORAGE uint32x2_t vhadd_u32(uint32x2_t a, uint32x2_t b); // VHADD.U32 d0,d0,d0 +_NEON2SSE_INLINE uint32x2_t vhadd_u32(uint32x2_t a, uint32x2_t b) +{ + uint32x2_t res64; + return64( vhaddq_u32(_pM128i(a), _pM128i(b))); +} + + +_NEON2SSESTORAGE int8x16_t vhaddq_s8(int8x16_t a, int8x16_t b); // VHADD.S8 q0,q0,q0 +_NEON2SSE_INLINE int8x16_t 
vhaddq_s8(int8x16_t a, int8x16_t b) +{ + //need to avoid internal overflow, will use the (x&y)+((x^y)>>1). + __m128i tmp1, tmp2; + tmp1 = _mm_and_si128(a,b); + tmp2 = _mm_xor_si128(a,b); + tmp2 = vshrq_n_s8(tmp2,1); + return _mm_add_epi8(tmp1,tmp2); +} + +_NEON2SSESTORAGE int16x8_t vhaddq_s16(int16x8_t a, int16x8_t b); // VHADD.S16 q0,q0,q0 +_NEON2SSE_INLINE int16x8_t vhaddq_s16(int16x8_t a, int16x8_t b) +{ + //need to avoid internal overflow, will use the (x&y)+((x^y)>>1). + __m128i tmp1, tmp2; + tmp1 = _mm_and_si128(a,b); + tmp2 = _mm_xor_si128(a,b); + tmp2 = _mm_srai_epi16(tmp2,1); + return _mm_add_epi16(tmp1,tmp2); +} + +_NEON2SSESTORAGE int32x4_t vhaddq_s32(int32x4_t a, int32x4_t b); // VHADD.S32 q0,q0,q0 +_NEON2SSE_INLINE int32x4_t vhaddq_s32(int32x4_t a, int32x4_t b) // VHADD.S32 q0,q0,q0 +{ + //need to avoid internal overflow, will use the (x&y)+((x^y)>>1). + __m128i tmp1, tmp2; + tmp1 = _mm_and_si128(a,b); + tmp2 = _mm_xor_si128(a,b); + tmp2 = _mm_srai_epi32(tmp2,1); + return _mm_add_epi32(tmp1,tmp2); +} + +_NEON2SSESTORAGE uint8x16_t vhaddq_u8(uint8x16_t a, uint8x16_t b); // VHADD.U8 q0,q0,q0 +_NEON2SSE_INLINE uint8x16_t vhaddq_u8(uint8x16_t a, uint8x16_t b) // VHADD.U8 q0,q0,q0 +{ + __m128i c1, sum, res; + c1 = _mm_set1_epi8(1); + sum = _mm_avg_epu8(a, b); //result is rounded, need to compensate it + res = _mm_xor_si128(a, b); //for rounding compensation + res = _mm_and_si128(res,c1); //for rounding compensation + return _mm_sub_epi8 (sum, res); //actual rounding compensation +} + +_NEON2SSESTORAGE uint16x8_t vhaddq_u16(uint16x8_t a, uint16x8_t b); // VHADD.s16 q0,q0,q0 +_NEON2SSE_INLINE uint16x8_t vhaddq_u16(uint16x8_t a, uint16x8_t b) // VHADD.s16 q0,q0,q0 +{ + __m128i sum, res; + sum = _mm_avg_epu16(a, b); //result is rounded, need to compensate it + res = _mm_xor_si128(a, b); //for rounding compensation + res = _mm_slli_epi16 (res,15); //shift left then back right to + res = _mm_srli_epi16 (res,15); //get 1 or zero + return _mm_sub_epi16 (sum, res); //actual rounding compensation +} + +_NEON2SSESTORAGE uint32x4_t vhaddq_u32(uint32x4_t a, uint32x4_t b); // VHADD.U32 q0,q0,q0 +_NEON2SSE_INLINE uint32x4_t vhaddq_u32(uint32x4_t a, uint32x4_t b) // VHADD.U32 q0,q0,q0 +{ + //need to avoid internal overflow, will use the (x&y)+((x^y)>>1). + __m128i tmp1, tmp2; + tmp1 = _mm_and_si128(a,b); + tmp2 = _mm_xor_si128(a,b); + tmp2 = _mm_srli_epi32(tmp2,1); + return _mm_add_epi32(tmp1,tmp2); +} + +//************************Vector rounding halving add: vrhadd{q}_.
Vr[i]:=(Va[i]+Vb[i]+1)>>1 *************************** +//***************************************************************************************************************************** +_NEON2SSESTORAGE int8x8_t vrhadd_s8(int8x8_t a, int8x8_t b); // VRHADD.S8 d0,d0,d0 +_NEON2SSE_INLINE int8x8_t vrhadd_s8(int8x8_t a, int8x8_t b) +{ + int8x8_t res64; + return64(vrhaddq_s8(_pM128i(a), _pM128i(b))); +} + + +_NEON2SSESTORAGE int16x4_t vrhadd_s16(int16x4_t a, int16x4_t b); // VRHADD.S16 d0,d0,d0 +_NEON2SSE_INLINE int16x4_t vrhadd_s16(int16x4_t a, int16x4_t b) +{ + int16x4_t res64; + return64(vrhaddq_s16(_pM128i(a), _pM128i(b))); +} + + +_NEON2SSESTORAGE int32x2_t vrhadd_s32(int32x2_t a, int32x2_t b); // VRHADD.S32 d0,d0,d0 +_NEON2SSE_INLINE int32x2_t vrhadd_s32(int32x2_t a, int32x2_t b) +{ + int32x2_t res64; + return64(vrhaddq_s32(_pM128i(a), _pM128i(b))); +} + + +_NEON2SSESTORAGE uint8x8_t vrhadd_u8(uint8x8_t a, uint8x8_t b); // VRHADD.U8 d0,d0,d0 +_NEON2SSE_INLINE uint8x8_t vrhadd_u8(uint8x8_t a, uint8x8_t b) +{ + uint8x8_t res64; + return64(_mm_avg_epu8(_pM128i(a),_pM128i(b))); //SSE, result rounding!!! +} + + +_NEON2SSESTORAGE uint16x4_t vrhadd_u16(uint16x4_t a, uint16x4_t b); // VRHADD.s16 d0,d0,d0 +_NEON2SSE_INLINE uint16x4_t vrhadd_u16(uint16x4_t a, uint16x4_t b) +{ + uint16x4_t res64; + return64(_mm_avg_epu16(_pM128i(a),_pM128i(b))); //SSE, result rounding!!! +} + + +_NEON2SSESTORAGE uint32x2_t vrhadd_u32(uint32x2_t a, uint32x2_t b); // VRHADD.U32 d0,d0,d0 +_NEON2SSE_INLINE uint32x2_t vrhadd_u32(uint32x2_t a, uint32x2_t b) +{ + uint32x2_t res64; + return64(vrhaddq_u32(_pM128i(a), _pM128i(b))); +} + + +_NEON2SSESTORAGE int8x16_t vrhaddq_s8(int8x16_t a, int8x16_t b); // VRHADD.S8 q0,q0,q0 +_NEON2SSE_INLINE int8x16_t vrhaddq_s8(int8x16_t a, int8x16_t b) // VRHADD.S8 q0,q0,q0 +{ + //no signed average in x86 SIMD, go to unsigned + __m128i c128, au, bu, sum; + c128 = _mm_set1_epi8(-128); //(int8_t)0x80 + au = _mm_sub_epi8(a, c128); //add 128 + bu = _mm_sub_epi8(b, c128); //add 128 + sum = _mm_avg_epu8(au, bu); + return _mm_add_epi8 (sum, c128); //sub 128 +} + +_NEON2SSESTORAGE int16x8_t vrhaddq_s16(int16x8_t a, int16x8_t b); // VRHADD.S16 q0,q0,q0 +_NEON2SSE_INLINE int16x8_t vrhaddq_s16(int16x8_t a, int16x8_t b) // VRHADD.S16 q0,q0,q0 +{ + //no signed average in x86 SIMD, go to unsigned + __m128i cx8000, au, bu, sum; + cx8000 = _mm_set1_epi16(-32768); //(int16_t)0x8000 + au = _mm_sub_epi16(a, cx8000); //add 32768 + bu = _mm_sub_epi16(b, cx8000); //add 32768 + sum = _mm_avg_epu16(au, bu); + return _mm_add_epi16 (sum, cx8000); //sub 32768 +} + +_NEON2SSESTORAGE int32x4_t vrhaddq_s32(int32x4_t a, int32x4_t b); // VRHADD.S32 q0,q0,q0 +_NEON2SSE_INLINE int32x4_t vrhaddq_s32(int32x4_t a, int32x4_t b) +{ + //need to avoid overflow + __m128i a2, b2, res, sum; + a2 = _mm_srai_epi32(a,1); //a2=a/2; + b2 = _mm_srai_epi32(b,1); // b2=b/2; + res = _mm_or_si128(a,b); //for rounding + res = _mm_slli_epi32 (res,31); //shift left then back right to + res = _mm_srli_epi32 (res,31); //get 1 or zero + sum = _mm_add_epi32(a2,b2); + return _mm_add_epi32(sum,res); +} + +_NEON2SSE_GLOBAL uint8x16_t vrhaddq_u8(uint8x16_t a, uint8x16_t b); // VRHADD.U8 q0,q0,q0 +#define vrhaddq_u8 _mm_avg_epu8 //SSE2, results rounded + +_NEON2SSE_GLOBAL uint16x8_t vrhaddq_u16(uint16x8_t a, uint16x8_t b); // VRHADD.s16 q0,q0,q0 +#define vrhaddq_u16 _mm_avg_epu16 //SSE2, results rounded + + +_NEON2SSESTORAGE uint32x4_t vrhaddq_u32(uint32x4_t a, uint32x4_t b); // VRHADD.U32 q0,q0,q0 +_NEON2SSE_INLINE uint32x4_t vrhaddq_u32(uint32x4_t a, 
uint32x4_t b) // VRHADD.U32 q0,q0,q0 +{ + //need to avoid overflow + __m128i a2, b2, res, sum; + a2 = _mm_srli_epi32(a,1); //a2=a/2; + b2 = _mm_srli_epi32(b,1); // b2=b/2; + res = _mm_or_si128(a,b); //for rounding + res = _mm_slli_epi32 (res,31); //shift left then back right to + res = _mm_srli_epi32 (res,31); //get 1 or zero + sum = _mm_add_epi32(a2,b2); + return _mm_add_epi32(sum,res); +} + +//****************** VQADD: Vector saturating add ************************ +//************************************************************************ +_NEON2SSESTORAGE int8x8_t vqadd_s8(int8x8_t a, int8x8_t b); // VQADD.S8 d0,d0,d0 +_NEON2SSE_INLINE int8x8_t vqadd_s8(int8x8_t a, int8x8_t b) +{ + int8x8_t res64; + return64(_mm_adds_epi8(_pM128i(a),_pM128i(b))); +} + + +_NEON2SSESTORAGE int16x4_t vqadd_s16(int16x4_t a, int16x4_t b); // VQADD.S16 d0,d0,d0 +_NEON2SSE_INLINE int16x4_t vqadd_s16(int16x4_t a, int16x4_t b) +{ + int16x4_t res64; + return64(_mm_adds_epi16(_pM128i(a),_pM128i(b))); +} + + +_NEON2SSESTORAGE int32x2_t vqadd_s32(int32x2_t a, int32x2_t b); // VQADD.S32 d0,d0,d0 +_NEON2SSE_INLINE int32x2_t vqadd_s32(int32x2_t a, int32x2_t b) +{ + int32x2_t res64; + return64(vqaddq_s32(_pM128i(a), _pM128i(b))); +} + + +_NEON2SSESTORAGE int64x1_t vqadd_s64(int64x1_t a, int64x1_t b); // VQADD.S64 d0,d0,d0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x1_t vqadd_s64(int64x1_t a, int64x1_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + int64x1_t res; + uint64_t a64, b64; + a64 = a.m64_u64[0]; + b64 = b.m64_u64[0]; + res.m64_u64[0] = a64 + b64; + a64 = (a64 >> 63) + (~_SIGNBIT64); + if ((int64_t)((b64 ^ a64) | ~(res.m64_u64[0] ^ b64))>=0) { + res.m64_u64[0] = a64; + } + return res; +} + +_NEON2SSESTORAGE uint8x8_t vqadd_u8(uint8x8_t a, uint8x8_t b); // VQADD.U8 d0,d0,d0 +_NEON2SSE_INLINE uint8x8_t vqadd_u8(uint8x8_t a, uint8x8_t b) +{ + uint8x8_t res64; + return64(_mm_adds_epu8(_pM128i(a),_pM128i(b))); +} + + +_NEON2SSESTORAGE uint16x4_t vqadd_u16(uint16x4_t a, uint16x4_t b); // VQADD.s16 d0,d0,d0 +_NEON2SSE_INLINE uint16x4_t vqadd_u16(uint16x4_t a, uint16x4_t b) +{ + uint16x4_t res64; + return64(_mm_adds_epu16(_pM128i(a),_pM128i(b))); +} + + +_NEON2SSESTORAGE uint32x2_t vqadd_u32(uint32x2_t a, uint32x2_t b); // VQADD.U32 d0,d0,d0 +_NEON2SSE_INLINE uint32x2_t vqadd_u32(uint32x2_t a, uint32x2_t b) +{ + uint32x2_t res64; + return64(vqaddq_u32(_pM128i(a), _pM128i(b))); +} + + +_NEON2SSESTORAGE uint64x1_t vqadd_u64(uint64x1_t a, uint64x1_t b); // VQADD.U64 d0,d0,d0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint64x1_t vqadd_u64(uint64x1_t a, uint64x1_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + _NEON2SSE_ALIGN_16 uint64_t a64, b64; + uint64x1_t res; + a64 = a.m64_u64[0]; + b64 = b.m64_u64[0]; + res.m64_u64[0] = a64 + b64; + if (res.m64_u64[0] < a64) { + res.m64_u64[0] = ~(uint64_t)0; + } + return res; +} + +_NEON2SSE_GLOBAL int8x16_t vqaddq_s8(int8x16_t a, int8x16_t b); // VQADD.S8 q0,q0,q0 +#define vqaddq_s8 _mm_adds_epi8 + +_NEON2SSE_GLOBAL int16x8_t vqaddq_s16(int16x8_t a, int16x8_t b); // VQADD.S16 q0,q0,q0 +#define vqaddq_s16 _mm_adds_epi16 + +_NEON2SSESTORAGE int32x4_t vqaddq_s32(int32x4_t a, int32x4_t b); // VQADD.S32 q0,q0,q0 +_NEON2SSE_INLINE int32x4_t vqaddq_s32(int32x4_t a, int32x4_t b) +{ + //no corresponding x86 SIMD solution, special tricks are necessary.
Overflow happens only if a and b have the same sign and sum has the opposite sign + __m128i c7fffffff, res, res_sat, res_xor_a, b_xor_a_; + c7fffffff = _mm_set1_epi32(0x7fffffff); + res = _mm_add_epi32(a, b); + res_sat = _mm_srli_epi32(a, 31); + res_sat = _mm_add_epi32(res_sat, c7fffffff); + res_xor_a = _mm_xor_si128(res, a); + b_xor_a_ = _mm_xor_si128(b, a); + res_xor_a = _mm_andnot_si128(b_xor_a_, res_xor_a); + res_xor_a = _mm_srai_epi32(res_xor_a,31); //propagate the sign bit: all ones if <0, all zeros otherwise + res_sat = _mm_and_si128(res_xor_a, res_sat); + res = _mm_andnot_si128(res_xor_a, res); + return _mm_or_si128(res, res_sat); +} + +_NEON2SSESTORAGE int64x2_t vqaddq_s64(int64x2_t a, int64x2_t b); // VQADD.S64 q0,q0,q0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x2_t vqaddq_s64(int64x2_t a, int64x2_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + _NEON2SSE_ALIGN_16 uint64_t atmp[2], btmp[2], res[2]; + _mm_store_si128((__m128i*)atmp, a); + _mm_store_si128((__m128i*)btmp, b); + res[0] = atmp[0] + btmp[0]; + res[1] = atmp[1] + btmp[1]; + + atmp[0] = (atmp[0] >> 63) + (~_SIGNBIT64); + atmp[1] = (atmp[1] >> 63) + (~_SIGNBIT64); + + if ((int64_t)((btmp[0] ^ atmp[0]) | ~(res[0] ^ btmp[0]))>=0) { + res[0] = atmp[0]; + } + if ((int64_t)((btmp[1] ^ atmp[1]) | ~(res[1] ^ btmp[1]))>=0) { + res[1] = atmp[1]; + } + return _mm_load_si128((__m128i*)res); +} + +_NEON2SSE_GLOBAL uint8x16_t vqaddq_u8(uint8x16_t a, uint8x16_t b); // VQADD.U8 q0,q0,q0 +#define vqaddq_u8 _mm_adds_epu8 + +_NEON2SSE_GLOBAL uint16x8_t vqaddq_u16(uint16x8_t a, uint16x8_t b); // VQADD.s16 q0,q0,q0 +#define vqaddq_u16 _mm_adds_epu16 + +_NEON2SSESTORAGE uint32x4_t vqaddq_u32(uint32x4_t a, uint32x4_t b); // VQADD.U32 q0,q0,q0 +_NEON2SSE_INLINE uint32x4_t vqaddq_u32(uint32x4_t a, uint32x4_t b) +{ + __m128i c80000000, cmp, subsum, suba, sum; + c80000000 = _mm_set1_epi32 (0x80000000); + sum = _mm_add_epi32 (a, b); + subsum = _mm_sub_epi32 (sum, c80000000); + suba = _mm_sub_epi32 (a, c80000000); + cmp = _mm_cmpgt_epi32 ( suba, subsum); //no unsigned comparison, need to go to signed + return _mm_or_si128 (sum, cmp); //saturation +} + +_NEON2SSESTORAGE uint64x2_t vqaddq_u64(uint64x2_t a, uint64x2_t b); // VQADD.U64 q0,q0,q0 +#ifdef USE_SSE4 + _NEON2SSE_INLINE uint64x2_t vqaddq_u64(uint64x2_t a, uint64x2_t b) + { + __m128i c80000000, sum, cmp, suba, subsum; + c80000000 = _mm_set_epi32 (0x80000000, 0x0, 0x80000000, 0x0); + sum = _mm_add_epi64 (a, b); + subsum = _mm_sub_epi64 (sum, c80000000); + suba = _mm_sub_epi64 (a, c80000000); + cmp = _mm_cmpgt_epi64 ( suba, subsum); //no unsigned comparison, need to go to signed, SSE4.2!!!
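+        //worked example for one lane: a = 0xffffffffffffffff, b = 1 wraps to sum = 0;
+        //then suba = 0x7fff... (signed max) exceeds subsum = 0x8000... (signed min), cmp is all ones,
+        //and the OR below returns the saturated value 0xffffffffffffffff.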
+ return _mm_or_si128 (sum, cmp); //saturation + } +#else + _NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint64x2_t vqaddq_u64(uint64x2_t a, uint64x2_t b), _NEON2SSE_REASON_SLOW_SERIAL) + { + _NEON2SSE_ALIGN_16 uint64_t atmp[2], btmp[2], res[2]; + _mm_store_si128((__m128i*)atmp, a); + _mm_store_si128((__m128i*)btmp, b); + res[0] = atmp[0] + btmp[0]; + res[1] = atmp[1] + btmp[1]; + if (res[0] < atmp[0]) res[0] = ~(uint64_t)0; + if (res[1] < atmp[1]) res[1] = ~(uint64_t)0; + return _mm_load_si128((__m128i*)(res)); + } +#endif + + +//******************* Vector add high half (truncated) ****************** +//************************************************************************ +_NEON2SSESTORAGE int8x8_t vaddhn_s16(int16x8_t a, int16x8_t b); // VADDHN.I16 d0,q0,q0 +_NEON2SSE_INLINE int8x8_t vaddhn_s16(int16x8_t a, int16x8_t b) // VADDHN.I16 d0,q0,q0 +{ + int8x8_t res64; + __m128i sum; + sum = _mm_add_epi16 (a, b); + sum = _mm_srai_epi16 (sum, 8); + sum = _mm_packs_epi16 (sum, sum); //use 64 low bits only + return64(sum); +} + +_NEON2SSESTORAGE int16x4_t vaddhn_s32(int32x4_t a, int32x4_t b); // VADDHN.I32 d0,q0,q0 +_NEON2SSE_INLINE int16x4_t vaddhn_s32(int32x4_t a, int32x4_t b) // VADDHN.I32 d0,q0,q0 +{ + int16x4_t res64; + __m128i sum; + sum = _mm_add_epi32 (a, b); + sum = _mm_srai_epi32(sum, 16); + sum = _mm_packs_epi32 (sum, sum); //use 64 low bits only + return64(sum); +} + +_NEON2SSESTORAGE int32x2_t vaddhn_s64(int64x2_t a, int64x2_t b); // VADDHN.I64 d0,q0,q0 +_NEON2SSE_INLINE int32x2_t vaddhn_s64(int64x2_t a, int64x2_t b) +{ + int32x2_t res64; + __m128i sum; + sum = _mm_add_epi64 (a, b); + sum = _mm_shuffle_epi32(sum, 1 | (3 << 2) | (0 << 4) | (2 << 6)); + return64(sum); +} + +_NEON2SSESTORAGE uint8x8_t vaddhn_u16(uint16x8_t a, uint16x8_t b); // VADDHN.I16 d0,q0,q0 +_NEON2SSE_INLINE uint8x8_t vaddhn_u16(uint16x8_t a, uint16x8_t b) // VADDHN.I16 d0,q0,q0 +{ + uint8x8_t res64; + __m128i sum; + sum = _mm_add_epi16 (a, b); + sum = _mm_srli_epi16 (sum, 8); + sum = _mm_packus_epi16 (sum,sum); //use 64 low bits only + return64(sum); +} + +_NEON2SSESTORAGE uint16x4_t vaddhn_u32(uint32x4_t a, uint32x4_t b); // VADDHN.I32 d0,q0,q0 +_NEON2SSE_INLINE uint16x4_t vaddhn_u32(uint32x4_t a, uint32x4_t b) // VADDHN.I32 d0,q0,q0 +{ + uint16x4_t res64; + __m128i sum; + sum = _mm_add_epi32 (a, b); + sum = _mm_srli_epi32 (sum, 16); +#ifdef USE_SSE4 + sum = _MM_PACKUS1_EPI32 (sum); //use 64 low bits only +#else + sum = _mm_shuffle_epi8 (sum, *(__m128i*) mask8_32_even_odd); //go to 16 bits +#endif + return64(sum); +} + +_NEON2SSE_GLOBAL uint32x2_t vaddhn_u64(uint64x2_t a, uint64x2_t b); // VADDHN.I64 d0,q0,q0 +#define vaddhn_u64 vaddhn_s64 + +//*********** Vector rounding add high half: vraddhn_ ******************. 
+//*************************************************************************** +_NEON2SSESTORAGE int8x8_t vraddhn_s16(int16x8_t a, int16x8_t b); // VRADDHN.I16 d0,q0,q0 +_NEON2SSE_INLINE int8x8_t vraddhn_s16(int16x8_t a, int16x8_t b) // VRADDHN.I16 d0,q0,q0 +{ + int8x8_t res64; + __m128i sum, mask1; + sum = _mm_add_epi16 (a, b); + mask1 = _mm_slli_epi16(sum, 8); //shift left then back right to + mask1 = _mm_srli_epi16(mask1, 15); //get 7-th bit 1 or zero + sum = _mm_srai_epi16 (sum, 8); //get high half + sum = _mm_add_epi16 (sum, mask1); //actual rounding + sum = _mm_packs_epi16 (sum, sum); + return64(sum); +} + +_NEON2SSESTORAGE int16x4_t vraddhn_s32(int32x4_t a, int32x4_t b); // VRADDHN.I32 d0,q0,q0 +_NEON2SSE_INLINE int16x4_t vraddhn_s32(int32x4_t a, int32x4_t b) // VRADDHN.I32 d0,q0,q0 +{ + //SIMD may be not optimal, serial may be faster + int16x4_t res64; + __m128i sum, mask1; + sum = _mm_add_epi32 (a, b); + mask1 = _mm_slli_epi32(sum, 16); //shift left then back right to + mask1 = _mm_srli_epi32(mask1,31); //get 15-th bit 1 or zero + sum = _mm_srai_epi32 (sum, 16); //get high half + sum = _mm_add_epi32 (sum, mask1); //actual rounding + sum = _mm_packs_epi32 (sum, sum); + return64(sum); +} + +_NEON2SSESTORAGE int32x2_t vraddhn_s64(int64x2_t a, int64x2_t b); // VRADDHN.I64 d0,q0,q0 +_NEON2SSE_INLINE int32x2_t vraddhn_s64(int64x2_t a, int64x2_t b) +{ + //SIMD may be not optimal, serial may be faster + int32x2_t res64; + __m128i sum, mask1; + sum = _mm_add_epi64 (a, b); + mask1 = _mm_slli_epi64(sum, 32); //shift left then back right to + mask1 = _mm_srli_epi64(mask1,31); //get 31-th bit 1 or zero + sum = _mm_add_epi32 (sum, mask1); //actual high half rounding + sum = _mm_shuffle_epi32(sum, 1 | (3 << 2) | (1 << 4) | (3 << 6)); + return64(sum); +} + +_NEON2SSESTORAGE uint8x8_t vraddhn_u16(uint16x8_t a, uint16x8_t b); // VRADDHN.I16 d0,q0,q0 +_NEON2SSE_INLINE uint8x8_t vraddhn_u16(uint16x8_t a, uint16x8_t b) // VRADDHN.I16 d0,q0,q0 +{ + uint8x8_t res64; + __m128i sum, mask1; + sum = _mm_add_epi16 (a, b); + mask1 = _mm_slli_epi16(sum, 8); //shift left then back right to + mask1 = _mm_srli_epi16(mask1, 15); //get 7-th bit 1 or zero + sum = _mm_srai_epi16 (sum, 8); //get high half + sum = _mm_add_epi16 (sum, mask1); //actual rounding + sum = _mm_packus_epi16 (sum, sum); + return64(sum); +} + +_NEON2SSESTORAGE uint16x4_t vraddhn_u32(uint32x4_t a, uint32x4_t b); // VRADDHN.I32 d0,q0,q0 +_NEON2SSE_INLINE uint16x4_t vraddhn_u32(uint32x4_t a, uint32x4_t b) +{ + //SIMD may be not optimal, serial may be faster + uint16x4_t res64; + __m128i sum, mask1; + sum = _mm_add_epi32 (a, b); + mask1 = _mm_slli_epi32(sum, 16); //shift left then back right to + mask1 = _mm_srli_epi32(mask1,31); //get 15-th bit 1 or zero + sum = _mm_srai_epi32 (sum, 16); //get high half + sum = _mm_add_epi32 (sum, mask1); //actual rounding + sum = _MM_PACKUS1_EPI32 (sum); + return64(sum); +} + +_NEON2SSE_GLOBAL uint32x2_t vraddhn_u64(uint64x2_t a, uint64x2_t b); // VRADDHN.I64 d0,q0,q0 +#define vraddhn_u64 vraddhn_s64 + +//********************************************************************************** +//********* Multiplication ************************************* +//************************************************************************************** + +//Vector multiply: vmul -> Vr[i] := Va[i] * Vb[i] +//As we don't go to wider result functions are equal to "multiply low" in x86 +_NEON2SSESTORAGE int8x8_t vmul_s8(int8x8_t a, int8x8_t b); // VMUL.I8 d0,d0,d0 +_NEON2SSE_INLINE int8x8_t vmul_s8(int8x8_t a, int8x8_t b) // 
VMUL.I8 d0,d0,d0 +{ + // no 8 bit simd multiply, need to go to 16 bits in SSE + int8x8_t res64; + __m128i a128, b128, res; + a128 = _MM_CVTEPI8_EPI16 (_pM128i(a)); // SSE 4.1 use low 64 bits + b128 = _MM_CVTEPI8_EPI16 (_pM128i(b)); // SSE 4.1 use low 64 bits + res = _mm_mullo_epi16 (a128, b128); + res = _mm_shuffle_epi8 (res, *(__m128i*) mask8_16_even_odd); //return to 8 bit from 16, use 64 low bits only + return64(res); +} + +_NEON2SSE_GLOBAL int16x4_t vmul_s16(int16x4_t a, int16x4_t b); // VMUL.I16 d0,d0,d0 +#define vmul_s16 vmul_u16 + +_NEON2SSE_GLOBAL int32x2_t vmul_s32(int32x2_t a, int32x2_t b); // VMUL.I32 d0,d0,d0 +#define vmul_s32 vmul_u32 + +_NEON2SSESTORAGE float32x2_t vmul_f32(float32x2_t a, float32x2_t b); // VMUL.F32 d0,d0,d0 +_NEON2SSE_INLINE float32x2_t vmul_f32(float32x2_t a, float32x2_t b) +{ + float32x4_t tmp; + __m64_128 res64; + tmp = _mm_mul_ps(_pM128(a),_pM128(b)); + _M64f(res64, tmp); //use low 64 bits + return res64; +} + +_NEON2SSESTORAGE uint8x8_t vmul_u8(uint8x8_t a, uint8x8_t b); // VMUL.I8 d0,d0,d0 +_NEON2SSE_INLINE uint8x8_t vmul_u8(uint8x8_t a, uint8x8_t b) // VMUL.I8 d0,d0,d0 +{ + // no 8 bit simd multiply, need to go to 16 bits in SSE + uint8x8_t res64; + __m128i mask, a128, b128, res; + mask = _mm_set1_epi16(0xff); + a128 = _MM_CVTEPU8_EPI16 (_pM128i(a)); + b128 = _MM_CVTEPU8_EPI16 (_pM128i(b)); + res = _mm_mullo_epi16 (a128, b128); + res = _mm_and_si128(res, mask); //to avoid saturation + res = _mm_packus_epi16 (res,res); //use only low 64 bits + return64(res); +} + +_NEON2SSESTORAGE uint16x4_t vmul_u16(uint16x4_t a, uint16x4_t b); // VMUL.I16 d0,d0,d0 +_NEON2SSE_INLINE uint16x4_t vmul_u16(uint16x4_t a, uint16x4_t b) +{ + uint16x4_t res64; + return64(_mm_mullo_epi16(_pM128i(a),_pM128i(b))); +} + +_NEON2SSESTORAGE uint32x2_t vmul_u32(uint32x2_t a, uint32x2_t b); // VMUL.I32 d0,d0,d0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING( uint32x2_t vmul_u32(uint32x2_t a, uint32x2_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + uint32x2_t res; + res.m64_u32[0] = a.m64_u32[0] * b.m64_u32[0]; + res.m64_u32[1] = a.m64_u32[1] * b.m64_u32[1]; + return res; +} + +_NEON2SSESTORAGE poly8x8_t vmul_p8(poly8x8_t a, poly8x8_t b); // VMUL.P8 d0,d0,d0 +_NEON2SSE_INLINE poly8x8_t vmul_p8(poly8x8_t a, poly8x8_t b) +{ + //may be optimized + poly8x8_t res64; + __m128i a64, b64, c1, res, tmp, bmasked; + int i; + a64 = _pM128i(a); + b64 = _pM128i(b); + c1 = _mm_cmpeq_epi8 (a64,a64); //all ones 0xff.... 
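+    //polynomial (carry-less) multiply: the partial product for each bit of b must be XORed,
+    //not added, into the result, so the 8 masked u8 multiplies joined by XOR below implement VMUL.P8.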
+ c1 = vshrq_n_u8(c1,7); //0x1 + bmasked = _mm_and_si128(b64, c1); //0x1 + res = vmulq_u8(a64, bmasked); + for(i = 1; i<8; i++) { + c1 = _mm_slli_epi16(c1,1); //shift mask left by 1, 16 bit shift is OK here + bmasked = _mm_and_si128(b64, c1); //0x1 + tmp = vmulq_u8(a64, bmasked); + res = _mm_xor_si128(res, tmp); + } + return64 (res); +} + +_NEON2SSESTORAGE int8x16_t vmulq_s8(int8x16_t a, int8x16_t b); // VMUL.I8 q0,q0,q0 +_NEON2SSE_INLINE int8x16_t vmulq_s8(int8x16_t a, int8x16_t b) // VMUL.I8 q0,q0,q0 +{ + // no 8 bit simd multiply, need to go to 16 bits +//solution may be not optimal + __m128i r16_1, r16_2; +#ifdef USE_AVX2 + __m256i a16, b16, r16; + a16 = _mm256_cvtepi8_epi16(a); + b16 = _mm256_cvtepi8_epi16(b); + r16 = _mm256_mullo_epi16(a16, b16); + r16 = _mm256_shuffle_epi8(r16, *(__m256i*)mask8_16_even_odd); //return to 8 bit + r16_1 = _mm256_castsi256_si128(r16); + r16_2 = _mm256_extractf128_si256(r16, 1); +#else + __m128i a16, b16; + a16 = _MM_CVTEPI8_EPI16(a); // SSE 4.1 + b16 = _MM_CVTEPI8_EPI16(b); // SSE 4.1 + r16_1 = _mm_mullo_epi16(a16, b16); + //swap hi and low part of a and b to process the remaining data + a16 = _mm_shuffle_epi32(a, _SWAP_HI_LOW32); + b16 = _mm_shuffle_epi32(b, _SWAP_HI_LOW32); + a16 = _MM_CVTEPI8_EPI16(a16); // SSE 4.1 + b16 = _MM_CVTEPI8_EPI16(b16); // SSE 4.1 __m128i r16_2 + + r16_2 = _mm_mullo_epi16(a16, b16); + r16_1 = _mm_shuffle_epi8(r16_1, *(__m128i*)mask8_16_even_odd); //return to 8 bit + r16_2 = _mm_shuffle_epi8(r16_2, *(__m128i*)mask8_16_even_odd); //return to 8 bit +#endif + return _mm_unpacklo_epi64(r16_1, r16_2); +} + +_NEON2SSE_GLOBAL int16x8_t vmulq_s16(int16x8_t a, int16x8_t b); // VMUL.I16 q0,q0,q0 +#define vmulq_s16 _mm_mullo_epi16 + +_NEON2SSE_GLOBAL int32x4_t vmulq_s32(int32x4_t a, int32x4_t b); // VMUL.I32 q0,q0,q0 +#define vmulq_s32 _MM_MULLO_EPI32 //SSE4.1 + +_NEON2SSE_GLOBAL float32x4_t vmulq_f32(float32x4_t a, float32x4_t b); // VMUL.F32 q0,q0,q0 +#define vmulq_f32 _mm_mul_ps + +_NEON2SSESTORAGE uint8x16_t vmulq_u8(uint8x16_t a, uint8x16_t b); // VMUL.I8 q0,q0,q0 +_NEON2SSE_INLINE uint8x16_t vmulq_u8(uint8x16_t a, uint8x16_t b) // VMUL.I8 q0,q0,q0 +{ + // no 8 bit simd multiply, need to go to 16 bits + //solution may be not optimal + __m128i r16_1, r16_2; +#ifdef USE_AVX2 + __m256i a16, b16, r16; + a16 = _mm256_cvtepu8_epi16(a); + b16 = _mm256_cvtepu8_epi16(b); + r16 = _mm256_mullo_epi16(a16, b16); + r16 = _mm256_shuffle_epi8(r16, *(__m256i*)mask8_16_even_odd); //return to 8 bit + r16_1 = _mm256_castsi256_si128(r16); + r16_2 = _mm256_extractf128_si256(r16, 1); +#else + __m128i a16, b16; + a16 = _MM_CVTEPU8_EPI16(a); // SSE 4.1 + b16 = _MM_CVTEPU8_EPI16(b); // SSE 4.1 + r16_1 = _mm_mullo_epi16(a16, b16); + //swap hi and low part of a and b to process the remaining data + a16 = _mm_shuffle_epi32(a, _SWAP_HI_LOW32); + b16 = _mm_shuffle_epi32(b, _SWAP_HI_LOW32); + a16 = _MM_CVTEPU8_EPI16(a16); // SSE 4.1 + b16 = _MM_CVTEPU8_EPI16(b16); // SSE 4.1 __m128i r16_2 + + r16_2 = _mm_mullo_epi16(a16, b16); + r16_1 = _mm_shuffle_epi8(r16_1, *(__m128i*)mask8_16_even_odd); //return to 8 bit + r16_2 = _mm_shuffle_epi8(r16_2, *(__m128i*)mask8_16_even_odd); //return to 8 bit +#endif + return _mm_unpacklo_epi64(r16_1, r16_2); + +} + +_NEON2SSE_GLOBAL uint16x8_t vmulq_u16(uint16x8_t a, uint16x8_t b); // VMUL.I16 q0,q0,q0 +#define vmulq_u16 _mm_mullo_epi16 + +_NEON2SSE_GLOBAL uint32x4_t vmulq_u32(uint32x4_t a, uint32x4_t b); // VMUL.I32 q0,q0,q0 +#define vmulq_u32 _MM_MULLO_EPI32 //SSE4.1 + +_NEON2SSESTORAGE poly8x16_t vmulq_p8(poly8x16_t a, 
poly8x16_t b); // VMUL.P8 q0,q0,q0 +_NEON2SSE_INLINE poly8x16_t vmulq_p8(poly8x16_t a, poly8x16_t b) +{ + //may be optimized + __m128i c1, res, tmp, bmasked; + int i; + c1 = _mm_cmpeq_epi8 (a,a); //all ones 0xff.... + c1 = vshrq_n_u8(c1,7); //0x1 + bmasked = _mm_and_si128(b, c1); //0x1 + res = vmulq_u8(a, bmasked); + for(i = 1; i<8; i++) { + c1 = _mm_slli_epi16(c1,1); //shift mask left by 1, 16 bit shift is OK here + bmasked = _mm_and_si128(b, c1); //0x1 + tmp = vmulq_u8(a, bmasked); + res = _mm_xor_si128(res, tmp); + } + return res; +} + +//************************* Vector long multiply *********************************** +//**************************************************************************** +_NEON2SSESTORAGE int16x8_t vmull_s8(int8x8_t a, int8x8_t b); // VMULL.S8 q0,d0,d0 +_NEON2SSE_INLINE int16x8_t vmull_s8(int8x8_t a, int8x8_t b) // VMULL.S8 q0,d0,d0 +{ + //no 8 bit simd multiply, need to go to 16 bits + __m128i a16, b16; + a16 = _MM_CVTEPI8_EPI16 (_pM128i(a)); // SSE 4.1 + b16 = _MM_CVTEPI8_EPI16 (_pM128i(b)); // SSE 4.1 + return _mm_mullo_epi16 (a16, b16); //should fit into 16 bit +} + +_NEON2SSESTORAGE int32x4_t vmull_s16(int16x4_t a, int16x4_t b); // VMULL.S16 q0,d0,d0 +_NEON2SSE_INLINE int32x4_t vmull_s16(int16x4_t a, int16x4_t b) // VMULL.S16 q0,d0,d0 +{ +#ifdef USE_SSE4 + __m128i a16, b16; + a16 = _MM_CVTEPI16_EPI32 (_pM128i(a)); // SSE 4.1 + b16 = _MM_CVTEPI16_EPI32 (_pM128i(b)); // SSE 4.1 + return _MM_MULLO_EPI32 (a16, b16); // SSE 4.1 +#else + __m128i low, hi, a128,b128; + a128 = _pM128i(a); + b128 = _pM128i(b); + low = _mm_mullo_epi16(a128,b128); + hi = _mm_mulhi_epi16(a128,b128); + return _mm_unpacklo_epi16(low,hi); +#endif +} + +_NEON2SSESTORAGE int64x2_t vmull_s32(int32x2_t a, int32x2_t b); // VMULL.S32 q0,d0,d0 +_NEON2SSE_INLINE int64x2_t vmull_s32(int32x2_t a, int32x2_t b) // VMULL.S32 q0,d0,d0 +{ + __m128i ab, ba, a128, b128; + a128 = _pM128i(a); + b128 = _pM128i(b); + ab = _mm_unpacklo_epi32 (a128, b128); //a0, b0, a1,b1 + ba = _mm_unpacklo_epi32 (b128, a128); //b0, a0, b1,a1 + return _MM_MUL_EPI32(ab, ba); //uses 1st and 3rd data lanes, the multiplication gives 64 bit result +} + +_NEON2SSESTORAGE uint16x8_t vmull_u8(uint8x8_t a, uint8x8_t b); // VMULL.U8 q0,d0,d0 +_NEON2SSE_INLINE uint16x8_t vmull_u8(uint8x8_t a, uint8x8_t b) // VMULL.U8 q0,d0,d0 +{ + //no 8 bit simd multiply, need to go to 16 bits + __m128i a16, b16; + a16 = _MM_CVTEPU8_EPI16 (_pM128i(a)); // SSE 4.1 + b16 = _MM_CVTEPU8_EPI16 (_pM128i(b)); // SSE 4.1 + return _mm_mullo_epi16 (a16, b16); //should fit into 16 bit +} + +_NEON2SSESTORAGE uint32x4_t vmull_u16(uint16x4_t a, uint16x4_t b); // VMULL.s16 q0,d0,d0 +_NEON2SSE_INLINE uint32x4_t vmull_u16(uint16x4_t a, uint16x4_t b) // VMULL.s16 q0,d0,d0 +{ +#ifdef USE_SSE4 + __m128i a16, b16; + a16 = _MM_CVTEPU16_EPI32 (_pM128i(a)); // SSE 4.1 + b16 = _MM_CVTEPU16_EPI32 (_pM128i(b)); // SSE 4.1 + return _MM_MULLO_EPI32 (a16, b16); // SSE 4.1 +#else + __m128i a128,b128,low, hi; + a128 = _pM128i(a); + b128 = _pM128i(b); + low = _mm_mullo_epi16(a128,b128); + hi = _mm_mulhi_epu16(a128,b128); + return _mm_unpacklo_epi16(low,hi); +#endif +} + +_NEON2SSESTORAGE uint64x2_t vmull_u32(uint32x2_t a, uint32x2_t b); // VMULL.U32 q0,d0,d0 +_NEON2SSE_INLINE uint64x2_t vmull_u32(uint32x2_t a, uint32x2_t b) // VMULL.U32 q0,d0,d0 +{ + ///may be not optimal compared with serial implementation + __m128i ab, ba, a128, b128; + a128 = _pM128i(a); + b128 = _pM128i(b); + ab = _mm_unpacklo_epi32 (a128, b128); //a0, b0, a1,b1 + ba = _mm_unpacklo_epi32 (b128, a128); //b0, a0,
b1,a1 + return _mm_mul_epu32 (ab, ba); //uses 1st and 3rd data lanes, the multiplication gives 64 bit result +} + +_NEON2SSESTORAGE poly16x8_t vmull_p8(poly8x8_t a, poly8x8_t b); // VMULL.P8 q0,d0,d0 +_NEON2SSE_INLINE poly16x8_t vmull_p8(poly8x8_t a, poly8x8_t b) +{ + //may be optimized + __m128i a128,b128, c1, a128_16, bmasked_16, res, tmp, bmasked; + int i; + a128 = _pM128i(a); + b128 = _pM128i(b); + c1 = _mm_cmpeq_epi8 (a128,a128); //all ones 0xff.... + c1 = vshrq_n_u8(c1,7); //0x1 + bmasked = _mm_and_si128(b128, c1); //0x1 + + a128_16 = _MM_CVTEPU8_EPI16 (a128); // SSE 4.1 + bmasked_16 = _MM_CVTEPU8_EPI16 (bmasked); // SSE 4.1 + res = _mm_mullo_epi16 (a128_16, bmasked_16); //should fit into 16 bit + for(i = 1; i<8; i++) { + c1 = _mm_slli_epi16(c1,1); //shift mask left by 1, 16 bit shift is OK here + bmasked = _mm_and_si128(b128, c1); //0x1 + bmasked_16 = _MM_CVTEPU8_EPI16 (bmasked); // SSE 4.1 + tmp = _mm_mullo_epi16 (a128_16, bmasked_16); //should fit into 16 bit, vmull_u8(a, bmasked); + res = _mm_xor_si128(res, tmp); + } + return res; +} + +//****************Vector saturating doubling long multiply ************************** +//***************************************************************** +_NEON2SSESTORAGE int32x4_t vqdmull_s16(int16x4_t a, int16x4_t b); // VQDMULL.S16 q0,d0,d0 +_NEON2SSE_INLINE int32x4_t vqdmull_s16(int16x4_t a, int16x4_t b) +{ + //the serial solution may be faster due to saturation + __m128i res; + res = vmull_s16(a, b); + return vqd_s32(res); +} + +_NEON2SSESTORAGE int64x2_t vqdmull_s32(int32x2_t a, int32x2_t b); // VQDMULL.S32 q0,d0,d0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x2_t vqdmull_s32(int32x2_t a, int32x2_t b),_NEON2SSE_REASON_SLOW_SERIAL) +{ + //the serial solution may be faster due to saturation + __m128i res; + res = vmull_s32(a,b); + return vqaddq_s64(res,res); //slow serial function!!!!
+} + +//********************* Vector multiply accumulate: vmla -> Vr[i] := Va[i] + Vb[i] * Vc[i] ************************ +//****************************************************************************************** +_NEON2SSESTORAGE int8x8_t vmla_s8(int8x8_t a, int8x8_t b, int8x8_t c); // VMLA.I8 d0,d0,d0 +_NEON2SSE_INLINE int8x8_t vmla_s8(int8x8_t a, int8x8_t b, int8x8_t c) // VMLA.I8 d0,d0,d0 +{ + // no 8 bit x86 simd multiply, need to go to 16 bits, and use the low 64 bits + int8x8_t res64; + __m128i b128, c128, res; + b128 = _MM_CVTEPI8_EPI16 (_pM128i(b)); // SSE 4.1 use low 64 bits + c128 = _MM_CVTEPI8_EPI16 (_pM128i(c)); // SSE 4.1 use low 64 bits + res = _mm_mullo_epi16 (c128, b128); + res = _mm_shuffle_epi8 (res, *(__m128i*) mask8_16_even_odd); + res = _mm_add_epi8 (res, _pM128i(a)); //use the low 64 bits + return64(res); +} + +_NEON2SSESTORAGE int16x4_t vmla_s16(int16x4_t a, int16x4_t b, int16x4_t c); // VMLA.I16 d0,d0,d0 +_NEON2SSE_INLINE int16x4_t vmla_s16(int16x4_t a, int16x4_t b, int16x4_t c) +{ + int16x4_t res64; + return64(vmlaq_s16(_pM128i(a),_pM128i(b), _pM128i(c))); +} + + +_NEON2SSESTORAGE int32x2_t vmla_s32(int32x2_t a, int32x2_t b, int32x2_t c); // VMLA.I32 d0,d0,d0 +_NEON2SSE_INLINE int32x2_t vmla_s32(int32x2_t a, int32x2_t b, int32x2_t c) // VMLA.I32 d0,d0,d0 +{ + int32x2_t res64; + __m128i res; + res = _MM_MULLO_EPI32 (_pM128i(b), _pM128i(c)); //SSE4.1 + res = _mm_add_epi32 (res, _pM128i(a)); //use the low 64 bits + return64(res); +} + +_NEON2SSESTORAGE float32x2_t vmla_f32(float32x2_t a, float32x2_t b, float32x2_t c); // VMLA.F32 d0,d0,d0 +_NEON2SSE_INLINE float32x2_t vmla_f32(float32x2_t a, float32x2_t b, float32x2_t c) +{ + __m128 res; + __m64_128 res64; +#ifdef USE_AVX2 + //fma + res = _mm_fmadd_ps(_pM128(c), _pM128(b), _pM128(a)); +#else + res = _mm_mul_ps (_pM128(c), _pM128(b)); + res = _mm_add_ps (_pM128(a), res); +#endif + _M64f(res64, res); + return res64; +} + +_NEON2SSESTORAGE uint8x8_t vmla_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c); // VMLA.I8 d0,d0,d0 +_NEON2SSE_INLINE uint8x8_t vmla_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) // VMLA.I8 d0,d0,d0 +{ + // no 8 bit x86 simd multiply, need to go to 16 bits, and use the low 64 bits + uint8x8_t res64; + __m128i mask, b128, c128, res; + mask = _mm_set1_epi16(0xff); + b128 = _MM_CVTEPU8_EPI16 (_pM128i(b)); // SSE 4.1 use low 64 bits + c128 = _MM_CVTEPU8_EPI16 (_pM128i(c)); // SSE 4.1 use low 64 bits + res = _mm_mullo_epi16 (c128, b128); + res = _mm_and_si128(res, mask); //to avoid saturation + res = _mm_packus_epi16 (res, res); + res = _mm_add_epi8 (res, _pM128i(a)); //use the low 64 bits + return64(res); +} + +_NEON2SSE_GLOBAL uint16x4_t vmla_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c); // VMLA.I16 d0,d0,d0 +#define vmla_u16 vmla_s16 + +_NEON2SSE_GLOBAL uint32x2_t vmla_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c); // VMLA.I32 d0,d0,d0 +#define vmla_u32 vmla_s32 + +_NEON2SSESTORAGE int8x16_t vmlaq_s8(int8x16_t a, int8x16_t b, int8x16_t c); // VMLA.I8 q0,q0,q0 +_NEON2SSE_INLINE int8x16_t vmlaq_s8(int8x16_t a, int8x16_t b, int8x16_t c) // VMLA.I8 q0,q0,q0 +{ + //solution may be not optimal + int8x16_t res = vmulq_s8(b,c); + return _mm_add_epi8(res, a); +} + +_NEON2SSESTORAGE int16x8_t vmlaq_s16(int16x8_t a, int16x8_t b, int16x8_t c); // VMLA.I16 q0,q0,q0 +_NEON2SSE_INLINE int16x8_t vmlaq_s16(int16x8_t a, int16x8_t b, int16x8_t c) // VMLA.I16 q0,q0,q0 +{ + __m128i res; + res = _mm_mullo_epi16 (c, b); + return _mm_add_epi16 (res, a); +} + +_NEON2SSESTORAGE int32x4_t vmlaq_s32(int32x4_t a, int32x4_t b, int32x4_t 
c); // VMLA.I32 q0,q0,q0 +_NEON2SSE_INLINE int32x4_t vmlaq_s32(int32x4_t a, int32x4_t b, int32x4_t c) // VMLA.I32 q0,q0,q0 +{ + __m128i res; + res = _MM_MULLO_EPI32 (c, b); //SSE4.1 + return _mm_add_epi32 (res, a); +} + +_NEON2SSESTORAGE float32x4_t vmlaq_f32(float32x4_t a, float32x4_t b, float32x4_t c); // VMLA.F32 q0,q0,q0 +#ifdef USE_AVX2 +//fma +#define vmlaq_f32(a, b, c) _mm_fmadd_ps(c, b, a) //swap arguments +#else +_NEON2SSE_INLINE float32x4_t vmlaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) // VMLA.F32 q0,q0,q0 +{ + __m128 res; + res = _mm_mul_ps (c, b); + return _mm_add_ps (a, res); +} +#endif + +_NEON2SSESTORAGE uint8x16_t vmlaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c); // VMLA.I8 q0,q0,q0 +_NEON2SSE_INLINE uint8x16_t vmlaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) // VMLA.I8 q0,q0,q0 +{ + //solution may be not optimal + int8x16_t res = vmulq_u8(b, c); + return _mm_add_epi8(res, a); +} + +_NEON2SSE_GLOBAL uint16x8_t vmlaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c); // VMLA.I16 q0,q0,q0 +#define vmlaq_u16 vmlaq_s16 + +_NEON2SSE_GLOBAL uint32x4_t vmlaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c); // VMLA.I32 q0,q0,q0 +#define vmlaq_u32 vmlaq_s32 + +//********************** Vector widening multiply accumulate (long multiply accumulate): +// vmla -> Vr[i] := Va[i] + Vb[i] * Vc[i] ************** +//******************************************************************************************** +_NEON2SSESTORAGE int16x8_t vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c); // VMLAL.S8 q0,d0,d0 +_NEON2SSE_INLINE int16x8_t vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) // VMLAL.S8 q0,d0,d0 +{ + int16x8_t res; + res = vmull_s8(b, c); + return _mm_add_epi16 (res, a); +} + +_NEON2SSESTORAGE int32x4_t vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c); // VMLAL.S16 q0,d0,d0 +_NEON2SSE_INLINE int32x4_t vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) // VMLAL.S16 q0,d0,d0 +{ + //may be not optimal compared with serial implementation + int32x4_t res; + res = vmull_s16(b, c); + return _mm_add_epi32 (res, a); +} + +_NEON2SSESTORAGE int64x2_t vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c); // VMLAL.S32 q0,d0,d0 +_NEON2SSE_INLINE int64x2_t vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) // VMLAL.S32 q0,d0,d0 +{ + //may be not optimal compared with serial implementation + int64x2_t res; + res = vmull_s32( b, c); + return _mm_add_epi64 (res, a); +} + +_NEON2SSESTORAGE uint16x8_t vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c); // VMLAL.U8 q0,d0,d0 +_NEON2SSE_INLINE uint16x8_t vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) // VMLAL.U8 q0,d0,d0 +{ + uint16x8_t res; + res = vmull_u8(b, c); + return _mm_add_epi16 (res, a); +} + +_NEON2SSESTORAGE uint32x4_t vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c); // VMLAL.s16 q0,d0,d0 +_NEON2SSE_INLINE uint32x4_t vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) // VMLAL.s16 q0,d0,d0 +{ + //may be not optimal compared with serial implementation + uint32x4_t res; + res = vmull_u16(b, c); + return _mm_add_epi32 (res, a); +} + +_NEON2SSESTORAGE uint64x2_t vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c); // VMLAL.U32 q0,d0,d0 +_NEON2SSE_INLINE uint64x2_t vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) // VMLAL.U32 q0,d0,d0 +{ + //may be not optimal compared with serial implementation + int64x2_t res; + res = vmull_u32( b,c); + return _mm_add_epi64 (res, a); +} + +//******************** Vector multiply subtract: vmls -> Vr[i] := Va[i] - Vb[i] * Vc[i] *************************************** 
+//******************************************************************************************** +_NEON2SSESTORAGE int8x8_t vmls_s8(int8x8_t a, int8x8_t b, int8x8_t c); // VMLS.I8 d0,d0,d0 +_NEON2SSE_INLINE int8x8_t vmls_s8(int8x8_t a, int8x8_t b, int8x8_t c) // VMLS.I8 d0,d0,d0 +{ + // no 8 bit simd multiply, need to go to 16 bits - and use the low 64 bits + int8x8_t res64; + __m128i res; + res64 = vmul_s8(b,c); + res = _mm_sub_epi8 (_pM128i(a), _pM128i(res64)); + return64(res); +} + +_NEON2SSESTORAGE int16x4_t vmls_s16(int16x4_t a, int16x4_t b, int16x4_t c); // VMLS.I16 d0,d0,d0 +_NEON2SSE_INLINE int16x4_t vmls_s16(int16x4_t a, int16x4_t b, int16x4_t c) +{ + int16x4_t res64; + return64(vmlsq_s16(_pM128i(a),_pM128i(b), _pM128i(c))); +} + + +_NEON2SSESTORAGE int32x2_t vmls_s32(int32x2_t a, int32x2_t b, int32x2_t c); // VMLS.I32 d0,d0,d0 +_NEON2SSE_INLINE int32x2_t vmls_s32(int32x2_t a, int32x2_t b, int32x2_t c) // VMLS.I32 d0,d0,d0 +{ + int32x2_t res64; + __m128i res; + res = _MM_MULLO_EPI32 (_pM128i(c),_pM128i( b)); //SSE4.1 + res = _mm_sub_epi32 (_pM128i(a),res); //use low 64 bits only + return64(res); +} + +_NEON2SSESTORAGE float32x2_t vmls_f32(float32x2_t a, float32x2_t b, float32x2_t c); // VMLS.F32 d0,d0,d0 +_NEON2SSE_INLINE float32x2_t vmls_f32(float32x2_t a, float32x2_t b, float32x2_t c) +{ + __m128 res; + __m64_128 res64; +#ifdef USE_AVX2 + //fma + res = _mm_fmsub_ps(_pM128(c), _pM128(b), _pM128(a)); +#else + res = _mm_mul_ps (_pM128(c), _pM128(b)); + res = _mm_sub_ps (_pM128(a), res); +#endif + _M64f(res64, res); + return res64; +} + +_NEON2SSESTORAGE uint8x8_t vmls_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c); // VMLS.I8 d0,d0,d0 +_NEON2SSE_INLINE uint8x8_t vmls_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) +{ + // no 8 bit simd multiply, need to go to 16 bits - and use the low 64 bits + uint8x8_t res64; + __m128i res; + res64 = vmul_u8(b,c); + res = _mm_sub_epi8 (_pM128i(a), _pM128i(res64)); + return64(res); +} + +_NEON2SSE_GLOBAL uint16x4_t vmls_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c); // VMLS.I16 d0,d0,d0 +#define vmls_u16 vmls_s16 + +_NEON2SSE_GLOBAL uint32x2_t vmls_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c); // VMLS.I32 d0,d0,d0 +#define vmls_u32 vmls_s32 + + +_NEON2SSESTORAGE int8x16_t vmlsq_s8(int8x16_t a, int8x16_t b, int8x16_t c); // VMLS.I8 q0,q0,q0 +_NEON2SSE_INLINE int8x16_t vmlsq_s8(int8x16_t a, int8x16_t b, int8x16_t c) // VMLS.I8 q0,q0,q0 +{ + //solution may be not optimal + int8x16_t res = vmulq_s8(b, c); + return _mm_sub_epi8(a, res); + } + +_NEON2SSESTORAGE int16x8_t vmlsq_s16(int16x8_t a, int16x8_t b, int16x8_t c); // VMLS.I16 q0,q0,q0 +_NEON2SSE_INLINE int16x8_t vmlsq_s16(int16x8_t a, int16x8_t b, int16x8_t c) // VMLS.I16 q0,q0,q0 +{ + __m128i res; + res = _mm_mullo_epi16 (c, b); + return _mm_sub_epi16 (a, res); +} + +_NEON2SSESTORAGE int32x4_t vmlsq_s32(int32x4_t a, int32x4_t b, int32x4_t c); // VMLS.I32 q0,q0,q0 +_NEON2SSE_INLINE int32x4_t vmlsq_s32(int32x4_t a, int32x4_t b, int32x4_t c) // VMLS.I32 q0,q0,q0 +{ + __m128i res; + res = _MM_MULLO_EPI32 (c, b); //SSE4.1 + return _mm_sub_epi32 (a, res); +} + +_NEON2SSESTORAGE float32x4_t vmlsq_f32(float32x4_t a, float32x4_t b, float32x4_t c); // VMLS.F32 q0,q0,q0 +#ifdef USE_AVX2 +//fma +#define vmlsq_f32(a, b, c) _mm_fmsub_ps(c, b, a) //swap arguments +#else +_NEON2SSE_INLINE float32x4_t vmlsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) // VMLS.F32 q0,q0,q0 +{ + __m128 res; + res = _mm_mul_ps (c, b); + return _mm_sub_ps (a, res); +} +#endif + +_NEON2SSESTORAGE uint8x16_t vmlsq_u8(uint8x16_t a, uint8x16_t 
b, uint8x16_t c); // VMLS.I8 q0,q0,q0 +_NEON2SSE_INLINE uint8x16_t vmlsq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) // VMLS.I8 q0,q0,q0 +{ + //solution may be not optimal + int8x16_t res = vmulq_u8(b, c); + return _mm_sub_epi8(a, res); +} + + +_NEON2SSE_GLOBAL uint16x8_t vmlsq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c); // VMLS.I16 q0,q0,q0 +#define vmlsq_u16 vmlsq_s16 + +_NEON2SSE_GLOBAL uint32x4_t vmlsq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c); // VMLS.I32 q0,q0,q0 +#define vmlsq_u32 vmlsq_s32 + +//******************** Vector multiply subtract long (widening multiply subtract) ************************************ +//************************************************************************************************************* +_NEON2SSESTORAGE int16x8_t vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c); // VMLSL.S8 q0,d0,d0 +_NEON2SSE_INLINE int16x8_t vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) // VMLSL.S8 q0,d0,d0 +{ + int16x8_t res; + res = vmull_s8(b, c); + return _mm_sub_epi16 (a, res); +} + +_NEON2SSESTORAGE int32x4_t vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c); // VMLSL.S16 q0,d0,d0 +_NEON2SSE_INLINE int32x4_t vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) // VMLSL.S16 q0,d0,d0 +{ + //may be not optimal compared with serial implementation + int32x4_t res; + res = vmull_s16(b, c); + return _mm_sub_epi32 (a, res); +} + +_NEON2SSESTORAGE int64x2_t vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c); // VMLSL.S32 q0,d0,d0 +_NEON2SSE_INLINE int64x2_t vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) // VMLSL.S32 q0,d0,d0 +{ + //may be not optimal compared with serial implementation + int64x2_t res; + res = vmull_s32( b,c); + return _mm_sub_epi64 (a, res); +} + +_NEON2SSESTORAGE uint16x8_t vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c); // VMLSL.U8 q0,d0,d0 +_NEON2SSE_INLINE uint16x8_t vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) // VMLSL.U8 q0,d0,d0 +{ + uint16x8_t res; + res = vmull_u8(b, c); + return _mm_sub_epi16 (a, res); +} + +_NEON2SSESTORAGE uint32x4_t vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c); // VMLSL.s16 q0,d0,d0 +_NEON2SSE_INLINE uint32x4_t vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) // VMLSL.s16 q0,d0,d0 +{ + //may be not optimal compared with serial implementation + uint32x4_t res; + res = vmull_u16(b, c); + return _mm_sub_epi32 (a, res); +} + +_NEON2SSESTORAGE uint64x2_t vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c); // VMLSL.U32 q0,d0,d0 +_NEON2SSE_INLINE uint64x2_t vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) // VMLSL.U32 q0,d0,d0 +{ + //may be not optimal compared with serial implementation + int64x2_t res; + res = vmull_u32( b,c); + return _mm_sub_epi64 (a, res); +} + +//****** Vector saturating doubling multiply high ********************** +//************************************************************************* +_NEON2SSESTORAGE int16x4_t vqdmulh_s16(int16x4_t a, int16x4_t b); // VQDMULH.S16 d0,d0,d0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int16x4_t vqdmulh_s16(int16x4_t a, int16x4_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + int16x4_t res; + int32_t a32, b32, i; + for (i = 0; i<4; i++) { + a32 = (int32_t) a.m64_i16[i]; + b32 = (int32_t) b.m64_i16[i]; + a32 = (a32 * b32) >> 15; + res.m64_i16[i] = (a32 == 0x8000) ? 
0x7fff : (int16_t) a32;
+    }
+    return res;
+}
+
+_NEON2SSESTORAGE int32x2_t vqdmulh_s32(int32x2_t a, int32x2_t b); // VQDMULH.S32 d0,d0,d0
+_NEON2SSE_INLINE int32x2_t vqdmulh_s32(int32x2_t a, int32x2_t b) // no multiply high 32 bit SIMD in IA32, so need to do some tricks, serial solution may be faster
+{
+    //may be suboptimal compared with a serial solution
+    int32x2_t res64;
+    __m128i mask;
+    _NEON2SSE_ALIGN_16 static const uint32_t cmask32[] = {0x80000000, 0x80000000, 0x80000000, 0x80000000};
+    int64x2_t mul;
+    mul = vmull_s32(a,b);
+    mul = _mm_slli_epi64(mul,1); //double the result
+    //at this point start treating 2 64-bit numbers as 4 32-bit
+    mul = _mm_shuffle_epi32 (mul, 1 | (3 << 2) | (0 << 4) | (2 << 6)); //shuffle the data to get 2 32-bits
+    mask = _mm_cmpeq_epi32 (mul, *(__m128i*)cmask32);
+    mul = _mm_xor_si128 (mul, mask); //res saturated for 0x80000000
+    return64(mul);
+}
+
+_NEON2SSESTORAGE int16x8_t vqdmulhq_s16(int16x8_t a, int16x8_t b); // VQDMULH.S16 q0,q0,q0
+_NEON2SSE_INLINE int16x8_t vqdmulhq_s16(int16x8_t a, int16x8_t b) // VQDMULH.S16 q0,q0,q0
+{
+    __m128i res, res_lo, mask;
+    _NEON2SSE_ALIGN_16 static const uint16_t cmask[] = {0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000};
+    res = _mm_mulhi_epi16 (a, b);
+    res = _mm_slli_epi16 (res, 1); //double the result, don't care about saturation
+    res_lo = _mm_mullo_epi16 (a, b);
+    res_lo = _mm_srli_epi16(res_lo,15); //take the highest bit
+    res = _mm_add_epi16(res, res_lo); //combine results
+    mask = _mm_cmpeq_epi16 (res, *(__m128i*)cmask);
+    return _mm_xor_si128 (res, mask); //res saturated for 0x8000
+}
+
+_NEON2SSESTORAGE int32x4_t vqdmulhq_s32(int32x4_t a, int32x4_t b); // VQDMULH.S32 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int32x4_t vqdmulhq_s32(int32x4_t a, int32x4_t b), _NEON2SSE_REASON_SLOW_UNEFFECTIVE)
+{
+    // no multiply-high 32-bit SIMD in IA32; may be suboptimal compared with a serial solution on targets without AVX or SSE4
+#ifdef USE_AVX2
+    __m128i r16_1, r16_2;
+    __m256i a64, b64, res, mask;
+    a64 = _mm256_cvtepu32_epi64(a);
+    b64 = _mm256_cvtepu32_epi64(b);
+    res = _mm256_mul_epi32(a64, b64);
+    res = _mm256_slli_epi64(res, 1); //double the result
+    //at this point start treating 64-bit numbers as 32-bit
+    res = _mm256_shuffle_epi32(res, 1 | (3 << 2) | (0 << 4) | (2 << 6)); //shuffle the data to get 4 32-bits in each lane
+    mask = _mm256_set1_epi32(0x80000000);
+    mask = _mm256_cmpeq_epi32(res, mask);
+    res = _mm256_xor_si256(res, mask);
+    r16_1 = _mm256_castsi256_si128(res);
+    r16_2 = _mm256_extractf128_si256(res, 1);
+    return _mm_unpacklo_epi64(r16_1, r16_2);
+#else
+    __m128i ab, ba, mask, mul, mul1;
+    _NEON2SSE_ALIGN_16 static const uint32_t cmask32[] = {0x80000000, 0x80000000, 0x80000000, 0x80000000};
+    ab = _mm_unpacklo_epi32 (a, b); //a0, b0, a1,b1
+    ba = _mm_unpacklo_epi32 (b, a); //b0, a0, b1,a1
+    mul = _MM_MUL_EPI32(ab, ba); //uses the 1st and 3rd data lanes; the multiplication gives a 64-bit result
+    mul = _mm_slli_epi64(mul,1); //double the result
+    ab = _mm_unpackhi_epi32 (a, b); //a2, b2, a3,b3
+    ba = _mm_unpackhi_epi32 (b, a); //b2, a2, b3,a3
+    mul1 = _MM_MUL_EPI32(ab, ba); //uses the 1st and 3rd data lanes; the multiplication gives a 64-bit result
+    mul1 = _mm_slli_epi64(mul1,1); //double the result
+    mul = _mm_shuffle_epi32 (mul, 1 | (3 << 2) | (0 << 4) | (2 << 6)); //shuffle the data to get 2 32-bits
+    mul1 = _mm_shuffle_epi32 (mul1, 1 | (3 << 2) | (0 << 4) | (2 << 6)); //shuffle the data to get 2 32-bits
+    mul = _mm_unpacklo_epi64(mul, mul1);
+    mask = _mm_cmpeq_epi32 (mul, *(__m128i*)cmask32);
+    return _mm_xor_si128 (mul, mask); //res saturated for 0x80000000
+#endif
+}
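+
+// Usage sketch (illustrative only; the helper name is hypothetical and the
+// block is not compiled): vqdmulh is the classic Q15/Q31 fixed-point multiply,
+// keeping the high half of the doubled product.
+#if 0
+static int16x8_t q15_mul_example(int16x8_t a, int16x8_t b)
+{
+    // r[i] = sat((2 * a[i] * b[i]) >> 16), i.e. Q15 x Q15 -> Q15, truncated;
+    // saturation only triggers for a[i] == b[i] == -32768 (0x8000)
+    return vqdmulhq_s16(a, b);
+}
+#endif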
+
+//********* Vector saturating rounding doubling multiply high ****************
+//****************************************************************************
+//If the _mm_mulhrs_xx functions are used, the result may differ slightly from the NEON one due to different rounding rules and evaluation order
+_NEON2SSESTORAGE int16x4_t vqrdmulh_s16(int16x4_t a, int16x4_t b); // VQRDMULH.S16 d0,d0,d0
+_NEON2SSE_INLINE int16x4_t vqrdmulh_s16(int16x4_t a, int16x4_t b)
+{
+    int16x4_t res64;
+    return64(vqrdmulhq_s16(_pM128i(a), _pM128i(b)));
+}
+
+_NEON2SSESTORAGE int32x2_t vqrdmulh_s32(int32x2_t a, int32x2_t b); // VQRDMULH.S32 d0,d0,d0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int32x2_t vqrdmulh_s32(int32x2_t a, int32x2_t b), _NEON2SSE_REASON_SLOW_UNEFFECTIVE)
+{
+    //may be suboptimal compared with a serial solution
+    int32x2_t res64;
+    _NEON2SSE_ALIGN_16 static const uint32_t cmask32[] = {0x80000000, 0x80000000, 0x80000000, 0x80000000};
+    __m128i res_sat, mask, mask1;
+    int64x2_t mul;
+    mul = vmull_s32(a,b);
+    res_sat = _mm_slli_epi64 (mul, 1); //double the result, saturation not considered
+    mask1 = _mm_slli_epi64(res_sat, 32); //shift left then back right to
+    mask1 = _mm_srli_epi64(mask1,31); //get 31-th bit 1 or zero
+    mul = _mm_add_epi32 (res_sat, mask1); //actual rounding
+    //at this point start treating 2 64-bit numbers as 4 32-bit
+    mul = _mm_shuffle_epi32 (mul, 1 | (3 << 2) | (0 << 4) | (2 << 6)); //shuffle the data to get 2 32-bits from each 64-bit
+    mask = _mm_cmpeq_epi32 (mul, *(__m128i*)cmask32);
+    mul = _mm_xor_si128 (mul, mask); //res saturated for 0x80000000
+    return64(mul);
+}
+
+_NEON2SSESTORAGE int16x8_t vqrdmulhq_s16(int16x8_t a, int16x8_t b); // VQRDMULH.S16 q0,q0,q0
+_NEON2SSE_INLINE int16x8_t vqrdmulhq_s16(int16x8_t a, int16x8_t b) // VQRDMULH.S16 q0,q0,q0
+{
+    __m128i mask, res;
+    _NEON2SSE_ALIGN_16 static const uint16_t cmask[] = {0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000};
+    res = _mm_mulhrs_epi16 (a, b);
+    mask = _mm_cmpeq_epi16 (res, *(__m128i*)cmask);
+    return _mm_xor_si128 (res, mask); //res saturated for 0x8000
+}
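+
+// Usage sketch (illustrative only; hypothetical name, not compiled): the
+// rounded variant adds 0x8000 to the doubled product before taking the high
+// half, which removes the downward bias of vqdmulh.
+#if 0
+static int16x8_t q15_round_mul_example(int16x8_t a, int16x8_t b)
+{
+    // r[i] = sat((2 * a[i] * b[i] + 0x8000) >> 16): Q15 multiply with rounding
+    return vqrdmulhq_s16(a, b);
+}
+#endif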
+
+_NEON2SSESTORAGE int32x4_t vqrdmulhq_s32(int32x4_t a, int32x4_t b); // VQRDMULH.S32 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int32x4_t vqrdmulhq_s32(int32x4_t a, int32x4_t b), _NEON2SSE_REASON_SLOW_UNEFFECTIVE)
+{
+    // no multiply-high 32-bit SIMD in IA32; may be suboptimal compared with a serial solution on targets without AVX or SSE4
+#ifdef USE_AVX2
+    __m128i r16_1, r16_2;
+    __m256i a64, b64, res, mask;
+    a64 = _mm256_cvtepu32_epi64(a);
+    b64 = _mm256_cvtepu32_epi64(b);
+    res = _mm256_mul_epi32(a64, b64);
+    res = _mm256_slli_epi64(res, 1); //double the result, saturation not considered
+    mask = _mm256_slli_epi64(res, 32); //shift left then back right to
+    mask = _mm256_srli_epi64(mask, 31); //get 31-th bit 1 or zero
+    res = _mm256_add_epi32(res, mask); //actual rounding
+    //at this point start treating 64-bit numbers as 32-bit
+    res = _mm256_shuffle_epi32(res, 1 | (3 << 2) | (0 << 4) | (2 << 6)); //shuffle the data to get 4 32-bits in each lane
+    mask = _mm256_set1_epi32(0x80000000);
+    mask = _mm256_cmpeq_epi32(res, mask);
+    res = _mm256_xor_si256(res, mask);
+    r16_1 = _mm256_castsi256_si128(res);
+    r16_2 = _mm256_extractf128_si256(res, 1);
+    return _mm_unpacklo_epi64(r16_1, r16_2);
+#else
+    __m128i ab, ba, mask, mul, mul1, mask1;
+    _NEON2SSE_ALIGN_16 static const uint32_t cmask32[] = {0x80000000, 0x80000000, 0x80000000, 0x80000000};
+    ab = _mm_unpacklo_epi32 (a, b); //a0, b0, a1,b1
+    ba = _mm_unpacklo_epi32 (b, a); //b0, a0, b1,a1
+    mul = _MM_MUL_EPI32(ab, ba); //uses the 1st and 3rd data lanes; the multiplication gives a 64-bit result
+    mul = _mm_slli_epi64 (mul, 1); //double the result, saturation not considered
+    mask1 = _mm_slli_epi64(mul, 32); //shift left then back right to
+    mask1 = _mm_srli_epi64(mask1,31); //get 31-th bit 1 or zero
+    mul = _mm_add_epi32 (mul, mask1); //actual rounding
+
+    ab = _mm_unpackhi_epi32 (a, b); //a2, b2, a3,b3
+    ba = _mm_unpackhi_epi32 (b, a); //b2, a2, b3,a3
+    mul1 = _MM_MUL_EPI32(ab, ba); //uses the 1st and 3rd data lanes; the multiplication gives a 64-bit result
+    mul1 = _mm_slli_epi64 (mul1, 1); //double the result, saturation not considered
+    mask1 = _mm_slli_epi64(mul1, 32); //shift left then back right to
+    mask1 = _mm_srli_epi64(mask1,31); //get 31-th bit 1 or zero
+    mul1 = _mm_add_epi32 (mul1, mask1); //actual rounding
+    //at this point start treating 2 64-bit numbers as 4 32-bit
+    mul = _mm_shuffle_epi32 (mul, 1 | (3 << 2) | (0 << 4) | (2 << 6)); //shuffle the data to get 2 32-bits from each 64-bit
+    mul1 = _mm_shuffle_epi32 (mul1, 1 | (3 << 2) | (0 << 4) | (2 << 6)); //shuffle the data to get 2 32-bits from each 64-bit
+    mul = _mm_unpacklo_epi64(mul, mul1);
+    mask = _mm_cmpeq_epi32 (mul, *(__m128i*)cmask32);
+    return _mm_xor_si128 (mul, mask); //res saturated for 0x80000000
+#endif
+}
+
+//*************Vector widening saturating doubling multiply accumulate (long saturating doubling multiply accumulate) *****
+//*************************************************************************************************************************
+_NEON2SSESTORAGE int32x4_t vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c); // VQDMLAL.S16 q0,d0,d0
+_NEON2SSE_INLINE int32x4_t vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) // VQDMLAL.S16 q0,d0,d0
+{
+    //suboptimal SIMD solution; serial may be faster
+    __m128i res32;
+    res32 = vmull_s16(b, c);
+    res32 = vqd_s32(res32); //doubling & saturation; if no saturation were needed we could use _mm_slli_epi32 (res, 1);
+    return vqaddq_s32(res32, a); //saturation
+}
+
+_NEON2SSESTORAGE int64x2_t vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c); // VQDMLAL.S32 q0,d0,d0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x2_t vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c),_NEON2SSE_REASON_SLOW_SERIAL)
+{
+    __m128i res64;
+    res64 = vmull_s32(b,c);
+    res64 = vqaddq_s64(res64, res64); //doubling & saturation; if no saturation were needed we could use _mm_slli_epi64 (res, 1);
+    return vqaddq_s64(res64, a); //saturation
+}
+
+//************************************************************************************
+//****************** Vector subtract ***********************************************
+//************************************************************************************
+_NEON2SSESTORAGE int8x8_t vsub_s8(int8x8_t a, int8x8_t b); // VSUB.I8 d0,d0,d0
+_NEON2SSE_INLINE int8x8_t vsub_s8(int8x8_t a, int8x8_t b)
+{
+    int8x8_t res64;
+    return64(_mm_sub_epi8(_pM128i(a),_pM128i(b)));
+}
+
+
+_NEON2SSESTORAGE int16x4_t vsub_s16(int16x4_t a, int16x4_t b); // VSUB.I16 d0,d0,d0
+_NEON2SSE_INLINE int16x4_t vsub_s16(int16x4_t a, int16x4_t b)
+{
+    int16x4_t res64;
+    return64(_mm_sub_epi16(_pM128i(a),_pM128i(b)));
+}
+
+
+_NEON2SSESTORAGE int32x2_t vsub_s32(int32x2_t a, int32x2_t b); // VSUB.I32 d0,d0,d0
+_NEON2SSE_INLINE int32x2_t vsub_s32(int32x2_t a, int32x2_t b)
+{
+    int32x2_t res64;
+    return64(_mm_sub_epi32(_pM128i(a),_pM128i(b)));
+}
+
+
+_NEON2SSESTORAGE int64x1_t vsub_s64(int64x1_t a, int64x1_t b); // VSUB.I64 d0,d0,d0 +_NEON2SSE_INLINE int64x1_t vsub_s64(int64x1_t a, int64x1_t b) +{ + int64x1_t res64; + res64.m64_i64[0] = a.m64_i64[0] - b.m64_i64[0]; + return res64; +} + + +_NEON2SSESTORAGE float32x2_t vsub_f32(float32x2_t a, float32x2_t b); // VSUB.F32 d0,d0,d0 +_NEON2SSE_INLINE float32x2_t vsub_f32(float32x2_t a, float32x2_t b) +{ + float32x2_t res64; + return64f(_mm_sub_ps(_pM128(a), _pM128(b))); +} + +_NEON2SSE_GLOBAL uint8x8_t vsub_u8(uint8x8_t a, uint8x8_t b); // VSUB.I8 d0,d0,d0 +#define vsub_u8 vsub_s8 + +_NEON2SSE_GLOBAL uint16x4_t vsub_u16(uint16x4_t a, uint16x4_t b); // VSUB.I16 d0,d0,d0 +#define vsub_u16 vsub_s16 + +_NEON2SSE_GLOBAL uint32x2_t vsub_u32(uint32x2_t a, uint32x2_t b); // VSUB.I32 d0,d0,d0 +#define vsub_u32 vsub_s32 + + +_NEON2SSESTORAGE uint64x1_t vsub_u64(uint64x1_t a, uint64x1_t b); // VSUB.I64 d0,d0,d0 +_NEON2SSE_INLINE uint64x1_t vsub_u64(uint64x1_t a, uint64x1_t b) +{ + int64x1_t res64; + res64.m64_u64[0] = a.m64_u64[0] - b.m64_u64[0]; + return res64; +} + + +_NEON2SSE_GLOBAL int8x16_t vsubq_s8(int8x16_t a, int8x16_t b); // VSUB.I8 q0,q0,q0 +#define vsubq_s8 _mm_sub_epi8 + +_NEON2SSE_GLOBAL int16x8_t vsubq_s16(int16x8_t a, int16x8_t b); // VSUB.I16 q0,q0,q0 +#define vsubq_s16 _mm_sub_epi16 + +_NEON2SSE_GLOBAL int32x4_t vsubq_s32(int32x4_t a, int32x4_t b); // VSUB.I32 q0,q0,q0 +#define vsubq_s32 _mm_sub_epi32 + +_NEON2SSE_GLOBAL int64x2_t vsubq_s64(int64x2_t a, int64x2_t b); // VSUB.I64 q0,q0,q0 +#define vsubq_s64 _mm_sub_epi64 + +_NEON2SSE_GLOBAL float32x4_t vsubq_f32(float32x4_t a, float32x4_t b); // VSUB.F32 q0,q0,q0 +#define vsubq_f32 _mm_sub_ps + +_NEON2SSE_GLOBAL uint8x16_t vsubq_u8(uint8x16_t a, uint8x16_t b); // VSUB.I8 q0,q0,q0 +#define vsubq_u8 _mm_sub_epi8 + +_NEON2SSE_GLOBAL uint16x8_t vsubq_u16(uint16x8_t a, uint16x8_t b); // VSUB.I16 q0,q0,q0 +#define vsubq_u16 _mm_sub_epi16 + +_NEON2SSE_GLOBAL uint32x4_t vsubq_u32(uint32x4_t a, uint32x4_t b); // VSUB.I32 q0,q0,q0 +#define vsubq_u32 _mm_sub_epi32 + +_NEON2SSE_GLOBAL uint64x2_t vsubq_u64(uint64x2_t a, uint64x2_t b); // VSUB.I64 q0,q0,q0 +#define vsubq_u64 _mm_sub_epi64 + +//***************Vector long subtract: vsub -> Vr[i]:=Va[i]-Vb[i] ****************** +//*********************************************************************************** +//Va, Vb have equal lane sizes, result is a 128 bit vector of lanes that are twice the width. 
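+
+// Usage sketch (illustrative only; hypothetical name, not compiled): widening
+// first makes the difference exact, e.g. per-pixel deltas of two u8 rows.
+#if 0
+static int16x8_t pixel_delta_example(uint8x8_t a, uint8x8_t b)
+{
+    // both inputs are zero-extended to 16 bits before subtracting, so
+    // 5 - 250 yields -245 in a 16-bit lane instead of wrapping in 8 bits
+    return vreinterpretq_s16_u16(vsubl_u8(a, b));
+}
+#endif
+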
+_NEON2SSESTORAGE int16x8_t vsubl_s8(int8x8_t a, int8x8_t b); // VSUBL.S8 q0,d0,d0 +_NEON2SSE_INLINE int16x8_t vsubl_s8(int8x8_t a, int8x8_t b) // VSUBL.S8 q0,d0,d0 +{ + __m128i a16, b16; + a16 = _MM_CVTEPI8_EPI16 (_pM128i(a)); //SSE4.1, + b16 = _MM_CVTEPI8_EPI16 (_pM128i(b)); //SSE4.1, + return _mm_sub_epi16 (a16, b16); +} + +_NEON2SSESTORAGE int32x4_t vsubl_s16(int16x4_t a, int16x4_t b); // VSUBL.S16 q0,d0,d0 +_NEON2SSE_INLINE int32x4_t vsubl_s16(int16x4_t a, int16x4_t b) // VSUBL.S16 q0,d0,d0 +{ + __m128i a32, b32; + a32 = _MM_CVTEPI16_EPI32 (_pM128i(a)); //SSE4.1 + b32 = _MM_CVTEPI16_EPI32 (_pM128i(b)); //SSE4.1, + return _mm_sub_epi32 (a32, b32); +} + +_NEON2SSESTORAGE int64x2_t vsubl_s32(int32x2_t a, int32x2_t b); // VSUBL.S32 q0,d0,d0 +_NEON2SSE_INLINE int64x2_t vsubl_s32(int32x2_t a, int32x2_t b) // VSUBL.S32 q0,d0,d0 +{ + //may be not optimal + __m128i a64, b64; + a64 = _MM_CVTEPI32_EPI64 (_pM128i(a)); //SSE4.1 + b64 = _MM_CVTEPI32_EPI64 (_pM128i(b)); //SSE4.1, + return _mm_sub_epi64 (a64, b64); +} + +_NEON2SSESTORAGE uint16x8_t vsubl_u8(uint8x8_t a, uint8x8_t b); // VSUBL.U8 q0,d0,d0 +_NEON2SSE_INLINE uint16x8_t vsubl_u8(uint8x8_t a, uint8x8_t b) // VSUBL.U8 q0,d0,d0 +{ + __m128i a16, b16; + a16 = _MM_CVTEPU8_EPI16 (_pM128i(a)); //SSE4.1, + b16 = _MM_CVTEPU8_EPI16 (_pM128i(b)); //SSE4.1, + return _mm_sub_epi16 (a16, b16); +} + +_NEON2SSESTORAGE uint32x4_t vsubl_u16(uint16x4_t a, uint16x4_t b); // VSUBL.s16 q0,d0,d0 +_NEON2SSE_INLINE uint32x4_t vsubl_u16(uint16x4_t a, uint16x4_t b) // VSUBL.s16 q0,d0,d0 +{ + __m128i a32, b32; + a32 = _MM_CVTEPU16_EPI32 (_pM128i(a)); //SSE4.1 + b32 = _MM_CVTEPU16_EPI32 (_pM128i(b)); //SSE4.1, + return _mm_sub_epi32 (a32, b32); +} + +_NEON2SSESTORAGE uint64x2_t vsubl_u32(uint32x2_t a, uint32x2_t b); // VSUBL.U32 q0,d0,d0 +_NEON2SSE_INLINE uint64x2_t vsubl_u32(uint32x2_t a, uint32x2_t b) // VSUBL.U32 q0,d0,d0 +{ + //may be not optimal + __m128i a64, b64; + a64 = _MM_CVTEPU32_EPI64 (_pM128i(a)); //SSE4.1 + b64 = _MM_CVTEPU32_EPI64 (_pM128i(b)); //SSE4.1, + return _mm_sub_epi64 (a64, b64); +} + +//***************** Vector wide subtract: vsub -> Vr[i]:=Va[i]-Vb[i] ********************************** +//***************************************************************************************************** +_NEON2SSESTORAGE int16x8_t vsubw_s8(int16x8_t a, int8x8_t b); // VSUBW.S8 q0,q0,d0 +_NEON2SSE_INLINE int16x8_t vsubw_s8(int16x8_t a, int8x8_t b) // VSUBW.S8 q0,q0,d0 +{ + __m128i b16; + b16 = _MM_CVTEPI8_EPI16 (_pM128i(b)); //SSE4.1, + return _mm_sub_epi16 (a, b16); +} + +_NEON2SSESTORAGE int32x4_t vsubw_s16(int32x4_t a, int16x4_t b); // VSUBW.S16 q0,q0,d0 +_NEON2SSE_INLINE int32x4_t vsubw_s16(int32x4_t a, int16x4_t b) // VSUBW.S16 q0,q0,d0 +{ + __m128i b32; + b32 = _MM_CVTEPI16_EPI32 (_pM128i(b)); //SSE4.1, + return _mm_sub_epi32 (a, b32); +} + +_NEON2SSESTORAGE int64x2_t vsubw_s32(int64x2_t a, int32x2_t b); // VSUBW.S32 q0,q0,d0 +_NEON2SSE_INLINE int64x2_t vsubw_s32(int64x2_t a, int32x2_t b) // VSUBW.S32 q0,q0,d0 +{ + __m128i b64; + b64 = _MM_CVTEPI32_EPI64 (_pM128i(b)); //SSE4.1 + return _mm_sub_epi64 (a, b64); +} + +_NEON2SSESTORAGE uint16x8_t vsubw_u8(uint16x8_t a, uint8x8_t b); // VSUBW.U8 q0,q0,d0 +_NEON2SSE_INLINE uint16x8_t vsubw_u8(uint16x8_t a, uint8x8_t b) // VSUBW.U8 q0,q0,d0 +{ + __m128i b16; + b16 = _MM_CVTEPU8_EPI16 (_pM128i(b)); //SSE4.1, + return _mm_sub_epi16 (a, b16); +} + +_NEON2SSESTORAGE uint32x4_t vsubw_u16(uint32x4_t a, uint16x4_t b); // VSUBW.s16 q0,q0,d0 +_NEON2SSE_INLINE uint32x4_t vsubw_u16(uint32x4_t a, uint16x4_t b) // 
VSUBW.s16 q0,q0,d0
+{
+    __m128i b32;
+    b32 = _MM_CVTEPU16_EPI32 (_pM128i(b)); //SSE4.1
+    return _mm_sub_epi32 (a, b32);
+}
+
+_NEON2SSESTORAGE uint64x2_t vsubw_u32(uint64x2_t a, uint32x2_t b); // VSUBW.U32 q0,q0,d0
+_NEON2SSE_INLINE uint64x2_t vsubw_u32(uint64x2_t a, uint32x2_t b) // VSUBW.U32 q0,q0,d0
+{
+    __m128i b64;
+    b64 = _MM_CVTEPU32_EPI64 (_pM128i(b)); //SSE4.1
+    return _mm_sub_epi64 (a, b64);
+}
+
+//************************Vector saturating subtract *********************************
+//*************************************************************************************
+_NEON2SSESTORAGE int8x8_t vqsub_s8(int8x8_t a, int8x8_t b); // VQSUB.S8 d0,d0,d0
+_NEON2SSE_INLINE int8x8_t vqsub_s8(int8x8_t a, int8x8_t b)
+{
+    int8x8_t res64;
+    return64(_mm_subs_epi8(_pM128i(a),_pM128i(b)));
+}
+
+
+_NEON2SSESTORAGE int16x4_t vqsub_s16(int16x4_t a, int16x4_t b); // VQSUB.S16 d0,d0,d0
+_NEON2SSE_INLINE int16x4_t vqsub_s16(int16x4_t a, int16x4_t b)
+{
+    int16x4_t res64;
+    return64(_mm_subs_epi16(_pM128i(a),_pM128i(b)));
+}
+
+
+_NEON2SSESTORAGE int32x2_t vqsub_s32(int32x2_t a, int32x2_t b); // VQSUB.S32 d0,d0,d0
+_NEON2SSE_INLINE int32x2_t vqsub_s32(int32x2_t a, int32x2_t b)
+{
+    int32x2_t res64;
+    return64(vqsubq_s32(_pM128i(a), _pM128i(b)));
+}
+
+
+_NEON2SSESTORAGE int64x1_t vqsub_s64(int64x1_t a, int64x1_t b); // VQSUB.S64 d0,d0,d0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x1_t vqsub_s64(int64x1_t a, int64x1_t b), _NEON2SSE_REASON_SLOW_SERIAL) //no optimal SIMD solution
+{
+    uint64x1_t res;
+    uint64_t a64,b64;
+    a64 = a.m64_u64[0];
+    b64 = b.m64_u64[0];
+    res.m64_u64[0] = a64 - b64;
+
+    a64 = (a64 >> 63) + (~_SIGNBIT64);
+    if ((int64_t)((a64 ^ b64) & (a64 ^ res.m64_u64[0])) < 0) {
+        res.m64_u64[0] = a64;
+    }
+    return res;
+}
+
+_NEON2SSESTORAGE uint8x8_t vqsub_u8(uint8x8_t a, uint8x8_t b); // VQSUB.U8 d0,d0,d0
+_NEON2SSE_INLINE uint8x8_t vqsub_u8(uint8x8_t a, uint8x8_t b)
+{
+    uint8x8_t res64;
+    return64(_mm_subs_epu8(_pM128i(a),_pM128i(b)));
+}
+
+
+_NEON2SSESTORAGE uint16x4_t vqsub_u16(uint16x4_t a, uint16x4_t b); // VQSUB.s16 d0,d0,d0
+_NEON2SSE_INLINE uint16x4_t vqsub_u16(uint16x4_t a, uint16x4_t b)
+{
+    uint16x4_t res64;
+    return64(_mm_subs_epu16(_pM128i(a),_pM128i(b)));
+}
+
+
+_NEON2SSESTORAGE uint32x2_t vqsub_u32(uint32x2_t a, uint32x2_t b); // VQSUB.U32 d0,d0,d0
+_NEON2SSE_INLINE uint32x2_t vqsub_u32(uint32x2_t a, uint32x2_t b)
+{
+    uint32x2_t res64;
+    return64(vqsubq_u32(_pM128i(a), _pM128i(b)));
+}
+
+
+_NEON2SSESTORAGE uint64x1_t vqsub_u64(uint64x1_t a, uint64x1_t b); // VQSUB.U64 d0,d0,d0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint64x1_t vqsub_u64(uint64x1_t a, uint64x1_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    uint64x1_t res;
+    uint64_t a64, b64;
+    a64 = _Ui64(a);
+    b64 = _Ui64(b);
+    if (a64 > b64) {
+        res.m64_u64[0] = a64 - b64;
+    } else {
+        res.m64_u64[0] = 0;
+    }
+    return res;
+}
+
+_NEON2SSE_GLOBAL int8x16_t vqsubq_s8(int8x16_t a, int8x16_t b); // VQSUB.S8 q0,q0,q0
+#define vqsubq_s8 _mm_subs_epi8
+
+_NEON2SSE_GLOBAL int16x8_t vqsubq_s16(int16x8_t a, int16x8_t b); // VQSUB.S16 q0,q0,q0
+#define vqsubq_s16 _mm_subs_epi16
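+
+// Usage sketch (illustrative only; hypothetical name, not compiled):
+// saturating subtract clamps at the type bounds instead of wrapping.
+#if 0
+static uint8x16_t darken_example(uint8x16_t pixels, uint8x16_t amount)
+{
+    // e.g. 10 - 25 saturates to 0 rather than wrapping around to 241
+    return vqsubq_u8(pixels, amount);
+}
+#endif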
+
+_NEON2SSESTORAGE int32x4_t vqsubq_s32(int32x4_t a, int32x4_t b); // VQSUB.S32 q0,q0,q0
+_NEON2SSE_INLINE int32x4_t vqsubq_s32(int32x4_t a, int32x4_t b)
+{
+    //no corresponding x86 SIMD solution, special tricks are necessary. Overflow is possible only if a and b have opposite signs and the subtraction result has the opposite sign to a
+    __m128i c7fffffff, res, res_sat, res_xor_a, b_xor_a;
+    c7fffffff = _mm_set1_epi32(0x7fffffff);
+    res = _mm_sub_epi32(a, b);
+    res_sat = _mm_srli_epi32(a, 31);
+    res_sat = _mm_add_epi32(res_sat, c7fffffff);
+    res_xor_a = _mm_xor_si128(res, a);
+    b_xor_a = _mm_xor_si128(b, a);
+    res_xor_a = _mm_and_si128(b_xor_a, res_xor_a);
+    res_xor_a = _mm_srai_epi32(res_xor_a,31); //propagate the sign bit: all ones if < 0, all zeros otherwise
+    res_sat = _mm_and_si128(res_xor_a, res_sat);
+    res = _mm_andnot_si128(res_xor_a, res);
+    return _mm_or_si128(res, res_sat);
+}
+
+_NEON2SSESTORAGE int64x2_t vqsubq_s64(int64x2_t a, int64x2_t b); // VQSUB.S64 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x2_t vqsubq_s64(int64x2_t a, int64x2_t b), _NEON2SSE_REASON_SLOW_SERIAL) //no optimal SIMD solution
+{
+    _NEON2SSE_ALIGN_16 int64_t atmp[2], btmp[2];
+    _NEON2SSE_ALIGN_16 uint64_t res[2];
+    _mm_store_si128((__m128i*)atmp, a);
+    _mm_store_si128((__m128i*)btmp, b);
+    res[0] = atmp[0] - btmp[0];
+    res[1] = atmp[1] - btmp[1];
+    if (((res[0] ^ atmp[0]) & _SIGNBIT64) && ((atmp[0] ^ btmp[0]) & _SIGNBIT64)) {
+        res[0] = (atmp[0] >> 63) ^ ~_SIGNBIT64;
+    }
+    if (((res[1] ^ atmp[1]) & _SIGNBIT64) && ((atmp[1] ^ btmp[1]) & _SIGNBIT64)) {
+        res[1] = (atmp[1] >> 63) ^ ~_SIGNBIT64;
+    }
+    return _mm_load_si128((__m128i*)res);
+}
+
+_NEON2SSE_GLOBAL uint8x16_t vqsubq_u8(uint8x16_t a, uint8x16_t b); // VQSUB.U8 q0,q0,q0
+#define vqsubq_u8 _mm_subs_epu8
+
+_NEON2SSE_GLOBAL uint16x8_t vqsubq_u16(uint16x8_t a, uint16x8_t b); // VQSUB.s16 q0,q0,q0
+#define vqsubq_u16 _mm_subs_epu16
+
+_NEON2SSESTORAGE uint32x4_t vqsubq_u32(uint32x4_t a, uint32x4_t b); // VQSUB.U32 q0,q0,q0
+_NEON2SSE_INLINE uint32x4_t vqsubq_u32(uint32x4_t a, uint32x4_t b) // VQSUB.U32 q0,q0,q0
+{
+    __m128i min, mask, sub;
+    min = _MM_MIN_EPU32(a, b); //SSE4.1
+    mask = _mm_cmpeq_epi32 (min, b);
+    sub = _mm_sub_epi32 (a, b);
+    return _mm_and_si128 ( sub, mask);
+}
+
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint64x2_t vqsubq_u64(uint64x2_t a, uint64x2_t b), _NEON2SSE_REASON_SLOW_SERIAL); // VQSUB.U64 q0,q0,q0
+#ifdef USE_SSE4
+    _NEON2SSE_INLINE uint64x2_t vqsubq_u64(uint64x2_t a, uint64x2_t b)
+    {
+        __m128i c80000000, subb, suba, cmp, sub;
+        c80000000 = _mm_set_epi32 (0x80000000, 0x0, 0x80000000, 0x0);
+        sub = _mm_sub_epi64 (a, b);
+        suba = _mm_sub_epi64 (a, c80000000);
+        subb = _mm_sub_epi64 (b, c80000000);
+        cmp = _mm_cmpgt_epi64 ( suba, subb); //no unsigned comparison, need to go to signed, SSE4.2!!!
+        return _mm_and_si128 (sub, cmp); //saturation
+    }
+#else
+    _NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint64x2_t vqsubq_u64(uint64x2_t a, uint64x2_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+    {
+        _NEON2SSE_ALIGN_16 uint64_t atmp[2], btmp[2], res[2];
+        _mm_store_si128((__m128i*)atmp, a);
+        _mm_store_si128((__m128i*)btmp, b);
+        res[0] = (atmp[0] > btmp[0]) ? atmp[0] - btmp[0] : 0;
+        res[1] = (atmp[1] > btmp[1]) ? 
atmp[1] - btmp[1] : 0; + return _mm_load_si128((__m128i*)(res)); + } +#endif + +//**********Vector halving subtract Vr[i]:=(Va[i]-Vb[i])>>1 ****************************************************** +//**************************************************************** +_NEON2SSESTORAGE int8x8_t vhsub_s8(int8x8_t a, int8x8_t b); // VHSUB.S8 d0,d0,d0 +_NEON2SSE_INLINE int8x8_t vhsub_s8(int8x8_t a, int8x8_t b) // VHSUB.S8 d0,d0,d0 +{ + //no 8 bit shift available, internal overflow is possible, so let's go to 16 bit, + int8x8_t res64; + __m128i a16, b16, r16; + a16 = _MM_CVTEPI8_EPI16(_pM128i(a)); //SSE 4.1 + b16 = _MM_CVTEPI8_EPI16(_pM128i(b)); //SSE 4.1 + r16 = _mm_sub_epi16(a16, b16); + r16 = _mm_srai_epi16 (r16, 1); //SSE2 + r16 = _mm_packs_epi16 (r16,r16); //use low 64 bits + return64(r16); +} + +_NEON2SSESTORAGE int16x4_t vhsub_s16(int16x4_t a, int16x4_t b); // VHSUB.S16 d0,d0,d0 +_NEON2SSE_INLINE int16x4_t vhsub_s16(int16x4_t a, int16x4_t b) +{ + int16x4_t res64; + return64(vhsubq_s16(_pM128i(a), _pM128i(b))); +} + + + +_NEON2SSESTORAGE int32x2_t vhsub_s32(int32x2_t a, int32x2_t b); // VHSUB.S32 d0,d0,d0 +_NEON2SSE_INLINE int32x2_t vhsub_s32(int32x2_t a, int32x2_t b) +{ + int32x2_t res64; + return64(vhsubq_s32(_pM128i(a), _pM128i(b))); +} + + +_NEON2SSESTORAGE uint8x8_t vhsub_u8(uint8x8_t a, uint8x8_t b); // VHSUB.U8 d0,d0,d0 +_NEON2SSE_INLINE uint8x8_t vhsub_u8(uint8x8_t a, uint8x8_t b) +{ + uint8x8_t res64; + return64(vhsubq_u8(_pM128i(a), _pM128i(b))); +} + +_NEON2SSESTORAGE uint16x4_t vhsub_u16(uint16x4_t a, uint16x4_t b); // VHSUB.s16 d0,d0,d0 +_NEON2SSE_INLINE uint16x4_t vhsub_u16(uint16x4_t a, uint16x4_t b) +{ + uint16x4_t res64; + return64(vhsubq_u16(_pM128i(a), _pM128i(b))); +} + +_NEON2SSESTORAGE uint32x2_t vhsub_u32(uint32x2_t a, uint32x2_t b); // VHSUB.U32 d0,d0,d0 +_NEON2SSE_INLINE uint32x2_t vhsub_u32(uint32x2_t a, uint32x2_t b) +{ + uint32x2_t res64; + return64(vhsubq_u32(_pM128i(a), _pM128i(b))); +} + +_NEON2SSESTORAGE int8x16_t vhsubq_s8(int8x16_t a, int8x16_t b); // VHSUB.S8 q0,q0,q0 +_NEON2SSE_INLINE int8x16_t vhsubq_s8(int8x16_t a, int8x16_t b) // VHSUB.S8 q0,q0,q0 +{ + //need to deal with the possibility of internal overflow + __m128i c128, au,bu; + c128 = _mm_set1_epi8(-128); //(int8_t)0x80 + au = _mm_add_epi8( a, c128); + bu = _mm_add_epi8( b, c128); + return vhsubq_u8(au,bu); +} + +_NEON2SSESTORAGE int16x8_t vhsubq_s16(int16x8_t a, int16x8_t b); // VHSUB.S16 q0,q0,q0 +_NEON2SSE_INLINE int16x8_t vhsubq_s16(int16x8_t a, int16x8_t b) // VHSUB.S16 q0,q0,q0 +{ + //need to deal with the possibility of internal overflow + __m128i c8000, au,bu; + c8000 = _mm_set1_epi16(-32768); //(int16_t)0x8000 + au = _mm_add_epi16( a, c8000); + bu = _mm_add_epi16( b, c8000); + return vhsubq_u16(au,bu); +} + +_NEON2SSESTORAGE int32x4_t vhsubq_s32(int32x4_t a, int32x4_t b); // VHSUB.S32 q0,q0,q0 +_NEON2SSE_INLINE int32x4_t vhsubq_s32(int32x4_t a, int32x4_t b) // VHSUB.S32 q0,q0,q0 +{ + //need to deal with the possibility of internal overflow + __m128i a2, b2,r, b_1; + a2 = _mm_srai_epi32 (a,1); + b2 = _mm_srai_epi32 (b,1); + r = _mm_sub_epi32 (a2, b2); + b_1 = _mm_andnot_si128(a, b); //!a and b + b_1 = _mm_slli_epi32 (b_1,31); + b_1 = _mm_srli_epi32 (b_1,31); //0 or 1, last b bit + return _mm_sub_epi32(r,b_1); +} + +_NEON2SSESTORAGE uint8x16_t vhsubq_u8(uint8x16_t a, uint8x16_t b); // VHSUB.U8 q0,q0,q0 +_NEON2SSE_INLINE uint8x16_t vhsubq_u8(uint8x16_t a, uint8x16_t b) // VHSUB.U8 q0,q0,q0 +{ + __m128i avg; + avg = _mm_avg_epu8 (a, b); + return _mm_sub_epi8(a, avg); +} + +_NEON2SSESTORAGE 
uint16x8_t vhsubq_u16(uint16x8_t a, uint16x8_t b); // VHSUB.s16 q0,q0,q0 +_NEON2SSE_INLINE uint16x8_t vhsubq_u16(uint16x8_t a, uint16x8_t b) // VHSUB.s16 q0,q0,q0 +{ + __m128i avg; + avg = _mm_avg_epu16 (a, b); + return _mm_sub_epi16(a, avg); +} + +_NEON2SSESTORAGE uint32x4_t vhsubq_u32(uint32x4_t a, uint32x4_t b); // VHSUB.U32 q0,q0,q0 +_NEON2SSE_INLINE uint32x4_t vhsubq_u32(uint32x4_t a, uint32x4_t b) // VHSUB.U32 q0,q0,q0 +{ + //need to deal with the possibility of internal overflow + __m128i a2, b2,r, b_1; + a2 = _mm_srli_epi32 (a,1); + b2 = _mm_srli_epi32 (b,1); + r = _mm_sub_epi32 (a2, b2); + b_1 = _mm_andnot_si128(a, b); //!a and b + b_1 = _mm_slli_epi32 (b_1,31); + b_1 = _mm_srli_epi32 (b_1,31); //0 or 1, last b bit + return _mm_sub_epi32(r,b_1); +} + +//******* Vector subtract high half (truncated) ** ************ +//************************************************************ +_NEON2SSESTORAGE int8x8_t vsubhn_s16(int16x8_t a, int16x8_t b); // VSUBHN.I16 d0,q0,q0 +_NEON2SSE_INLINE int8x8_t vsubhn_s16(int16x8_t a, int16x8_t b) // VSUBHN.I16 d0,q0,q0 +{ + int8x8_t res64; + __m128i sum, sum8; + sum = _mm_sub_epi16 (a, b); + sum8 = _mm_srai_epi16 (sum, 8); + sum8 = _mm_packs_epi16(sum8,sum8); + return64(sum8); +} + +_NEON2SSESTORAGE int16x4_t vsubhn_s32(int32x4_t a, int32x4_t b); // VSUBHN.I32 d0,q0,q0 +_NEON2SSE_INLINE int16x4_t vsubhn_s32(int32x4_t a, int32x4_t b) // VSUBHN.I32 d0,q0,q0 +{ + int16x4_t res64; + __m128i sum, sum16; + sum = _mm_sub_epi32 (a, b); + sum16 = _mm_srai_epi32 (sum, 16); + sum16 = _mm_packs_epi32(sum16,sum16); + return64(sum16); +} + +_NEON2SSESTORAGE int32x2_t vsubhn_s64(int64x2_t a, int64x2_t b); // VSUBHN.I64 d0,q0,q0 +_NEON2SSE_INLINE int32x2_t vsubhn_s64(int64x2_t a, int64x2_t b) +{ + int32x2_t res64; + __m128i sub; + sub = _mm_sub_epi64 (a, b); + sub = _mm_shuffle_epi32(sub, 1 | (3 << 2) | (0 << 4) | (2 << 6)); + return64(sub); +} + +_NEON2SSESTORAGE uint8x8_t vsubhn_u16(uint16x8_t a, uint16x8_t b); // VSUBHN.I16 d0,q0,q0 +_NEON2SSE_INLINE uint8x8_t vsubhn_u16(uint16x8_t a, uint16x8_t b) // VSUBHN.I16 d0,q0,q0 +{ + uint8x8_t res64; + __m128i sum, sum8; + sum = _mm_sub_epi16 (a, b); + sum8 = _mm_srli_epi16 (sum, 8); + sum8 = _mm_packus_epi16(sum8,sum8); + return64(sum8); +} + +_NEON2SSESTORAGE uint16x4_t vsubhn_u32(uint32x4_t a, uint32x4_t b); // VSUBHN.I32 d0,q0,q0 +_NEON2SSE_INLINE uint16x4_t vsubhn_u32(uint32x4_t a, uint32x4_t b) // VSUBHN.I32 d0,q0,q0 +{ + uint16x4_t res64; + __m128i sum, sum16; + sum = _mm_sub_epi32 (a, b); + sum16 = _mm_srli_epi32 (sum, 16); +#ifdef USE_SSE4 + sum16 = _MM_PACKUS1_EPI32(sum16); +#else + sum16 = _mm_shuffle_epi8 (sum16, *(__m128i*) mask8_32_even_odd); //go to 16 bits +#endif + return64(sum16); +} + +_NEON2SSE_GLOBAL uint32x2_t vsubhn_u64(uint64x2_t a, uint64x2_t b); // VSUBHN.I64 d0,q0,q0 +#define vsubhn_u64 vsubhn_s64 + +//************ Vector rounding subtract high half ********************* +//********************************************************************* +_NEON2SSESTORAGE int8x8_t vrsubhn_s16(int16x8_t a, int16x8_t b); // VRSUBHN.I16 d0,q0,q0 +_NEON2SSE_INLINE int8x8_t vrsubhn_s16(int16x8_t a, int16x8_t b) // VRSUBHN.I16 d0,q0,q0 +{ + int8x8_t res64; + __m128i sub, mask1; + sub = _mm_sub_epi16 (a, b); + mask1 = _mm_slli_epi16(sub, 8); //shift left then back right to + mask1 = _mm_srli_epi16(mask1, 15); //get 7-th bit 1 or zero + sub = _mm_srai_epi16 (sub, 8); //get high half + sub = _mm_add_epi16 (sub, mask1); //actual rounding + sub = _mm_packs_epi16 (sub, sub); + return64(sub); +} + +_NEON2SSESTORAGE 
int16x4_t vrsubhn_s32(int32x4_t a, int32x4_t b); // VRSUBHN.I32 d0,q0,q0
+_NEON2SSE_INLINE int16x4_t vrsubhn_s32(int32x4_t a, int32x4_t b) // VRSUBHN.I32 d0,q0,q0
+{
+    //SIMD may be suboptimal; serial may be faster
+    int16x4_t res64;
+    __m128i sub, mask1;
+    sub = _mm_sub_epi32 (a, b);
+    mask1 = _mm_slli_epi32(sub, 16); //shift left then back right to
+    mask1 = _mm_srli_epi32(mask1,31); //get 15-th bit 1 or zero
+    sub = _mm_srai_epi32 (sub, 16); //get high half
+    sub = _mm_add_epi32 (sub, mask1); //actual rounding
+    sub = _mm_packs_epi32 (sub, sub);
+    return64(sub);
+}
+
+_NEON2SSESTORAGE int32x2_t vrsubhn_s64(int64x2_t a, int64x2_t b); // VRSUBHN.I64 d0,q0,q0
+_NEON2SSE_INLINE int32x2_t vrsubhn_s64(int64x2_t a, int64x2_t b)
+{
+    //SIMD may be suboptimal; serial may be faster
+    int32x2_t res64;
+    __m128i sub, mask1;
+    sub = _mm_sub_epi64 (a, b);
+    mask1 = _mm_slli_epi64(sub, 32); //shift left then back right to
+    mask1 = _mm_srli_epi64(mask1,31); //get 31-th bit 1 or zero
+    sub = _mm_add_epi32 (sub, mask1); //actual high half rounding
+    sub = _mm_shuffle_epi32(sub, 1 | (3 << 2) | (0 << 4) | (2 << 6));
+    return64(sub);
+}
+
+_NEON2SSESTORAGE uint8x8_t vrsubhn_u16(uint16x8_t a, uint16x8_t b); // VRSUBHN.I16 d0,q0,q0
+_NEON2SSE_INLINE uint8x8_t vrsubhn_u16(uint16x8_t a, uint16x8_t b) // VRSUBHN.I16 d0,q0,q0
+{
+    uint8x8_t res64;
+    __m128i sub, mask1;
+    sub = _mm_sub_epi16 (a, b);
+    mask1 = _mm_slli_epi16(sub, 8); //shift left then back right to
+    mask1 = _mm_srli_epi16(mask1, 15); //get 7-th bit 1 or zero
+    sub = _mm_srai_epi16 (sub, 8); //get high half
+    sub = _mm_add_epi16 (sub, mask1); //actual rounding
+    sub = _mm_packus_epi16 (sub, sub);
+    return64(sub);
+}
+
+_NEON2SSESTORAGE uint16x4_t vrsubhn_u32(uint32x4_t a, uint32x4_t b); // VRSUBHN.I32 d0,q0,q0
+_NEON2SSE_INLINE uint16x4_t vrsubhn_u32(uint32x4_t a, uint32x4_t b) // VRSUBHN.I32 d0,q0,q0
+{
+    //SIMD may be suboptimal; serial may be faster
+    uint16x4_t res64;
+    __m128i sub, mask1;
+    sub = _mm_sub_epi32 (a, b);
+    mask1 = _mm_slli_epi32(sub, 16); //shift left then back right to
+    mask1 = _mm_srli_epi32(mask1,31); //get 15-th bit 1 or zero
+    sub = _mm_srai_epi32 (sub, 16); //get high half
+    sub = _mm_add_epi32 (sub, mask1); //actual rounding
+#ifdef USE_SSE4
+    sub = _MM_PACKUS1_EPI32 (sub);
+#else
+    sub = _mm_shuffle_epi8 (sub, *(__m128i*) mask8_32_even_odd); //go to 16 bits
+#endif
+    return64(sub);
+}
+
+_NEON2SSE_GLOBAL uint32x2_t vrsubhn_u64(uint64x2_t a, uint64x2_t b); // VRSUBHN.I64 d0,q0,q0
+#define vrsubhn_u64 vrsubhn_s64
+
+//*********** Vector saturating doubling multiply subtract long ********************
+//************************************************************************************
+_NEON2SSESTORAGE int32x4_t vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c); // VQDMLSL.S16 q0,d0,d0
+_NEON2SSE_INLINE int32x4_t vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c)
+{
+    //suboptimal SIMD solution; serial may be faster
+    __m128i res32, mask;
+    int32x4_t res;
+    _NEON2SSE_ALIGN_16 static const uint32_t cmask[] = {0x80000000, 0x80000000, 0x80000000, 0x80000000};
+    res = vmull_s16(b, c);
+    res32 = _mm_slli_epi32 (res, 1); //double the result, saturation not considered
+    mask = _mm_cmpeq_epi32 (res32, *(__m128i*)cmask);
+    res32 = _mm_xor_si128 (res32, mask); //res32 saturated for 0x80000000
+    return vqsubq_s32(a, res32); //saturation
+}
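+
+// Usage sketch (illustrative only; hypothetical name, not compiled): vqdmlsl
+// is the multiply-subtract half of a saturating Q15 MAC pipeline.
+#if 0
+static int32x4_t q15_msub_example(int32x4_t acc, int16x4_t b, int16x4_t c)
+{
+    // acc[i] = sat(acc[i] - sat(2 * b[i] * c[i])), both steps saturating
+    return vqdmlsl_s16(acc, b, c);
+}
+#endif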
+
+_NEON2SSESTORAGE int64x2_t vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c); // VQDMLSL.S32 q0,d0,d0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x2_t vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    __m128i res64, mask;
+    int64x2_t res;
+    _NEON2SSE_ALIGN_16 static const uint64_t cmask[] = {0x8000000000000000, 0x8000000000000000};
+    res = vmull_s32(b, c);
+    res64 = _mm_slli_epi64 (res, 1); //double the result, saturation not considered
+    mask = _MM_CMPEQ_EPI64 (res64, *(__m128i*)cmask);
+    res64 = _mm_xor_si128 (res64, mask); //res64 saturated for 0x8000000000000000
+    return vqsubq_s64(a, res64); //saturation
+}
+
+//****************** COMPARISON ***************************************
+//******************* Vector compare equal *************************************
+//****************************************************************************
+_NEON2SSESTORAGE uint8x8_t vceq_s8(int8x8_t a, int8x8_t b); // VCEQ.I8 d0, d0, d0
+_NEON2SSE_INLINE int8x8_t vceq_s8(int8x8_t a, int8x8_t b)
+{
+    int8x8_t res64;
+    return64(_mm_cmpeq_epi8(_pM128i(a),_pM128i(b)));
+}
+
+
+_NEON2SSESTORAGE uint16x4_t vceq_s16(int16x4_t a, int16x4_t b); // VCEQ.I16 d0, d0, d0
+_NEON2SSE_INLINE int16x4_t vceq_s16(int16x4_t a, int16x4_t b)
+{
+    int16x4_t res64;
+    return64(_mm_cmpeq_epi16(_pM128i(a),_pM128i(b)));
+}
+
+
+_NEON2SSESTORAGE uint32x2_t vceq_s32(int32x2_t a, int32x2_t b); // VCEQ.I32 d0, d0, d0
+_NEON2SSE_INLINE int32x2_t vceq_s32(int32x2_t a, int32x2_t b)
+{
+    int32x2_t res64;
+    return64(_mm_cmpeq_epi32(_pM128i(a),_pM128i(b)));
+}
+
+
+_NEON2SSESTORAGE uint32x2_t vceq_f32(float32x2_t a, float32x2_t b); // VCEQ.F32 d0, d0, d0
+_NEON2SSE_INLINE uint32x2_t vceq_f32(float32x2_t a, float32x2_t b)
+{
+    uint32x2_t res64;
+    __m128 res;
+    res = _mm_cmpeq_ps(_pM128(a), _pM128(b));
+    return64f(res);
+}
+
+_NEON2SSESTORAGE uint8x8_t vceq_u8(uint8x8_t a, uint8x8_t b); // VCEQ.I8 d0, d0, d0
+_NEON2SSE_INLINE uint8x8_t vceq_u8(uint8x8_t a, uint8x8_t b)
+{
+    uint8x8_t res64;
+    return64(_mm_cmpeq_epi8(_pM128i(a),_pM128i(b)));
+}
+
+
+_NEON2SSESTORAGE uint16x4_t vceq_u16(uint16x4_t a, uint16x4_t b); // VCEQ.I16 d0, d0, d0
+_NEON2SSE_INLINE uint16x4_t vceq_u16(uint16x4_t a, uint16x4_t b)
+{
+    uint16x4_t res64;
+    return64(_mm_cmpeq_epi16(_pM128i(a),_pM128i(b)));
+}
+
+
+_NEON2SSESTORAGE uint32x2_t vceq_u32(uint32x2_t a, uint32x2_t b); // VCEQ.I32 d0, d0, d0
+_NEON2SSE_INLINE uint32x2_t vceq_u32(uint32x2_t a, uint32x2_t b)
+{
+    uint32x2_t res64;
+    return64(_mm_cmpeq_epi32(_pM128i(a),_pM128i(b)));
+}
+
+
+_NEON2SSE_GLOBAL uint8x8_t vceq_p8(poly8x8_t a, poly8x8_t b); // VCEQ.I8 d0, d0, d0
+#define vceq_p8 vceq_u8
+
+
+_NEON2SSE_GLOBAL uint8x16_t vceqq_s8(int8x16_t a, int8x16_t b); // VCEQ.I8 q0, q0, q0
+#define vceqq_s8 _mm_cmpeq_epi8
+
+_NEON2SSE_GLOBAL uint16x8_t vceqq_s16(int16x8_t a, int16x8_t b); // VCEQ.I16 q0, q0, q0
+#define vceqq_s16 _mm_cmpeq_epi16
+
+_NEON2SSE_GLOBAL uint32x4_t vceqq_s32(int32x4_t a, int32x4_t b); // VCEQ.I32 q0, q0, q0
+#define vceqq_s32 _mm_cmpeq_epi32
+
+_NEON2SSESTORAGE uint32x4_t vceqq_f32(float32x4_t a, float32x4_t b); // VCEQ.F32 q0, q0, q0
+_NEON2SSE_INLINE uint32x4_t vceqq_f32(float32x4_t a, float32x4_t b)
+{
+    __m128 res;
+    res = _mm_cmpeq_ps(a,b);
+    return _M128i(res);
+}
+
+_NEON2SSE_GLOBAL uint8x16_t vceqq_u8(uint8x16_t a, uint8x16_t b); // VCEQ.I8 q0, q0, q0
+#define vceqq_u8 _mm_cmpeq_epi8
+
+_NEON2SSE_GLOBAL uint16x8_t vceqq_u16(uint16x8_t a, uint16x8_t b); // VCEQ.I16 q0, q0, q0
+#define vceqq_u16 _mm_cmpeq_epi16
+
+_NEON2SSE_GLOBAL uint32x4_t vceqq_u32(uint32x4_t a, uint32x4_t b); // VCEQ.I32 q0, q0, q0
+#define vceqq_u32 _mm_cmpeq_epi32
+
+_NEON2SSE_GLOBAL uint8x16_t vceqq_p8(poly8x16_t a, poly8x16_t b); // VCEQ.I8 q0, q0, q0
+#define vceqq_p8 _mm_cmpeq_epi8 + +//******************Vector compare greater-than or equal************************* +//******************************************************************************* +//in IA SIMD no greater-than-or-equal comparison for integers, +// there is greater-than available only, so we need the following tricks + +_NEON2SSESTORAGE uint8x8_t vcge_s8(int8x8_t a, int8x8_t b); // VCGE.S8 d0, d0, d0 +_NEON2SSE_INLINE int8x8_t vcge_s8(int8x8_t a, int8x8_t b) +{ + int8x8_t res64; + return64(vcgeq_s8(_pM128i(a), _pM128i(b))); +} + + +_NEON2SSESTORAGE uint16x4_t vcge_s16(int16x4_t a, int16x4_t b); // VCGE.S16 d0, d0, d0 +_NEON2SSE_INLINE int16x4_t vcge_s16(int16x4_t a, int16x4_t b) +{ + int16x4_t res64; + return64(vcgeq_s16(_pM128i(a), _pM128i(b))); +} + + +_NEON2SSESTORAGE uint32x2_t vcge_s32(int32x2_t a, int32x2_t b); // VCGE.S32 d0, d0, d0 +_NEON2SSE_INLINE int32x2_t vcge_s32(int32x2_t a, int32x2_t b) +{ + int32x2_t res64; + return64(vcgeq_s32(_pM128i(a), _pM128i(b))); +} + + +_NEON2SSESTORAGE uint32x2_t vcge_f32(float32x2_t a, float32x2_t b); // VCGE.F32 d0, d0, d0 +_NEON2SSE_INLINE uint32x2_t vcge_f32(float32x2_t a, float32x2_t b) +{ + uint32x2_t res64; + __m128 res; + res = _mm_cmpge_ps(_pM128(a),_pM128(b)); //use only 2 first entries + return64f(res); +} + +_NEON2SSESTORAGE uint8x8_t vcge_u8(uint8x8_t a, uint8x8_t b); // VCGE.U8 d0, d0, d0 +_NEON2SSE_INLINE uint8x8_t vcge_u8(uint8x8_t a, uint8x8_t b) +{ + uint8x8_t res64; + return64(vcgeq_u8(_pM128i(a), _pM128i(b))); +} + + +_NEON2SSESTORAGE uint16x4_t vcge_u16(uint16x4_t a, uint16x4_t b); // VCGE.s16 d0, d0, d0 +_NEON2SSE_INLINE uint16x4_t vcge_u16(uint16x4_t a, uint16x4_t b) +{ + uint16x4_t res64; + return64(vcgeq_u16(_pM128i(a), _pM128i(b))); +} + + +_NEON2SSESTORAGE uint32x2_t vcge_u32(uint32x2_t a, uint32x2_t b); // VCGE.U32 d0, d0, d0 +_NEON2SSE_INLINE uint32x2_t vcge_u32(uint32x2_t a, uint32x2_t b) +{ + //serial solution looks faster + uint32x2_t res64; + return64(vcgeq_u32 (_pM128i(a), _pM128i(b))); +} + + + +_NEON2SSESTORAGE uint8x16_t vcgeq_s8(int8x16_t a, int8x16_t b); // VCGE.S8 q0, q0, q0 +_NEON2SSE_INLINE uint8x16_t vcgeq_s8(int8x16_t a, int8x16_t b) // VCGE.S8 q0, q0, q0 +{ + __m128i m1, m2; + m1 = _mm_cmpgt_epi8 ( a, b); + m2 = _mm_cmpeq_epi8 ( a, b); + return _mm_or_si128 ( m1, m2); +} + +_NEON2SSESTORAGE uint16x8_t vcgeq_s16(int16x8_t a, int16x8_t b); // VCGE.S16 q0, q0, q0 +_NEON2SSE_INLINE uint16x8_t vcgeq_s16(int16x8_t a, int16x8_t b) // VCGE.S16 q0, q0, q0 +{ + __m128i m1, m2; + m1 = _mm_cmpgt_epi16 ( a, b); + m2 = _mm_cmpeq_epi16 ( a, b); + return _mm_or_si128 ( m1,m2); +} + +_NEON2SSESTORAGE uint32x4_t vcgeq_s32(int32x4_t a, int32x4_t b); // VCGE.S32 q0, q0, q0 +_NEON2SSE_INLINE uint32x4_t vcgeq_s32(int32x4_t a, int32x4_t b) // VCGE.S32 q0, q0, q0 +{ + __m128i m1, m2; + m1 = _mm_cmpgt_epi32 (a, b); + m2 = _mm_cmpeq_epi32 (a, b); + return _mm_or_si128 (m1, m2); +} + +_NEON2SSESTORAGE uint32x4_t vcgeq_f32(float32x4_t a, float32x4_t b); // VCGE.F32 q0, q0, q0 +_NEON2SSE_INLINE uint32x4_t vcgeq_f32(float32x4_t a, float32x4_t b) +{ + __m128 res; + res = _mm_cmpge_ps(a,b); //use only 2 first entries + return *(__m128i*)&res; +} + +_NEON2SSESTORAGE uint8x16_t vcgeq_u8(uint8x16_t a, uint8x16_t b); // VCGE.U8 q0, q0, q0 +_NEON2SSE_INLINE uint8x16_t vcgeq_u8(uint8x16_t a, uint8x16_t b) // VCGE.U8 q0, q0, q0 +{ + //no unsigned chars comparison, only signed available,so need the trick + __m128i cmp; + cmp = _mm_max_epu8(a, b); + return _mm_cmpeq_epi8(cmp, a); //a>=b +} + +_NEON2SSESTORAGE uint16x8_t 
vcgeq_u16(uint16x8_t a, uint16x8_t b); // VCGE.s16 q0, q0, q0 +_NEON2SSE_INLINE uint16x8_t vcgeq_u16(uint16x8_t a, uint16x8_t b) // VCGE.s16 q0, q0, q0 +{ + //no unsigned shorts comparison, only signed available,so need the trick +#ifdef USE_SSE4 + __m128i cmp; + cmp = _mm_max_epu16(a, b); + return _mm_cmpeq_epi16(cmp, a); //a>=b +#else + __m128i zero = _mm_setzero_si128(); + __m128i as = _mm_subs_epu16(b, a); + return _mm_cmpeq_epi16(as, zero); +#endif +} + +_NEON2SSESTORAGE uint32x4_t vcgeq_u32(uint32x4_t a, uint32x4_t b); // VCGE.U32 q0, q0, q0 +_NEON2SSE_INLINE uint32x4_t vcgeq_u32(uint32x4_t a, uint32x4_t b) // VCGE.U32 q0, q0, q0 +{ + //no unsigned ints comparison, only signed available,so need the trick +#ifdef USE_SSE4 + __m128i cmp; + cmp = _mm_max_epu32(a, b); + return _mm_cmpeq_epi32(cmp, a); //a>=b +#else + //serial solution may be faster + __m128i c80000000, as, bs, m1, m2; + c80000000 = _mm_set1_epi32 (0x80000000); + as = _mm_sub_epi32(a,c80000000); + bs = _mm_sub_epi32(b,c80000000); + m1 = _mm_cmpgt_epi32 (as, bs); + m2 = _mm_cmpeq_epi32 (as, bs); + return _mm_or_si128 ( m1, m2); +#endif +} + +//**********************Vector compare less-than or equal****************************** +//*************************************************************************************** +//in IA SIMD no less-than-or-equal comparison for integers present, so we need the tricks + +_NEON2SSESTORAGE uint8x8_t vcle_s8(int8x8_t a, int8x8_t b); // VCGE.S8 d0, d0, d0 +_NEON2SSE_INLINE int8x8_t vcle_s8(int8x8_t a, int8x8_t b) +{ + int8x8_t res64; + return64(vcleq_s8(_pM128i(a), _pM128i(b))); +} + + +_NEON2SSESTORAGE uint16x4_t vcle_s16(int16x4_t a, int16x4_t b); // VCGE.S16 d0, d0, d0 +_NEON2SSE_INLINE int16x4_t vcle_s16(int16x4_t a, int16x4_t b) +{ + int16x4_t res64; + return64(vcleq_s16(_pM128i(a), _pM128i(b))); +} + + +_NEON2SSESTORAGE uint32x2_t vcle_s32(int32x2_t a, int32x2_t b); // VCGE.S32 d0, d0, d0 +_NEON2SSE_INLINE int32x2_t vcle_s32(int32x2_t a, int32x2_t b) +{ + int32x2_t res64; + return64(vcleq_s32(_pM128i(a), _pM128i(b))); +} + + +_NEON2SSESTORAGE uint32x2_t vcle_f32(float32x2_t a, float32x2_t b); // VCGE.F32 d0, d0, d0? +_NEON2SSE_INLINE uint32x2_t vcle_f32(float32x2_t a, float32x2_t b) +{ + uint32x2_t res64; + __m128 res; + res = _mm_cmple_ps(_pM128(a),_pM128(b)); + return64f(res); +} + +_NEON2SSE_GLOBAL uint8x8_t vcle_u8(uint8x8_t a, uint8x8_t b); // VCGE.U8 d0, d0, d0 +#define vcle_u8(a,b) vcge_u8(b,a) + + +_NEON2SSE_GLOBAL uint16x4_t vcle_u16(uint16x4_t a, uint16x4_t b); // VCGE.s16 d0, d0, d0 +#define vcle_u16(a,b) vcge_u16(b,a) + + +_NEON2SSE_GLOBAL uint32x2_t vcle_u32(uint32x2_t a, uint32x2_t b); // VCGE.U32 d0, d0, d0 +#define vcle_u32(a,b) vcge_u32(b,a) + +_NEON2SSESTORAGE uint8x16_t vcleq_s8(int8x16_t a, int8x16_t b); // VCGE.S8 q0, q0, q0 +_NEON2SSE_INLINE uint8x16_t vcleq_s8(int8x16_t a, int8x16_t b) // VCGE.S8 q0, q0, q0 +{ + __m128i c1, res; + c1 = _mm_cmpeq_epi8 (a,a); //all ones 0xff.... + res = _mm_cmpgt_epi8 ( a, b); + return _mm_andnot_si128 (res, c1); //inverse the cmpgt result, get less-than-or-equal +} + +_NEON2SSESTORAGE uint16x8_t vcleq_s16(int16x8_t a, int16x8_t b); // VCGE.S16 q0, q0, q0 +_NEON2SSE_INLINE uint16x8_t vcleq_s16(int16x8_t a, int16x8_t b) // VCGE.S16 q0, q0, q0 +{ + __m128i c1, res; + c1 = _mm_cmpeq_epi16 (a,a); //all ones 0xff.... 
+ res = _mm_cmpgt_epi16 ( a, b); + return _mm_andnot_si128 (res, c1); +} + +_NEON2SSESTORAGE uint32x4_t vcleq_s32(int32x4_t a, int32x4_t b); // VCGE.S32 q0, q0, q0 +_NEON2SSE_INLINE uint32x4_t vcleq_s32(int32x4_t a, int32x4_t b) // VCGE.S32 q0, q0, q0 +{ + __m128i c1, res; + c1 = _mm_cmpeq_epi32 (a,a); //all ones 0xff.... + res = _mm_cmpgt_epi32 ( a, b); + return _mm_andnot_si128 (res, c1); +} + +_NEON2SSESTORAGE uint32x4_t vcleq_f32(float32x4_t a, float32x4_t b); // VCGE.F32 q0, q0, q0 +_NEON2SSE_INLINE uint32x4_t vcleq_f32(float32x4_t a, float32x4_t b) +{ + __m128 res; + res = _mm_cmple_ps(a,b); + return *(__m128i*)&res; +} + +_NEON2SSESTORAGE uint8x16_t vcleq_u8(uint8x16_t a, uint8x16_t b); // VCGE.U8 q0, q0, q0 +#ifdef USE_SSE4 + _NEON2SSE_INLINE uint8x16_t vcleq_u8(uint8x16_t a, uint8x16_t b) // VCGE.U8 q0, q0, q0 + { + //no unsigned chars comparison in SSE, only signed available,so need the trick + __m128i cmp; + cmp = _mm_min_epu8(a, b); + return _mm_cmpeq_epi8(cmp, a); //a<=b + } +#else + _NEON2SSE_INLINE uint8x16_t vcleq_u8(uint8x16_t a, uint8x16_t b) // VCGE.U8 q0, q0, q0 + { + return vcgeq_u8(b, a); + } +#endif + +_NEON2SSESTORAGE uint16x8_t vcleq_u16(uint16x8_t a, uint16x8_t b); // VCGE.s16 q0, q0, q0 +#ifdef USE_SSE4 + _NEON2SSE_INLINE uint16x8_t vcleq_u16(uint16x8_t a, uint16x8_t b) // VCGE.s16 q0, q0, q0 + { + //no unsigned shorts comparison in SSE, only signed available,so need the trick + __m128i cmp; + cmp = _mm_min_epu16(a, b); + return _mm_cmpeq_epi16(cmp, a); //a<=b + } +#else + _NEON2SSE_INLINE uint16x8_t vcleq_u16(uint16x8_t a, uint16x8_t b) // VCGE.s16 q0, q0, q0 + { + return vcgeq_u16(b, a); + } +#endif + +_NEON2SSESTORAGE uint32x4_t vcleq_u32(uint32x4_t a, uint32x4_t b); // VCGE.U32 q0, q0, q0 +#ifdef USE_SSE4 + _NEON2SSE_INLINE uint32x4_t vcleq_u32(uint32x4_t a, uint32x4_t b) // VCGE.U32 q0, q0, q0 + { + //no unsigned chars comparison in SSE, only signed available,so need the trick + __m128i cmp; + cmp = _mm_min_epu32(a, b); + return _mm_cmpeq_epi32(cmp, a); //a<=b + } +#else + _NEON2SSE_INLINE uint32x4_t vcleq_u32(uint32x4_t a, uint32x4_t b) // VCGE.U32 q0, q0, q0 + { + return vcgeq_u32(b, a); + } +#endif + + +//****** Vector compare greater-than ****************************************** +//************************************************************************** +_NEON2SSESTORAGE uint8x8_t vcgt_s8(int8x8_t a, int8x8_t b); // VCGT.S8 d0, d0, d0 +_NEON2SSE_INLINE int8x8_t vcgt_s8(int8x8_t a, int8x8_t b) +{ + int8x8_t res64; + return64(_mm_cmpgt_epi8(_pM128i(a),_pM128i(b))); +} + + +_NEON2SSESTORAGE uint16x4_t vcgt_s16(int16x4_t a, int16x4_t b); // VCGT.S16 d0, d0, d0 +_NEON2SSE_INLINE int16x4_t vcgt_s16(int16x4_t a, int16x4_t b) +{ + int16x4_t res64; + return64(_mm_cmpgt_epi16(_pM128i(a),_pM128i(b))); +} + + +_NEON2SSESTORAGE uint32x2_t vcgt_s32(int32x2_t a, int32x2_t b); // VCGT.S32 d0, d0, d0 +_NEON2SSE_INLINE int32x2_t vcgt_s32(int32x2_t a, int32x2_t b) +{ + int32x2_t res64; + return64(_mm_cmpgt_epi32(_pM128i(a),_pM128i(b))); +} + + +_NEON2SSESTORAGE uint32x2_t vcgt_f32(float32x2_t a, float32x2_t b); // VCGT.F32 d0, d0, d0 +_NEON2SSE_INLINE uint32x2_t vcgt_f32(float32x2_t a, float32x2_t b) +{ + uint32x2_t res64; + __m128 res; + res = _mm_cmpgt_ps(_pM128(a),_pM128(b)); //use only 2 first entries + return64f(res); +} + +_NEON2SSESTORAGE uint8x8_t vcgt_u8(uint8x8_t a, uint8x8_t b); // VCGT.U8 d0, d0, d0 +_NEON2SSE_INLINE uint8x8_t vcgt_u8(uint8x8_t a, uint8x8_t b) +{ + uint8x8_t res64; + return64(vcgtq_u8(_pM128i(a), _pM128i(b))); +} + + +_NEON2SSESTORAGE 
uint16x4_t vcgt_u16(uint16x4_t a, uint16x4_t b); // VCGT.U16 d0, d0, d0
+_NEON2SSE_INLINE uint16x4_t vcgt_u16(uint16x4_t a, uint16x4_t b)
+{
+    uint16x4_t res64;
+    return64(vcgtq_u16(_pM128i(a), _pM128i(b)));
+}
+
+_NEON2SSESTORAGE uint32x2_t vcgt_u32(uint32x2_t a, uint32x2_t b); // VCGT.U32 d0, d0, d0
+_NEON2SSE_INLINE uint32x2_t vcgt_u32(uint32x2_t a, uint32x2_t b)
+{
+    uint32x2_t res64;
+    return64(vcgtq_u32(_pM128i(a), _pM128i(b)));
+}
+
+_NEON2SSE_GLOBAL uint8x16_t vcgtq_s8(int8x16_t a, int8x16_t b); // VCGT.S8 q0, q0, q0
+#define vcgtq_s8 _mm_cmpgt_epi8
+
+_NEON2SSE_GLOBAL uint16x8_t vcgtq_s16(int16x8_t a, int16x8_t b); // VCGT.S16 q0, q0, q0
+#define vcgtq_s16 _mm_cmpgt_epi16
+
+_NEON2SSE_GLOBAL uint32x4_t vcgtq_s32(int32x4_t a, int32x4_t b); // VCGT.S32 q0, q0, q0
+#define vcgtq_s32 _mm_cmpgt_epi32
+
+_NEON2SSESTORAGE uint32x4_t vcgtq_f32(float32x4_t a, float32x4_t b); // VCGT.F32 q0, q0, q0
+_NEON2SSE_INLINE uint32x4_t vcgtq_f32(float32x4_t a, float32x4_t b)
+{
+    __m128 res;
+    res = _mm_cmpgt_ps(a, b); //all 4 entries are used
+    return *(__m128i*)&res;
+}
+
+_NEON2SSESTORAGE uint8x16_t vcgtq_u8(uint8x16_t a, uint8x16_t b); // VCGT.U8 q0, q0, q0
+_NEON2SSE_INLINE uint8x16_t vcgtq_u8(uint8x16_t a, uint8x16_t b) // VCGT.U8 q0, q0, q0
+{
+    //no unsigned byte comparison, only signed is available, so subtract the sign bias
+    __m128i c128, as, bs;
+    c128 = _mm_set1_epi8(-128); //(int8_t)0x80
+    as = _mm_sub_epi8(a, c128);
+    bs = _mm_sub_epi8(b, c128);
+    return _mm_cmpgt_epi8(as, bs);
+}
+
+_NEON2SSESTORAGE uint16x8_t vcgtq_u16(uint16x8_t a, uint16x8_t b); // VCGT.U16 q0, q0, q0
+_NEON2SSE_INLINE uint16x8_t vcgtq_u16(uint16x8_t a, uint16x8_t b) // VCGT.U16 q0, q0, q0
+{
+    //no unsigned short comparison, only signed is available, so subtract the sign bias
+    __m128i c8000, as, bs;
+    c8000 = _mm_set1_epi16(-32768); //(int16_t)0x8000
+    as = _mm_sub_epi16(a, c8000);
+    bs = _mm_sub_epi16(b, c8000);
+    return _mm_cmpgt_epi16(as, bs);
+}
+
+_NEON2SSESTORAGE uint32x4_t vcgtq_u32(uint32x4_t a, uint32x4_t b); // VCGT.U32 q0, q0, q0
+_NEON2SSE_INLINE uint32x4_t vcgtq_u32(uint32x4_t a, uint32x4_t b) // VCGT.U32 q0, q0, q0
+{
+    //no unsigned int comparison, only signed is available, so subtract the sign bias
+    __m128i c80000000, as, bs;
+    c80000000 = _mm_set1_epi32 (0x80000000);
+    as = _mm_sub_epi32(a, c80000000);
+    bs = _mm_sub_epi32(b, c80000000);
+    return _mm_cmpgt_epi32 (as, bs);
+}
+
+//********************* Vector compare less-than **************************
+//*************************************************************************
+_NEON2SSE_GLOBAL uint8x8_t vclt_s8(int8x8_t a, int8x8_t b); // VCGT.S8 d0, d0, d0
+#define vclt_s8(a,b) vcgt_s8(b,a) //swap the arguments!!
+
+_NEON2SSE_GLOBAL uint16x4_t vclt_s16(int16x4_t a, int16x4_t b); // VCGT.S16 d0, d0, d0
+#define vclt_s16(a,b) vcgt_s16(b,a) //swap the arguments!!
+
+_NEON2SSE_GLOBAL uint32x2_t vclt_s32(int32x2_t a, int32x2_t b); // VCGT.S32 d0, d0, d0
+#define vclt_s32(a,b) vcgt_s32(b,a) //swap the arguments!!
+
+_NEON2SSE_GLOBAL uint32x2_t vclt_f32(float32x2_t a, float32x2_t b); // VCGT.F32 d0, d0, d0
+#define vclt_f32(a,b) vcgt_f32(b, a) //swap the arguments!!
+
+_NEON2SSE_GLOBAL uint8x8_t vclt_u8(uint8x8_t a, uint8x8_t b); // VCGT.U8 d0, d0, d0
+#define vclt_u8(a,b) vcgt_u8(b,a) //swap the arguments!!
+
+_NEON2SSE_GLOBAL uint16x4_t vclt_u16(uint16x4_t a, uint16x4_t b); // VCGT.U16 d0, d0, d0
+#define vclt_u16(a,b) vcgt_u16(b,a) //swap the arguments!!
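+//Illustrative sketch (not part of the API): the sign-bias trick used by the
+//vcgtq_u8/u16/u32 functions above maps the unsigned range onto the signed one.
+//E.g. for bytes a = 0xFF (255) and b = 0x01: after subtracting 0x80,
+//as = 0x7F (127) and bs = 0x81 (-127), and the signed compare 127 > -127
+//reproduces the unsigned result 255 > 1.
+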
+ +_NEON2SSE_GLOBAL uint32x2_t vclt_u32(uint32x2_t a, uint32x2_t b); // VCGT.U32 d0, d0, d0 +#define vclt_u32(a,b) vcgt_u32(b,a) //swap the arguments!! + +_NEON2SSE_GLOBAL uint8x16_t vcltq_s8(int8x16_t a, int8x16_t b); // VCGT.S8 q0, q0, q0 +#define vcltq_s8(a,b) vcgtq_s8(b, a) //swap the arguments!! + +_NEON2SSE_GLOBAL uint16x8_t vcltq_s16(int16x8_t a, int16x8_t b); // VCGT.S16 q0, q0, q0 +#define vcltq_s16(a,b) vcgtq_s16(b, a) //swap the arguments!! + +_NEON2SSE_GLOBAL uint32x4_t vcltq_s32(int32x4_t a, int32x4_t b); // VCGT.S32 q0, q0, q0 +#define vcltq_s32(a,b) vcgtq_s32(b, a) //swap the arguments!! + +_NEON2SSE_GLOBAL uint32x4_t vcltq_f32(float32x4_t a, float32x4_t b); // VCGT.F32 q0, q0, q0 +#define vcltq_f32(a,b) vcgtq_f32(b, a) //swap the arguments!! + +_NEON2SSE_GLOBAL uint8x16_t vcltq_u8(uint8x16_t a, uint8x16_t b); // VCGT.U8 q0, q0, q0 +#define vcltq_u8(a,b) vcgtq_u8(b, a) //swap the arguments!! + +_NEON2SSE_GLOBAL uint16x8_t vcltq_u16(uint16x8_t a, uint16x8_t b); // VCGT.s16 q0, q0, q0 +#define vcltq_u16(a,b) vcgtq_u16(b, a) //swap the arguments!! + +_NEON2SSE_GLOBAL uint32x4_t vcltq_u32(uint32x4_t a, uint32x4_t b); // VCGT.U32 q0, q0, q0 +#define vcltq_u32(a,b) vcgtq_u32(b, a) //swap the arguments!! + +//*****************Vector compare absolute greater-than or equal ************ +//*************************************************************************** +_NEON2SSESTORAGE uint32x2_t vcage_f32(float32x2_t a, float32x2_t b); // VACGE.F32 d0, d0, d0 +_NEON2SSE_INLINE uint32x2_t vcage_f32(float32x2_t a, float32x2_t b) +{ + uint32x2_t res64; + __m128i c7fffffff; + __m128 a0, b0; + c7fffffff = _mm_set1_epi32 (0x7fffffff); + a0 = _mm_and_ps (_pM128(a), *(__m128*)&c7fffffff); + b0 = _mm_and_ps (_pM128(b), *(__m128*)&c7fffffff); + a0 = _mm_cmpge_ps ( a0, b0); + return64f(a0); +} + +_NEON2SSESTORAGE uint32x4_t vcageq_f32(float32x4_t a, float32x4_t b); // VACGE.F32 q0, q0, q0 +_NEON2SSE_INLINE uint32x4_t vcageq_f32(float32x4_t a, float32x4_t b) // VACGE.F32 q0, q0, q0 +{ + __m128i c7fffffff; + __m128 a0, b0; + c7fffffff = _mm_set1_epi32 (0x7fffffff); + a0 = _mm_and_ps (a, *(__m128*)&c7fffffff); + b0 = _mm_and_ps (b, *(__m128*)&c7fffffff); + a0 = _mm_cmpge_ps ( a0, b0); + return (*(__m128i*)&a0); +} + +//********Vector compare absolute less-than or equal ****************** +//******************************************************************** +_NEON2SSESTORAGE uint32x2_t vcale_f32(float32x2_t a, float32x2_t b); // VACGE.F32 d0, d0, d0 +_NEON2SSE_INLINE uint32x2_t vcale_f32(float32x2_t a, float32x2_t b) +{ + uint32x2_t res64; + __m128i c7fffffff; + __m128 a0, b0; + c7fffffff = _mm_set1_epi32 (0x7fffffff); + a0 = _mm_and_ps (_pM128(a), *(__m128*)&c7fffffff); + b0 = _mm_and_ps (_pM128(b), *(__m128*)&c7fffffff); + a0 = _mm_cmple_ps (a0, b0); + return64f(a0); +} + +_NEON2SSESTORAGE uint32x4_t vcaleq_f32(float32x4_t a, float32x4_t b); // VACGE.F32 q0, q0, q0 +_NEON2SSE_INLINE uint32x4_t vcaleq_f32(float32x4_t a, float32x4_t b) // VACGE.F32 q0, q0, q0 +{ + __m128i c7fffffff; + __m128 a0, b0; + c7fffffff = _mm_set1_epi32 (0x7fffffff); + a0 = _mm_and_ps (a, *(__m128*)&c7fffffff); + b0 = _mm_and_ps (b, *(__m128*)&c7fffffff); + a0 = _mm_cmple_ps (a0, b0); + return (*(__m128i*)&a0); +} + +//******** Vector compare absolute greater-than ****************** +//****************************************************************** +_NEON2SSESTORAGE uint32x2_t vcagt_f32(float32x2_t a, float32x2_t b); // VACGT.F32 d0, d0, d0 +_NEON2SSE_INLINE uint32x2_t vcagt_f32(float32x2_t a, float32x2_t b) +{ + 
uint32x2_t res64; + __m128i c7fffffff; + __m128 a0, b0; + c7fffffff = _mm_set1_epi32 (0x7fffffff); + a0 = _mm_and_ps (_pM128(a), *(__m128*)&c7fffffff); + b0 = _mm_and_ps (_pM128(b), *(__m128*)&c7fffffff); + a0 = _mm_cmpgt_ps (a0, b0); + return64f(a0); +} + +_NEON2SSESTORAGE uint32x4_t vcagtq_f32(float32x4_t a, float32x4_t b); // VACGT.F32 q0, q0, q0 +_NEON2SSE_INLINE uint32x4_t vcagtq_f32(float32x4_t a, float32x4_t b) // VACGT.F32 q0, q0, q0 +{ + __m128i c7fffffff; + __m128 a0, b0; + c7fffffff = _mm_set1_epi32 (0x7fffffff); + a0 = _mm_and_ps (a, *(__m128*)&c7fffffff); + b0 = _mm_and_ps (b, *(__m128*)&c7fffffff); + a0 = _mm_cmpgt_ps (a0, b0); + return (*(__m128i*)&a0); +} + +//***************Vector compare absolute less-than *********************** +//************************************************************************* +_NEON2SSESTORAGE uint32x2_t vcalt_f32(float32x2_t a, float32x2_t b); // VACGT.F32 d0, d0, d0 +_NEON2SSE_INLINE uint32x2_t vcalt_f32(float32x2_t a, float32x2_t b) +{ + uint32x2_t res64; + __m128i c7fffffff; + __m128 a0, b0; + c7fffffff = _mm_set1_epi32 (0x7fffffff); + a0 = _mm_and_ps (_pM128(a), *(__m128*)&c7fffffff); + b0 = _mm_and_ps (_pM128(b), *(__m128*)&c7fffffff); + a0 = _mm_cmplt_ps (a0, b0); + return64f(a0); +} + +_NEON2SSESTORAGE uint32x4_t vcaltq_f32(float32x4_t a, float32x4_t b); // VACGT.F32 q0, q0, q0 +_NEON2SSE_INLINE uint32x4_t vcaltq_f32(float32x4_t a, float32x4_t b) // VACGT.F32 q0, q0, q0 +{ + __m128i c7fffffff; + __m128 a0, b0; + c7fffffff = _mm_set1_epi32 (0x7fffffff); + a0 = _mm_and_ps (a, *(__m128*)&c7fffffff); + b0 = _mm_and_ps (b, *(__m128*)&c7fffffff); + a0 = _mm_cmplt_ps (a0, b0); + return (*(__m128i*)&a0); +} + +//*************************Vector test bits************************************ +//***************************************************************************** +/*VTST (Vector Test Bits) takes each element in a vector, and bitwise logical ANDs them +with the corresponding element of a second vector. If the result is not zero, the +corresponding element in the destination vector is set to all ones. Otherwise, it is set to +all zeros. 
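+For example, with byte lanes a[i] = 0x03 and b[i] = 0x05, a & b = 0x01 (non-zero),
+so the result lane becomes 0xFF; with a[i] = 0x03 and b[i] = 0x04, a & b = 0x00,
+so the result lane becomes 0x00.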
*/ + +_NEON2SSESTORAGE uint8x8_t vtst_s8(int8x8_t a, int8x8_t b); // VTST.8 d0, d0, d0 +_NEON2SSE_INLINE uint8x8_t vtst_s8(int8x8_t a, int8x8_t b) +{ + int8x8_t res64; + return64(vtstq_s8(_pM128i(a), _pM128i(b))); +} + + +_NEON2SSESTORAGE uint16x4_t vtst_s16(int16x4_t a, int16x4_t b); // VTST.16 d0, d0, d0 +_NEON2SSE_INLINE uint16x4_t vtst_s16(int16x4_t a, int16x4_t b) +{ + int16x4_t res64; + return64(vtstq_s16(_pM128i(a), _pM128i(b))); +} + + +_NEON2SSESTORAGE uint32x2_t vtst_s32(int32x2_t a, int32x2_t b); // VTST.32 d0, d0, d0 +_NEON2SSE_INLINE uint32x2_t vtst_s32(int32x2_t a, int32x2_t b) +{ + int32x2_t res64; + return64(vtstq_s32(_pM128i(a), _pM128i(b))); +} + + +_NEON2SSE_GLOBAL uint8x8_t vtst_u8(uint8x8_t a, uint8x8_t b); // VTST.8 d0, d0, d0 +#define vtst_u8 vtst_s8 + +_NEON2SSE_GLOBAL uint16x4_t vtst_u16(uint16x4_t a, uint16x4_t b); // VTST.16 d0, d0, d0 +#define vtst_u16 vtst_s16 + +_NEON2SSE_GLOBAL uint32x2_t vtst_u32(uint32x2_t a, uint32x2_t b); // VTST.32 d0, d0, d0 +#define vtst_u32 vtst_s32 + + +_NEON2SSE_GLOBAL uint8x8_t vtst_p8(poly8x8_t a, poly8x8_t b); // VTST.8 d0, d0, d0 +#define vtst_p8 vtst_u8 + +_NEON2SSESTORAGE uint8x16_t vtstq_s8(int8x16_t a, int8x16_t b); // VTST.8 q0, q0, q0 +_NEON2SSE_INLINE uint8x16_t vtstq_s8(int8x16_t a, int8x16_t b) // VTST.8 q0, q0, q0 +{ + __m128i zero, one, res; + zero = _mm_setzero_si128 (); + one = _mm_cmpeq_epi8(zero,zero); //0xfff..ffff + res = _mm_and_si128 (a, b); + res = _mm_cmpeq_epi8 (res, zero); + return _mm_xor_si128(res, one); //invert result +} + +_NEON2SSESTORAGE uint16x8_t vtstq_s16(int16x8_t a, int16x8_t b); // VTST.16 q0, q0, q0 +_NEON2SSE_INLINE uint16x8_t vtstq_s16(int16x8_t a, int16x8_t b) // VTST.16 q0, q0, q0 +{ + __m128i zero, one, res; + zero = _mm_setzero_si128 (); + one = _mm_cmpeq_epi8(zero,zero); //0xfff..ffff + res = _mm_and_si128 (a, b); + res = _mm_cmpeq_epi16 (res, zero); + return _mm_xor_si128(res, one); //invert result +} + +_NEON2SSESTORAGE uint32x4_t vtstq_s32(int32x4_t a, int32x4_t b); // VTST.32 q0, q0, q0 +_NEON2SSE_INLINE uint32x4_t vtstq_s32(int32x4_t a, int32x4_t b) // VTST.32 q0, q0, q0 +{ + __m128i zero, one, res; + zero = _mm_setzero_si128 (); + one = _mm_cmpeq_epi8(zero,zero); //0xfff..ffff + res = _mm_and_si128 (a, b); + res = _mm_cmpeq_epi32 (res, zero); + return _mm_xor_si128(res, one); //invert result +} + +_NEON2SSE_GLOBAL uint8x16_t vtstq_u8(uint8x16_t a, uint8x16_t b); // VTST.8 q0, q0, q0 +#define vtstq_u8 vtstq_s8 + +_NEON2SSE_GLOBAL uint16x8_t vtstq_u16(uint16x8_t a, uint16x8_t b); // VTST.16 q0, q0, q0 +#define vtstq_u16 vtstq_s16 + +_NEON2SSE_GLOBAL uint32x4_t vtstq_u32(uint32x4_t a, uint32x4_t b); // VTST.32 q0, q0, q0 +#define vtstq_u32 vtstq_s32 + +_NEON2SSE_GLOBAL uint8x16_t vtstq_p8(poly8x16_t a, poly8x16_t b); // VTST.8 q0, q0, q0 +#define vtstq_p8 vtstq_u8 + +//****************** Absolute difference ******************** +//*** Absolute difference between the arguments: Vr[i] = | Va[i] - Vb[i] |***** +//************************************************************ +_NEON2SSESTORAGE int8x8_t vabd_s8(int8x8_t a, int8x8_t b); // VABD.S8 d0,d0,d0 +_NEON2SSE_INLINE int8x8_t vabd_s8(int8x8_t a, int8x8_t b) +{ + int8x8_t res64; + return64(vabdq_s8(_pM128i(a), _pM128i(b))); +} + +_NEON2SSESTORAGE int16x4_t vabd_s16(int16x4_t a, int16x4_t b); // VABD.S16 d0,d0,d0 +_NEON2SSE_INLINE int16x4_t vabd_s16(int16x4_t a, int16x4_t b) +{ + int16x4_t res64; + return64(vabdq_s16(_pM128i(a), _pM128i(b))); +} + +_NEON2SSESTORAGE int32x2_t vabd_s32(int32x2_t a, int32x2_t b); // VABD.S32 d0,d0,d0 
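+//Note on the serial implementation below (editorial sketch): for extreme inputs
+//such as a = 0x7fffffff, b = 0x80000000 the true difference (0xffffffff) does not
+//fit in int32, so the subtraction wraps; on two's-complement x86 targets this
+//matches NEON's modulo-2^32 result, though signed overflow is formally undefined in C.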
+_NEON2SSE_INLINE int32x2_t vabd_s32(int32x2_t a, int32x2_t b) +{//need to deal with an intermediate overflow + int32x2_t res; + res.m64_i32[0] = (a.m64_i32[0] > b.m64_i32[0]) ? a.m64_i32[0] - b.m64_i32[0]: b.m64_i32[0] - a.m64_i32[0]; + res.m64_i32[1] = (a.m64_i32[1] > b.m64_i32[1]) ? a.m64_i32[1] - b.m64_i32[1]: b.m64_i32[1] - a.m64_i32[1]; + return res; +} + +_NEON2SSESTORAGE uint8x8_t vabd_u8(uint8x8_t a, uint8x8_t b); // VABD.U8 d0,d0,d0 +_NEON2SSE_INLINE uint8x8_t vabd_u8(uint8x8_t a, uint8x8_t b) +{ + uint8x8_t res64; + return64(vabdq_u8(_pM128i(a), _pM128i(b))); +} + +_NEON2SSESTORAGE uint16x4_t vabd_u16(uint16x4_t a, uint16x4_t b); // VABD.s16 d0,d0,d0 +_NEON2SSE_INLINE uint16x4_t vabd_u16(uint16x4_t a, uint16x4_t b) +{ + uint16x4_t res64; + return64(vabdq_u16(_pM128i(a), _pM128i(b))); +} + +_NEON2SSESTORAGE uint32x2_t vabd_u32(uint32x2_t a, uint32x2_t b); // VABD.U32 d0,d0,d0 +_NEON2SSE_INLINE uint32x2_t vabd_u32(uint32x2_t a, uint32x2_t b) +{ + uint32x2_t res64; + return64(vabdq_u32(_pM128i(a), _pM128i(b))); +} + +_NEON2SSESTORAGE float32x2_t vabd_f32(float32x2_t a, float32x2_t b); // VABD.F32 d0,d0,d0 +_NEON2SSE_INLINE float32x2_t vabd_f32(float32x2_t a, float32x2_t b) +{ + float32x4_t res; + __m64_128 res64; + res = vabdq_f32(_pM128(a), _pM128(b)); + _M64f(res64, res); + return res64; +} + +_NEON2SSESTORAGE int8x16_t vabdq_s8(int8x16_t a, int8x16_t b); // VABD.S8 q0,q0,q0 +_NEON2SSE_INLINE int8x16_t vabdq_s8(int8x16_t a, int8x16_t b) // VABD.S8 q0,q0,q0 +{ //need to deal with an intermediate overflow + __m128i cmp, difab, difba; + cmp = vcgtq_s8(a,b); + difab = _mm_sub_epi8(a,b); + difba = _mm_sub_epi8(b,a); + difab = _mm_and_si128(cmp, difab); + difba = _mm_andnot_si128(cmp, difba); + return _mm_or_si128(difab, difba); +} + +_NEON2SSESTORAGE int16x8_t vabdq_s16(int16x8_t a, int16x8_t b); // VABD.S16 q0,q0,q0 +_NEON2SSE_INLINE int16x8_t vabdq_s16(int16x8_t a, int16x8_t b) // VABD.S16 q0,q0,q0 +{//need to deal with an intermediate overflow + __m128i cmp, difab, difba; + cmp = vcgtq_s16(a,b); + difab = _mm_sub_epi16(a,b); + difba = _mm_sub_epi16 (b,a); + difab = _mm_and_si128(cmp, difab); + difba = _mm_andnot_si128(cmp, difba); + return _mm_or_si128(difab, difba); +} + +_NEON2SSESTORAGE int32x4_t vabdq_s32(int32x4_t a, int32x4_t b); // VABD.S32 q0,q0,q0 +_NEON2SSE_INLINE int32x4_t vabdq_s32(int32x4_t a, int32x4_t b) // VABD.S32 q0,q0,q0 +{//need to deal with an intermediate overflow + __m128i cmp, difab, difba; + cmp = vcgtq_s32(a,b); + difab = _mm_sub_epi32(a,b); + difba = _mm_sub_epi32(b,a); + difab = _mm_and_si128(cmp, difab); + difba = _mm_andnot_si128(cmp, difba); + return _mm_or_si128(difab, difba); +} + +_NEON2SSESTORAGE uint8x16_t vabdq_u8(uint8x16_t a, uint8x16_t b); // VABD.U8 q0,q0,q0 +_NEON2SSE_INLINE uint8x16_t vabdq_u8(uint8x16_t a, uint8x16_t b) //no abs for unsigned +{ + __m128i difab, difba; + difab = _mm_subs_epu8(a,b); + difba = _mm_subs_epu8 (b,a); + return _mm_or_si128(difab, difba); +} + +_NEON2SSESTORAGE uint16x8_t vabdq_u16(uint16x8_t a, uint16x8_t b); // VABD.s16 q0,q0,q0 +_NEON2SSE_INLINE uint16x8_t vabdq_u16(uint16x8_t a, uint16x8_t b) +{ + __m128i difab, difba; + difab = _mm_subs_epu16(a,b); + difba = _mm_subs_epu16 (b,a); + return _mm_or_si128(difab, difba); +} + +_NEON2SSESTORAGE uint32x4_t vabdq_u32(uint32x4_t a, uint32x4_t b); // VABD.U32 q0,q0,q0 +_NEON2SSE_INLINE uint32x4_t vabdq_u32(uint32x4_t a, uint32x4_t b) +{ + __m128i cmp, difab, difba; + cmp = vcgtq_u32(a,b); + difab = _mm_sub_epi32(a,b); + difba = _mm_sub_epi32 (b,a); + difab = 
_mm_and_si128(cmp, difab); + difba = _mm_andnot_si128(cmp, difba); + return _mm_or_si128(difab, difba); +} + +_NEON2SSESTORAGE float32x4_t vabdq_f32(float32x4_t a, float32x4_t b); // VABD.F32 q0,q0,q0 +_NEON2SSE_INLINE float32x4_t vabdq_f32(float32x4_t a, float32x4_t b) // VABD.F32 q0,q0,q0 +{ + __m128i c1; + __m128 res; + c1 = _mm_set1_epi32(0x7fffffff); + res = _mm_sub_ps (a, b); + return _mm_and_ps (res, *(__m128*)&c1); +} + +//************ Absolute difference - long ************************** +//******************************************************************** +_NEON2SSESTORAGE int16x8_t vabdl_s8(int8x8_t a, int8x8_t b); // VABDL.S8 q0,d0,d0 +_NEON2SSE_INLINE int16x8_t vabdl_s8(int8x8_t a, int8x8_t b) // VABDL.S8 q0,d0,d0 +{ + __m128i a16, b16; + a16 = _MM_CVTEPI8_EPI16 (_pM128i(a)); //SSE4.1, + b16 = _MM_CVTEPI8_EPI16 (_pM128i(b)); //SSE4.1, + return vabdq_s16(a16, b16); + +} + +_NEON2SSESTORAGE int32x4_t vabdl_s16(int16x4_t a, int16x4_t b); // VABDL.S16 q0,d0,d0 +_NEON2SSE_INLINE int32x4_t vabdl_s16(int16x4_t a, int16x4_t b) // VABDL.S16 q0,d0,d0 +{ + __m128i a32, b32; + a32 = _MM_CVTEPI16_EPI32 (_pM128i(a)); //SSE4.1 + b32 = _MM_CVTEPI16_EPI32 (_pM128i(b)); //SSE4.1, + return vabdq_s32(a32, b32); +} + +_NEON2SSESTORAGE int64x2_t vabdl_s32(int32x2_t a, int32x2_t b); // VABDL.S32 q0,d0,d0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING (int64x2_t vabdl_s32(int32x2_t a, int32x2_t b),_NEON2SSE_REASON_SLOW_SERIAL) +{ + //no optimal SIMD solution, serial looks faster + _NEON2SSE_ALIGN_16 int64_t res[2]; + if(a.m64_i32[0] > b.m64_i32[0]) res[0] = ( int64_t) a.m64_i32[0] - ( int64_t) b.m64_i32[0]; + else res[0] = ( int64_t) b.m64_i32[0] - ( int64_t) a.m64_i32[0]; + if(a.m64_i32[1] > b.m64_i32[1]) res[1] = ( int64_t) a.m64_i32[1] - ( int64_t) b.m64_i32[1]; + else res[1] = ( int64_t) b.m64_i32[1] - ( int64_t) a.m64_i32[1]; + return _mm_load_si128((__m128i*)res); +} + +_NEON2SSESTORAGE uint16x8_t vabdl_u8(uint8x8_t a, uint8x8_t b); // VABDL.U8 q0,d0,d0 +_NEON2SSE_INLINE uint16x8_t vabdl_u8(uint8x8_t a, uint8x8_t b) +{ + __m128i res; + res = vsubl_u8(a,b); + return _mm_abs_epi16(res); +} + +_NEON2SSESTORAGE uint32x4_t vabdl_u16(uint16x4_t a, uint16x4_t b); // VABDL.s16 q0,d0,d0 +_NEON2SSE_INLINE uint32x4_t vabdl_u16(uint16x4_t a, uint16x4_t b) +{ + __m128i res; + res = vsubl_u16(a,b); + return _mm_abs_epi32(res); +} + +_NEON2SSESTORAGE uint64x2_t vabdl_u32(uint32x2_t a, uint32x2_t b); // VABDL.U32 q0,d0,d0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING (uint64x2_t vabdl_u32(uint32x2_t a, uint32x2_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + _NEON2SSE_ALIGN_16 uint64_t res[2]; + if(a.m64_u32[0] > b.m64_u32[0]) res[0] = ( uint64_t) a.m64_u32[0] - ( uint64_t) b.m64_u32[0]; + else res[0] = ( uint64_t) b.m64_u32[0] - ( uint64_t) a.m64_u32[0]; + if(a.m64_u32[1] > b.m64_u32[1]) res[1] = ( uint64_t) a.m64_u32[1] - ( uint64_t) b.m64_u32[1]; + else res[1] = ( uint64_t) b.m64_u32[1] - ( uint64_t) a.m64_u32[1]; + return _mm_load_si128((__m128i*)res); +} + +//**********Absolute difference and accumulate: Vr[i] = Va[i] + | Vb[i] - Vc[i] | ************* +//********************************************************************************************* +_NEON2SSESTORAGE int8x8_t vaba_s8(int8x8_t a, int8x8_t b, int8x8_t c); // VABA.S8 d0,d0,d0 +_NEON2SSE_INLINE int8x8_t vaba_s8(int8x8_t a, int8x8_t b, int8x8_t c) +{ + int8x8_t res64; + return64(vabaq_s8(_pM128i(a),_pM128i(b), _pM128i(c))); +} + +_NEON2SSESTORAGE int16x4_t vaba_s16(int16x4_t a, int16x4_t b, int16x4_t c); // VABA.S16 d0,d0,d0 +_NEON2SSE_INLINE 
int16x4_t vaba_s16(int16x4_t a, int16x4_t b, int16x4_t c) +{ + int16x4_t res64; + return64(vabaq_s16(_pM128i(a), _pM128i(b), _pM128i(c))); +} + +_NEON2SSESTORAGE int32x2_t vaba_s32(int32x2_t a, int32x2_t b, int32x2_t c); // VABA.S32 d0,d0,d0 +_NEON2SSE_INLINE int32x2_t vaba_s32(int32x2_t a, int32x2_t b, int32x2_t c) +{ + int32x2_t res64; + return64(vabaq_s32(_pM128i(a), _pM128i(b), _pM128i(c))); +} + +_NEON2SSESTORAGE uint8x8_t vaba_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c); // VABA.U8 d0,d0,d0 +_NEON2SSE_INLINE uint8x8_t vaba_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) +{ + int8x8_t res64; + return64(vabaq_u8(_pM128i(a),_pM128i(b), _pM128i(c))); +} + + +_NEON2SSESTORAGE uint16x4_t vaba_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c); // VABA.s16 d0,d0,d0 +_NEON2SSE_INLINE uint16x4_t vaba_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) +{ + int16x4_t res64; + return64(vabaq_u16(_pM128i(a), _pM128i(b), _pM128i(c))); +} + +_NEON2SSESTORAGE uint32x2_t vaba_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c); // VABA.U32 d0,d0,d0 +_NEON2SSE_INLINE uint32x2_t vaba_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) +{ + uint32x2_t res64; + return64(vabaq_u32(_pM128i(a), _pM128i(b), _pM128i(c))); +} + +_NEON2SSESTORAGE int8x16_t vabaq_s8(int8x16_t a, int8x16_t b, int8x16_t c); // VABA.S8 q0,q0,q0 +_NEON2SSE_INLINE int8x16_t vabaq_s8(int8x16_t a, int8x16_t b, int8x16_t c) // VABA.S8 q0,q0,q0 +{ + int8x16_t sub; + sub = vabdq_s8(b, c); + return vaddq_s8( a, sub); +} + +_NEON2SSESTORAGE int16x8_t vabaq_s16(int16x8_t a, int16x8_t b, int16x8_t c); // VABA.S16 q0,q0,q0 +_NEON2SSE_INLINE int16x8_t vabaq_s16(int16x8_t a, int16x8_t b, int16x8_t c) // VABA.S16 q0,q0,q0 +{ + int16x8_t sub; + sub = vabdq_s16(b, c); + return vaddq_s16( a, sub); +} + +_NEON2SSESTORAGE int32x4_t vabaq_s32(int32x4_t a, int32x4_t b, int32x4_t c); // VABA.S32 q0,q0,q0 +_NEON2SSE_INLINE int32x4_t vabaq_s32(int32x4_t a, int32x4_t b, int32x4_t c) // VABA.S32 q0,q0,q0 +{ + int32x4_t sub; + sub = vabdq_s32(b, c); + return vaddq_s32( a, sub); +} + +_NEON2SSESTORAGE uint8x16_t vabaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c); // VABA.U8 q0,q0,q0 +_NEON2SSE_INLINE uint8x16_t vabaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) +{ + uint8x16_t sub; + sub = vabdq_u8(b, c); + return vaddq_u8( a, sub); +} + +_NEON2SSESTORAGE uint16x8_t vabaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c); // VABA.s16 q0,q0,q0 +_NEON2SSE_INLINE uint16x8_t vabaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) +{ + uint16x8_t sub; + sub = vabdq_u16(b, c); + return vaddq_u16( a, sub); +} + +_NEON2SSESTORAGE uint32x4_t vabaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c); // VABA.U32 q0,q0,q0 +_NEON2SSE_INLINE uint32x4_t vabaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) +{ + uint32x4_t sub; + sub = vabdq_u32(b, c); + return vaddq_u32( a, sub); +} + +//************** Absolute difference and accumulate - long ******************************** +//************************************************************************************* +_NEON2SSESTORAGE int16x8_t vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c); // VABAL.S8 q0,d0,d0 +_NEON2SSE_INLINE int16x8_t vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) // VABAL.S8 q0,d0,d0 +{ + __m128i b16, c16, res; + b16 = _MM_CVTEPI8_EPI16 (_pM128i(b)); //SSE4.1, + c16 = _MM_CVTEPI8_EPI16 (_pM128i(c)); //SSE4.1, + res = _mm_abs_epi16 (_mm_sub_epi16 (b16, c16) ); + return _mm_add_epi16 (a, res); +} + +_NEON2SSESTORAGE int32x4_t vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c); // VABAL.S16 q0,d0,d0 +_NEON2SSE_INLINE int32x4_t vabal_s16(int32x4_t a, 
int16x4_t b, int16x4_t c) // VABAL.S16 q0,d0,d0 +{ + __m128i b32, c32, res; + b32 = _MM_CVTEPI16_EPI32(_pM128i(b)); //SSE4.1 + c32 = _MM_CVTEPI16_EPI32(_pM128i(c)); //SSE4.1 + res = _mm_abs_epi32 (_mm_sub_epi32 (b32, c32) ); + return _mm_add_epi32 (a, res); +} + +_NEON2SSESTORAGE int64x2_t vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c); // VABAL.S32 q0,d0,d0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING (int64x2_t vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c), _NEON2SSE_REASON_SLOW_SERIAL) +{ + __m128i res; + res = vabdl_s32(b,c); + return _mm_add_epi64(a, res); +} + +_NEON2SSESTORAGE uint16x8_t vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c); // VABAL.U8 q0,d0,d0 +_NEON2SSE_INLINE uint16x8_t vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) +{ + __m128i b16, c16, res; + b16 = _MM_CVTEPU8_EPI16 (_pM128i(b)); //SSE4.1, + c16 = _MM_CVTEPU8_EPI16 (_pM128i(c)); //SSE4.1, + res = _mm_abs_epi16 (_mm_sub_epi16 (b16, c16) ); + return _mm_add_epi16 (a, res); +} + +_NEON2SSESTORAGE uint32x4_t vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c); // VABAL.s16 q0,d0,d0 +_NEON2SSE_INLINE uint32x4_t vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) +{ + __m128i b32, c32, res; + b32 = _MM_CVTEPU16_EPI32(_pM128i(b)); //SSE4.1 + c32 = _MM_CVTEPU16_EPI32(_pM128i(c)); //SSE4.1 + res = _mm_abs_epi32 (_mm_sub_epi32 (b32, c32) ); + return _mm_add_epi32 (a, res); +} + +_NEON2SSESTORAGE uint64x2_t vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c); // VABAL.U32 q0,d0,d0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING (uint64x2_t vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c), _NEON2SSE_REASON_SLOW_SERIAL) +{ + __m128i res; + res = vabdl_u32(b,c); + return _mm_add_epi64(a, res); +} + +//*********************************************************************************** +//**************** Maximum and minimum operations ********************************** +//*********************************************************************************** +//************* Maximum: vmax -> Vr[i] := (Va[i] >= Vb[i]) ? 
Va[i] : Vb[i] ******* +//*********************************************************************************** +_NEON2SSESTORAGE int8x8_t vmax_s8(int8x8_t a, int8x8_t b); // VMAX.S8 d0,d0,d0 +_NEON2SSE_INLINE int8x8_t vmax_s8(int8x8_t a, int8x8_t b) +{ + int8x8_t res64; + __m128i res; + res = _MM_MAX_EPI8(_pM128i(a),_pM128i(b)); //SSE4.1, use only lower 64 bits + return64(res); +} + +_NEON2SSESTORAGE int16x4_t vmax_s16(int16x4_t a, int16x4_t b); // VMAX.S16 d0,d0,d0 +_NEON2SSE_INLINE int16x4_t vmax_s16(int16x4_t a, int16x4_t b) +{ + int16x4_t res64; + return64(_mm_max_epi16(_pM128i(a),_pM128i(b))); +} + +_NEON2SSESTORAGE int32x2_t vmax_s32(int32x2_t a, int32x2_t b); // VMAX.S32 d0,d0,d0 +_NEON2SSE_INLINE int32x2_t vmax_s32(int32x2_t a, int32x2_t b) +{ + int32x2_t res64; + __m128i res; + res = _MM_MAX_EPI32(_pM128i(a),_pM128i(b)); //SSE4.1, use only lower 64 bits + return64(res); +} + +_NEON2SSESTORAGE uint8x8_t vmax_u8(uint8x8_t a, uint8x8_t b); // VMAX.U8 d0,d0,d0 +_NEON2SSE_INLINE uint8x8_t vmax_u8(uint8x8_t a, uint8x8_t b) +{ + uint8x8_t res64; + return64(_mm_max_epu8(_pM128i(a),_pM128i(b))); +} + + +_NEON2SSESTORAGE uint16x4_t vmax_u16(uint16x4_t a, uint16x4_t b); // VMAX.s16 d0,d0,d0 +_NEON2SSE_INLINE uint16x4_t vmax_u16(uint16x4_t a, uint16x4_t b) +{ + uint16x4_t res64; + return64(_MM_MAX_EPU16(_pM128i(a),_pM128i(b))); +} + + +_NEON2SSESTORAGE uint32x2_t vmax_u32(uint32x2_t a, uint32x2_t b); // VMAX.U32 d0,d0,d0 +_NEON2SSE_INLINE uint32x2_t vmax_u32(uint32x2_t a, uint32x2_t b) +{ + uint32x2_t res64; + __m128i res; + res = _MM_MAX_EPU32(_pM128i(a),_pM128i(b)); //SSE4.1, use only lower 64 bits, may be not effective compared with serial + return64(res); +} + +_NEON2SSESTORAGE float32x2_t vmax_f32(float32x2_t a, float32x2_t b); // VMAX.F32 d0,d0,d0 +_NEON2SSE_INLINE float32x2_t vmax_f32(float32x2_t a, float32x2_t b) +{ + //serial solution looks faster than SIMD one + float32x2_t res; + res.m64_f32[0] = (a.m64_f32[0] > b.m64_f32[0]) ? a.m64_f32[0] : b.m64_f32[0]; + res.m64_f32[1] = (a.m64_f32[1] > b.m64_f32[1]) ? a.m64_f32[1] : b.m64_f32[1]; + return res; +} + +_NEON2SSE_GLOBAL int8x16_t vmaxq_s8(int8x16_t a, int8x16_t b); // VMAX.S8 q0,q0,q0 +#define vmaxq_s8 _MM_MAX_EPI8 //SSE4.1 + +_NEON2SSE_GLOBAL int16x8_t vmaxq_s16(int16x8_t a, int16x8_t b); // VMAX.S16 q0,q0,q0 +#define vmaxq_s16 _mm_max_epi16 + +_NEON2SSE_GLOBAL int32x4_t vmaxq_s32(int32x4_t a, int32x4_t b); // VMAX.S32 q0,q0,q0 +#define vmaxq_s32 _MM_MAX_EPI32 //SSE4.1 + +_NEON2SSE_GLOBAL uint8x16_t vmaxq_u8(uint8x16_t a, uint8x16_t b); // VMAX.U8 q0,q0,q0 +#define vmaxq_u8 _mm_max_epu8 + +_NEON2SSE_GLOBAL uint16x8_t vmaxq_u16(uint16x8_t a, uint16x8_t b); // VMAX.s16 q0,q0,q0 +#define vmaxq_u16 _MM_MAX_EPU16 //SSE4.1 + +_NEON2SSE_GLOBAL uint32x4_t vmaxq_u32(uint32x4_t a, uint32x4_t b); // VMAX.U32 q0,q0,q0 +#define vmaxq_u32 _MM_MAX_EPU32 //SSE4.1 + + +_NEON2SSE_GLOBAL float32x4_t vmaxq_f32(float32x4_t a, float32x4_t b); // VMAX.F32 q0,q0,q0 +#define vmaxq_f32 _mm_max_ps + + +_NEON2SSE_GLOBAL float64x2_t vmaxq_f64(float64x2_t a, float64x2_t b); // VMAX.F64 q0,q0,q0 +#define vmaxq_f64 _mm_max_pd + + +//*************** Minimum: vmin -> Vr[i] := (Va[i] >= Vb[i]) ? 
Vb[i] : Va[i] ******************************** +//*********************************************************************************************************** +_NEON2SSESTORAGE int8x8_t vmin_s8(int8x8_t a, int8x8_t b); // VMIN.S8 d0,d0,d0 +_NEON2SSE_INLINE int8x8_t vmin_s8(int8x8_t a, int8x8_t b) +{ + int8x8_t res64; + __m128i res; + res = _MM_MIN_EPI8(_pM128i(a),_pM128i(b)); //SSE4.1, use only lower 64 bits + return64(res); +} + +_NEON2SSESTORAGE int16x4_t vmin_s16(int16x4_t a, int16x4_t b); // VMIN.S16 d0,d0,d0 +_NEON2SSE_INLINE int16x4_t vmin_s16(int16x4_t a, int16x4_t b) +{ + int16x4_t res64; + return64(_mm_min_epi16(_pM128i(a),_pM128i(b))); +} + + +_NEON2SSESTORAGE int32x2_t vmin_s32(int32x2_t a, int32x2_t b); // VMIN.S32 d0,d0,d0 +_NEON2SSE_INLINE int32x2_t vmin_s32(int32x2_t a, int32x2_t b) +{ + int32x2_t res64; + __m128i res; + res = _MM_MIN_EPI32(_pM128i(a),_pM128i(b)); //SSE4.1, use only lower 64 bits + return64(res); +} + +_NEON2SSESTORAGE uint8x8_t vmin_u8(uint8x8_t a, uint8x8_t b); // VMIN.U8 d0,d0,d0 +_NEON2SSE_INLINE uint8x8_t vmin_u8(uint8x8_t a, uint8x8_t b) +{ + uint8x8_t res64; + return64(_mm_min_epu8(_pM128i(a),_pM128i(b))); +} + + +_NEON2SSESTORAGE uint16x4_t vmin_u16(uint16x4_t a, uint16x4_t b); // VMIN.s16 d0,d0,d0 +_NEON2SSE_INLINE uint16x4_t vmin_u16(uint16x4_t a, uint16x4_t b) +{ + uint16x4_t res64; + return64(_MM_MIN_EPU16(_pM128i(a),_pM128i(b))); +} + + +_NEON2SSESTORAGE uint32x2_t vmin_u32(uint32x2_t a, uint32x2_t b); // VMIN.U32 d0,d0,d0 +_NEON2SSE_INLINE uint32x2_t vmin_u32(uint32x2_t a, uint32x2_t b) +{ + uint32x2_t res64; + __m128i res; + res = _MM_MIN_EPU32(_pM128i(a),_pM128i(b)); //SSE4.1, use only lower 64 bits, may be not effective compared with serial + return64(res); +} + +_NEON2SSESTORAGE float32x2_t vmin_f32(float32x2_t a, float32x2_t b); // VMIN.F32 d0,d0,d0 +_NEON2SSE_INLINE float32x2_t vmin_f32(float32x2_t a, float32x2_t b) +{ + //serial solution looks faster than SIMD one + float32x2_t res; + res.m64_f32[0] = (a.m64_f32[0] < b.m64_f32[0]) ? a.m64_f32[0] : b.m64_f32[0]; + res.m64_f32[1] = (a.m64_f32[1] < b.m64_f32[1]) ? a.m64_f32[1] : b.m64_f32[1]; + return res; +} + +_NEON2SSE_GLOBAL int8x16_t vminq_s8(int8x16_t a, int8x16_t b); // VMIN.S8 q0,q0,q0 +#define vminq_s8 _MM_MIN_EPI8 //SSE4.1 + +_NEON2SSE_GLOBAL int16x8_t vminq_s16(int16x8_t a, int16x8_t b); // VMIN.S16 q0,q0,q0 +#define vminq_s16 _mm_min_epi16 + +_NEON2SSE_GLOBAL int32x4_t vminq_s32(int32x4_t a, int32x4_t b); // VMIN.S32 q0,q0,q0 +#define vminq_s32 _MM_MIN_EPI32 //SSE4.1 + +_NEON2SSE_GLOBAL uint8x16_t vminq_u8(uint8x16_t a, uint8x16_t b); // VMIN.U8 q0,q0,q0 +#define vminq_u8 _mm_min_epu8 + +_NEON2SSE_GLOBAL uint16x8_t vminq_u16(uint16x8_t a, uint16x8_t b); // VMIN.s16 q0,q0,q0 +#define vminq_u16 _MM_MIN_EPU16 //SSE4.1 + +_NEON2SSE_GLOBAL uint32x4_t vminq_u32(uint32x4_t a, uint32x4_t b); // VMIN.U32 q0,q0,q0 +#define vminq_u32 _MM_MIN_EPU32 //SSE4.1 + +_NEON2SSE_GLOBAL float32x4_t vminq_f32(float32x4_t a, float32x4_t b); // VMIN.F32 q0,q0,q0 +#define vminq_f32 _mm_min_ps + + +_NEON2SSE_GLOBAL float64x2_t vminq_f64(float64x2_t a, float64x2_t b); // VMIN.F64 q0,q0,q0 +#define vminq_f64 _mm_min_pd + + +//************* Pairwise addition operations. 
**************************************
+//************************************************************************************
+//Pairwise add - adds adjacent pairs of elements of two vectors and places the results in the destination vector
+_NEON2SSESTORAGE int8x8_t vpadd_s8(int8x8_t a, int8x8_t b); // VPADD.I8 d0,d0,d0
+_NEON2SSE_INLINE int8x8_t vpadd_s8(int8x8_t a, int8x8_t b) // VPADD.I8 d0,d0,d0
+{
+    //no 8 bit hadd in IA32, need to go to 16 bit and then pack
+    int8x8_t res64;
+    __m128i a16, b16, res;
+    a16 = _MM_CVTEPI8_EPI16 (_pM128i(a)); // SSE 4.1
+    b16 = _MM_CVTEPI8_EPI16 (_pM128i(b)); // SSE 4.1
+    res = _mm_hadd_epi16 (a16, b16);
+    res = _mm_shuffle_epi8 (res, *(__m128i*) mask8_16_even_odd); //return to 8 bit, use low 64 bits
+    return64(res);
+}
+
+_NEON2SSESTORAGE int16x4_t vpadd_s16(int16x4_t a, int16x4_t b); // VPADD.I16 d0,d0,d0
+_NEON2SSE_INLINE int16x4_t vpadd_s16(int16x4_t a, int16x4_t b)
+{
+    int16x4_t res64;
+    __m128i hadd128;
+    hadd128 = _mm_hadd_epi16 (_pM128i(a), _pM128i(b));
+    hadd128 = _mm_shuffle_epi32 (hadd128, 0 | (2 << 2) | (1 << 4) | (3 << 6));
+    return64(hadd128);
+}
+
+_NEON2SSESTORAGE int32x2_t vpadd_s32(int32x2_t a, int32x2_t b); // VPADD.I32 d0,d0,d0
+_NEON2SSE_INLINE int32x2_t vpadd_s32(int32x2_t a, int32x2_t b)
+{
+    int32x2_t res64;
+    __m128i hadd128;
+    hadd128 = _mm_hadd_epi32 (_pM128i(a), _pM128i(b));
+    hadd128 = _mm_shuffle_epi32 (hadd128, 0 | (2 << 2) | (1 << 4) | (3 << 6));
+    return64(hadd128);
+}
+
+_NEON2SSESTORAGE uint8x8_t vpadd_u8(uint8x8_t a, uint8x8_t b); // VPADD.I8 d0,d0,d0
+_NEON2SSE_INLINE uint8x8_t vpadd_u8(uint8x8_t a, uint8x8_t b) // VPADD.I8 d0,d0,d0
+{
+    //no 8 bit hadd in IA32, need to go to 16 bit and then pack
+    //no unsigned _mm_hadd_ functions in IA32, but 8-bit unsigned fits in 16-bit signed, so it works
+    uint8x8_t res64;
+    __m128i mask8, a16, b16, res;
+    mask8 = _mm_set1_epi16(0xff);
+    a16 = _MM_CVTEPU8_EPI16 (_pM128i(a)); // SSE 4.1
+    b16 = _MM_CVTEPU8_EPI16 (_pM128i(b)); // SSE 4.1
+    res = _mm_hadd_epi16 (a16, b16);
+    res = _mm_and_si128(res, mask8); //to avoid saturation
+    res = _mm_packus_epi16 (res, res); //use low 64 bits
+    return64(res);
+}
+
+_NEON2SSESTORAGE uint16x4_t vpadd_u16(uint16x4_t a, uint16x4_t b); // VPADD.I16 d0,d0,d0
+_NEON2SSE_INLINE uint16x4_t vpadd_u16(uint16x4_t a, uint16x4_t b) // VPADD.I16 d0,d0,d0
+{
+    //this solution may not be optimal, serial execution may be faster
+    //no unsigned _mm_hadd_ functions in IA32, need to move from unsigned to signed
+    uint16x4_t res64;
+    __m128i c32767, cfffe, as, bs, res;
+    c32767 = _mm_set1_epi16 (32767);
+    cfffe = _mm_set1_epi16 (-2); //(int16_t)0xfffe
+    as = _mm_sub_epi16 (_pM128i(a), c32767);
+    bs = _mm_sub_epi16 (_pM128i(b), c32767);
+    res = _mm_hadd_epi16 (as, bs);
+    res = _mm_add_epi16 (res, cfffe);
+    res = _mm_shuffle_epi32 (res, 0 | (2 << 2) | (1 << 4) | (3 << 6));
+    return64(res);
+}
+
+_NEON2SSESTORAGE uint32x2_t vpadd_u32(uint32x2_t a, uint32x2_t b); // VPADD.I32 d0,d0,d0
+_NEON2SSE_INLINE uint32x2_t vpadd_u32(uint32x2_t a, uint32x2_t b) //serial may be faster
+{
+    //hadd doesn't work for unsigned values
+    uint32x2_t res64;
+    __m128i ab, ab_sh, res;
+    ab = _mm_unpacklo_epi64 (_pM128i(a), _pM128i(b)); //a0 a1 b0 b1
+    ab_sh = _mm_shuffle_epi32(ab, 1 | (0 << 2) | (3 << 4) | (2 << 6)); //a1, a0, b1, b0
+    res = _mm_add_epi32(ab, ab_sh);
+    res = _mm_shuffle_epi32(res, 0 | (2 << 2) | (1 << 4) | (3 << 6));
+    return64(res);
+}
+
+_NEON2SSESTORAGE float32x2_t vpadd_f32(float32x2_t a, float32x2_t b); // VPADD.F32 d0,d0,d0
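+//Arithmetic behind the bias trick in vpadd_u16 above (worked example, not part of the API):
+//(a - 32767) + (b - 32767) = a + b - 65534, and adding 0xfffe (-2) gives a + b - 65536,
+//which equals a + b modulo 2^16, i.e. the wrapped unsigned sum. E.g. a = 40000, b = 30000:
+//(7233) + (-2767) = 4466, and 4466 - 2 = 4464 = 70000 mod 65536.
+
+_NEON2SSE_INLINE float32x2_t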
vpadd_f32(float32x2_t a, float32x2_t b) +{ + __m128 hadd128; + __m64_128 res64; + hadd128 = _mm_hadd_ps (_pM128(a), _pM128(b)); + hadd128 = _mm_shuffle_ps (hadd128, hadd128, _MM_SHUFFLE(3,1, 2, 0)); //use low 64 bits + _M64f(res64, hadd128); + return res64; +} + + +//************************** Long pairwise add ********************************** +//********************************************************************************* +//Adds adjacent pairs of elements of a vector,sign or zero extends the results to twice their original width, +// and places the final results in the destination vector. + +_NEON2SSESTORAGE int16x4_t vpaddl_s8(int8x8_t a); // VPADDL.S8 d0,d0 +_NEON2SSE_INLINE int16x4_t vpaddl_s8(int8x8_t a) // VPADDL.S8 d0,d0 +{ + //no 8 bit hadd in IA32, need to go to 16 bit anyway + __m128i a16; + int16x4_t res64; + a16 = _MM_CVTEPI8_EPI16 (_pM128i(a)); // SSE 4.1 + a16 = _mm_hadd_epi16 (a16, a16); //use low 64 bits + return64(a16); +} + +_NEON2SSESTORAGE int32x2_t vpaddl_s16(int16x4_t a); // VPADDL.S16 d0,d0 +_NEON2SSE_INLINE int32x2_t vpaddl_s16(int16x4_t a) // VPADDL.S16 d0,d0 +{ + // solution may be not optimal, serial execution may be faster + int32x2_t res64; + __m128i r32_1; + r32_1 = _MM_CVTEPI16_EPI32 (_pM128i(a)); + r32_1 = _mm_hadd_epi32(r32_1, r32_1); //use low 64 bits + return64(r32_1); +} + +_NEON2SSESTORAGE int64x1_t vpaddl_s32(int32x2_t a); // VPADDL.S32 d0,d0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x1_t vpaddl_s32(int32x2_t a), _NEON2SSE_REASON_SLOW_SERIAL) //serial solution looks faster +{ + int64x1_t res; + res.m64_i64[0] = (int64_t)a.m64_i32[0] + (int64_t)a.m64_i32[1]; + return res; +} + +_NEON2SSESTORAGE uint16x4_t vpaddl_u8(uint8x8_t a); // VPADDL.U8 d0,d0 +_NEON2SSE_INLINE uint16x4_t vpaddl_u8(uint8x8_t a) // VPADDL.U8 d0,d0 +{ + // no 8 bit hadd in IA32, need to go to 16 bit +// no unsigned _mm_hadd_ functions in IA32, but 8 unsigned is less then 16 signed, so it should work + uint16x4_t res64; + __m128i a16; + a16 = _MM_CVTEPU8_EPI16 (_pM128i(a)); // SSE 4.1 use low 64 bits + a16 = _mm_hadd_epi16 (a16, a16); //use low 64 bits + return64(a16); +} + +_NEON2SSESTORAGE uint32x2_t vpaddl_u16(uint16x4_t a); // VPADDL.s16 d0,d0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint32x2_t vpaddl_u16(uint16x4_t a), _NEON2SSE_REASON_SLOW_SERIAL) +{ + //serial solution looks faster than a SIMD one + uint32x2_t res; + res.m64_u32[0] = (uint32_t)a.m64_u16[0] + (uint32_t)a.m64_u16[1]; + res.m64_u32[1] = (uint32_t)a.m64_u16[2] + (uint32_t)a.m64_u16[3]; + return res; +} + +_NEON2SSESTORAGE uint64x1_t vpaddl_u32(uint32x2_t a); // VPADDL.U32 d0,d0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint64x1_t vpaddl_u32(uint32x2_t a), _NEON2SSE_REASON_SLOW_SERIAL) //serial solution looks faster +{ + uint64x1_t res; + res.m64_u64[0] = (uint64_t)a.m64_u32[0] + (uint64_t)a.m64_u32[1]; + return res; +} + +_NEON2SSESTORAGE int16x8_t vpaddlq_s8(int8x16_t a); // VPADDL.S8 q0,q0 +_NEON2SSE_INLINE int16x8_t vpaddlq_s8(int8x16_t a) // VPADDL.S8 q0,q0 +{ + //no 8 bit hadd in IA32, need to go to 16 bit + __m128i r16_1, r16_2; + r16_1 = _MM_CVTEPI8_EPI16 (a); // SSE 4.1 + //swap hi and low part of r to process the remaining data + r16_2 = _mm_shuffle_epi32 (a, _SWAP_HI_LOW32); + r16_2 = _MM_CVTEPI8_EPI16 (r16_2); + return _mm_hadd_epi16 (r16_1, r16_2); +} + +_NEON2SSESTORAGE int32x4_t vpaddlq_s16(int16x8_t a); // VPADDL.S16 q0,q0 +_NEON2SSE_INLINE int32x4_t vpaddlq_s16(int16x8_t a) // VPADDL.S16 q0,q0 +{ + //no 8 bit hadd in IA32, need to go to 16 bit + __m128i r32_1, r32_2; + 
r32_1 = _MM_CVTEPI16_EPI32(a); + //swap hi and low part of r to process the remaining data + r32_2 = _mm_shuffle_epi32 (a, _SWAP_HI_LOW32); + r32_2 = _MM_CVTEPI16_EPI32 (r32_2); + return _mm_hadd_epi32 (r32_1, r32_2); +} + +_NEON2SSESTORAGE int64x2_t vpaddlq_s32(int32x4_t a); // VPADDL.S32 q0,q0 +_NEON2SSE_INLINE int64x2_t vpaddlq_s32(int32x4_t a) +{ + __m128i top, bot; + bot = _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 0, 2, 0)); + bot = _MM_CVTEPI32_EPI64(bot); + top = _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 0, 3, 1)); + top = _MM_CVTEPI32_EPI64(top); + return _mm_add_epi64(top, bot); +} + +_NEON2SSESTORAGE uint16x8_t vpaddlq_u8(uint8x16_t a); // VPADDL.U8 q0,q0 +_NEON2SSE_INLINE uint16x8_t vpaddlq_u8(uint8x16_t a) // VPADDL.U8 q0,q0 +{ + const __m128i ff = _mm_set1_epi16(0xFF); + __m128i low = _mm_and_si128(a, ff); + __m128i high = _mm_srli_epi16(a, 8); + return _mm_add_epi16(low, high); +} + +#ifdef USE_SSE4 +_NEON2SSESTORAGE uint32x4_t vpaddlq_u16(uint16x8_t a); // VPADDL.s16 q0,q0 +_NEON2SSE_INLINE uint32x4_t vpaddlq_u16(uint16x8_t a) +{ + const __m128i zero = _mm_setzero_si128(); + __m128i low = _mm_blend_epi16(zero, a, 0x55); // 0b1010101 + __m128i high = _mm_srli_epi32(a, 16); + return _mm_add_epi32(low, high); +} + +_NEON2SSESTORAGE uint64x2_t vpaddlq_u32(uint32x4_t a); // VPADDL.U32 q0,q0 +_NEON2SSE_INLINE uint64x2_t vpaddlq_u32(uint32x4_t a) +{ + const __m128i zero = _mm_setzero_si128(); + __m128i low = _mm_blend_epi16(zero, a, 0x33); // 0b00110011 + __m128i high = _mm_srli_epi64(a, 32); + return _mm_add_epi64(low, high); +} +#else +_NEON2SSESTORAGE uint32x4_t vpaddlq_u16(uint16x8_t a); // VPADDL.s16 q0,q0 +_NEON2SSE_INLINE uint32x4_t vpaddlq_u16(uint16x8_t a) +{ + const __m128i ff = _mm_set1_epi32(0xFFFF); + __m128i low = _mm_and_si128(a, ff); + __m128i high = _mm_srli_epi32(a, 16); + return _mm_add_epi32(low, high); +} + +_NEON2SSESTORAGE uint64x2_t vpaddlq_u32(uint32x4_t a); // VPADDL.U32 q0,q0 +_NEON2SSE_INLINE uint64x2_t vpaddlq_u32(uint32x4_t a) +{ + const __m128i ff = _mm_set_epi32(0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF); + __m128i low = _mm_and_si128(a, ff); + __m128i high = _mm_srli_epi64(a, 32); + return _mm_add_epi64(low, high); +} +#endif + +//************************ Long pairwise add and accumulate ************************** +//**************************************************************************************** +//VPADAL (Vector Pairwise Add and Accumulate Long) adds adjacent pairs of elements of a vector, +// and accumulates the values of the results into the elements of the destination (wide) vector +_NEON2SSESTORAGE int16x4_t vpadal_s8(int16x4_t a, int8x8_t b); // VPADAL.S8 d0,d0 +_NEON2SSE_INLINE int16x4_t vpadal_s8(int16x4_t a, int8x8_t b) +{ + int16x4_t res64; + return64(vpadalq_s8(_pM128i(a), _pM128i(b))); +} + +_NEON2SSESTORAGE int32x2_t vpadal_s16(int32x2_t a, int16x4_t b); // VPADAL.S16 d0,d0 +_NEON2SSE_INLINE int32x2_t vpadal_s16(int32x2_t a, int16x4_t b) +{ + int32x2_t res64; + return64(vpadalq_s16(_pM128i(a), _pM128i(b))); +} + + +_NEON2SSESTORAGE int64x1_t vpadal_s32(int64x1_t a, int32x2_t b); // VPADAL.S32 d0,d0 +_NEON2SSE_INLINE int64x1_t vpadal_s32(int64x1_t a, int32x2_t b) +{ + int64x1_t res; + res.m64_i64[0] = (int64_t)b.m64_i32[0] + (int64_t)b.m64_i32[1] + a.m64_i64[0]; + return res; +} + +_NEON2SSESTORAGE uint16x4_t vpadal_u8(uint16x4_t a, uint8x8_t b); // VPADAL.U8 d0,d0 +_NEON2SSE_INLINE uint16x4_t vpadal_u8(uint16x4_t a, uint8x8_t b) +{ + uint16x4_t res64; + return64(vpadalq_u8(_pM128i(a), _pM128i(b))); +} + + +_NEON2SSESTORAGE uint32x2_t 
vpadal_u16(uint32x2_t a, uint16x4_t b); // VPADAL.U16 d0,d0
+_NEON2SSE_INLINE uint32x2_t vpadal_u16(uint32x2_t a, uint16x4_t b)
+{
+    uint32x2_t res64;
+    return64(vpadalq_u16(_pM128i(a), _pM128i(b)));
+}
+
+_NEON2SSESTORAGE uint64x1_t vpadal_u32(uint64x1_t a, uint32x2_t b); // VPADAL.U32 d0,d0
+_NEON2SSE_INLINE uint64x1_t vpadal_u32(uint64x1_t a, uint32x2_t b)
+{
+    uint64x1_t res;
+    res.m64_u64[0] = (uint64_t)b.m64_u32[0] + (uint64_t)b.m64_u32[1] + a.m64_u64[0];
+    return res;
+}
+
+_NEON2SSESTORAGE int16x8_t vpadalq_s8(int16x8_t a, int8x16_t b); // VPADAL.S8 q0,q0
+_NEON2SSE_INLINE int16x8_t vpadalq_s8(int16x8_t a, int8x16_t b) // VPADAL.S8 q0,q0
+{
+    int16x8_t pad;
+    pad = vpaddlq_s8(b);
+    return _mm_add_epi16 (a, pad);
+}
+
+_NEON2SSESTORAGE int32x4_t vpadalq_s16(int32x4_t a, int16x8_t b); // VPADAL.S16 q0,q0
+_NEON2SSE_INLINE int32x4_t vpadalq_s16(int32x4_t a, int16x8_t b) // VPADAL.S16 q0,q0
+{
+    int32x4_t pad;
+    pad = vpaddlq_s16(b);
+    return _mm_add_epi32(a, pad);
+}
+
+_NEON2SSESTORAGE int64x2_t vpadalq_s32(int64x2_t a, int32x4_t b); // VPADAL.S32 q0,q0
+_NEON2SSE_INLINE int64x2_t vpadalq_s32(int64x2_t a, int32x4_t b)
+{
+    int64x2_t pad;
+    pad = vpaddlq_s32(b);
+    return _mm_add_epi64 (a, pad);
+}
+
+_NEON2SSESTORAGE uint16x8_t vpadalq_u8(uint16x8_t a, uint8x16_t b); // VPADAL.U8 q0,q0
+_NEON2SSE_INLINE uint16x8_t vpadalq_u8(uint16x8_t a, uint8x16_t b) // VPADAL.U8 q0,q0
+{
+    uint16x8_t pad;
+    pad = vpaddlq_u8(b);
+    return _mm_add_epi16 (a, pad);
+}
+
+_NEON2SSESTORAGE uint32x4_t vpadalq_u16(uint32x4_t a, uint16x8_t b); // VPADAL.U16 q0,q0
+_NEON2SSE_INLINE uint32x4_t vpadalq_u16(uint32x4_t a, uint16x8_t b)
+{
+    uint32x4_t pad;
+    pad = vpaddlq_u16(b);
+    return _mm_add_epi32(a, pad);
+}
+
+_NEON2SSESTORAGE uint64x2_t vpadalq_u32(uint64x2_t a, uint32x4_t b); // VPADAL.U32 q0,q0
+_NEON2SSE_INLINE uint64x2_t vpadalq_u32(uint64x2_t a, uint32x4_t b)
+{
+    uint64x2_t pad;
+    pad = vpaddlq_u32(b);
+    return _mm_add_epi64(a, pad);
+}
+
+//********** Folding maximum *************************************
+//*******************************************************************
+//VPMAX (Vector Pairwise Maximum) compares adjacent pairs of elements in two vectors,
+//and copies the larger of each pair into the corresponding element in the destination.
+//There is no corresponding functionality in IA32 SIMD, so we need to do a vertical comparison
+_NEON2SSESTORAGE int8x8_t vpmax_s8(int8x8_t a, int8x8_t b); // VPMAX.S8 d0,d0,d0
+_NEON2SSE_INLINE int8x8_t vpmax_s8(int8x8_t a, int8x8_t b) // VPMAX.S8 d0,d0,d0
+{
+    int8x8_t res64;
+    __m128i ab, ab1, max;
+    _NEON2SSE_ALIGN_16 static const uint8_t mask8_sab[16] = { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14};
+    _NEON2SSE_ALIGN_16 static const uint8_t mask8_odd[16] = { 1, 3, 5, 7, 9, 11, 13, 15, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+    ab = _mm_unpacklo_epi64 (_pM128i(a), _pM128i(b)); //ab
+    ab1 = _mm_shuffle_epi8 (ab, *(__m128i*) mask8_sab); //horizontal pairs swap for vertical max finding
+    max = _MM_MAX_EPI8 (ab, ab1); // SSE4.1
+    max = _mm_shuffle_epi8 (max, *(__m128i*) mask8_odd); //remove repetitive data
+    return64(max); //we need 64 bits only
+}
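+//Sketch of the folding pattern above (illustrative values, not part of the API):
+//with a = {3, 9, ...} and b = {5, 2, ...}, ab holds a0 a1 ... b0 b1 ...; swapping each
+//horizontal pair and taking the vertical max leaves max(a0,a1) in both lanes of a pair,
+//and the final shuffle keeps one copy per pair, yielding {9, ..., 5, ...} in the low 64 bits.
+
+_NEON2SSESTORAGE int16x4_t vpmax_s16(int16x4_t a, int16x4_t b); // VPMAX.S16 d0,d0,d0
+_NEON2SSE_INLINE int16x4_t vpmax_s16(int16x4_t a, int16x4_t b) // VPMAX.S16 d0,d0,d0
+{
+    //this solution may not be optimal compared with the serial one
+    int16x4_t res64;
+    __m128i ab, ab1, max;
+    _NEON2SSE_ALIGN_16 static const int8_t mask16_sab[16] = { 2, 3, 0, 1, 6,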
7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13}; //each chars pair is considered to be 16 bit number + ab = _mm_unpacklo_epi64 ( _pM128i(a), _pM128i(b)); //ab + ab1 = _mm_shuffle_epi8 (ab, *(__m128i*) mask16_sab); //horizontal pairs swap for vertical max finding, use 8bit fn and the corresponding mask + max = _mm_max_epi16 (ab, ab1); + max = _mm_shuffle_epi8 (max, *(__m128i*) mask8_32_even_odd); //remove repetitive data, only the low part of mask is used + return64(max); +} + +_NEON2SSESTORAGE int32x2_t vpmax_s32(int32x2_t a, int32x2_t b); // VPMAX.S32 d0,d0,d0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int32x2_t vpmax_s32(int32x2_t a, int32x2_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + //serial solution looks faster than SIMD one + int32x2_t res; + res.m64_i32[0] = (a.m64_i32[0] < a.m64_i32[1]) ? a.m64_i32[1] : a.m64_i32[0]; + res.m64_i32[1] = (b.m64_i32[0] < b.m64_i32[1]) ? b.m64_i32[1] : b.m64_i32[0]; + return res; +} + +_NEON2SSESTORAGE uint8x8_t vpmax_u8(uint8x8_t a, uint8x8_t b); // VPMAX.U8 d0,d0,d0 +_NEON2SSE_INLINE uint8x8_t vpmax_u8(uint8x8_t a, uint8x8_t b) // VPMAX.U8 d0,d0,d0 +{ + uint8x8_t res64; + __m128i ab, ab1, max; + _NEON2SSE_ALIGN_16 static const int8_t mask8_sab[16] = { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}; + _NEON2SSE_ALIGN_16 static const uint8_t mask8_odd[16] = { 1, 3, 5, 7, 9, 11, 13, 15, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + ab = _mm_unpacklo_epi64 (_pM128i(a), _pM128i(b)); //ab + ab1 = _mm_shuffle_epi8 (ab, *(__m128i*) mask8_sab); //horizontal pairs swap for vertical max finding + max = _mm_max_epu8 (ab, ab1); // SSE4.1 + max = _mm_shuffle_epi8 (max, *(__m128i*) mask8_odd); //remove repetitive data + return64(max); +} + +_NEON2SSESTORAGE uint16x4_t vpmax_u16(uint16x4_t a, uint16x4_t b); // VPMAX.s16 d0,d0,d0 +_NEON2SSE_INLINE uint16x4_t vpmax_u16(uint16x4_t a, uint16x4_t b) // VPMAX.s16 d0,d0,d0 +{ + //solution may be not optimal compared with the serial one + uint16x4_t res64; + __m128i ab, ab1, max; + _NEON2SSE_ALIGN_16 static const uint8_t mask16_sab[16] = { 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13}; //each chars pair is considered to be 16 bit number + ab = _mm_unpacklo_epi64 ( _pM128i(a), _pM128i(b)); //ab + ab1 = _mm_shuffle_epi8 (ab, *(__m128i*) mask16_sab); //horizontal pairs swap for vertical max finding, use 8bit fn and the corresponding mask + max = _MM_MAX_EPU16 (ab, ab1); + max = _mm_shuffle_epi8 (max, *(__m128i*) mask8_32_even_odd); //remove repetitive data, only the low part of mask is used + return64(max); +} + +_NEON2SSESTORAGE uint32x2_t vpmax_u32(uint32x2_t a, uint32x2_t b); // VPMAX.U32 d0,d0,d0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint32x2_t vpmax_u32(uint32x2_t a, uint32x2_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + //serial solution looks faster than SIMD one + uint32x2_t res; + res.m64_u32[0] = (a.m64_u32[0] < a.m64_u32[1]) ? a.m64_u32[1] : a.m64_u32[0]; + res.m64_u32[1] = (b.m64_u32[0] < b.m64_u32[1]) ? b.m64_u32[1] : b.m64_u32[0]; + return res; +} + +_NEON2SSESTORAGE float32x2_t vpmax_f32(float32x2_t a, float32x2_t b); // VPMAX.F32 d0,d0,d0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(float32x2_t vpmax_f32(float32x2_t a, float32x2_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + //serial solution looks faster than SIMD one + float32x2_t res; + res.m64_f32[0] = (a.m64_f32[0] < a.m64_f32[1]) ? a.m64_f32[1] : a.m64_f32[0]; + res.m64_f32[1] = (b.m64_f32[0] < b.m64_f32[1]) ? 
b.m64_f32[1] : b.m64_f32[0]; + return res; +} + +// ***************** Folding minimum **************************** +// ************************************************************** +//vpmin -> takes minimum of adjacent pairs +_NEON2SSESTORAGE int8x8_t vpmin_s8(int8x8_t a, int8x8_t b); // VPMIN.S8 d0,d0,d0 +_NEON2SSE_INLINE int8x8_t vpmin_s8(int8x8_t a, int8x8_t b) // VPMIN.S8 d0,d0,d0 +{ + int8x8_t res64; + __m128i ab, ab1, min; + _NEON2SSE_ALIGN_16 static const uint8_t mask8_sab[16] = { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}; + _NEON2SSE_ALIGN_16 static const uint8_t mask8_odd[16] = { 1, 3, 5, 7, 9, 11, 13, 15, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + ab = _mm_unpacklo_epi64 ( _pM128i(a), _pM128i(b)); //ab + ab1 = _mm_shuffle_epi8 (ab, *(__m128i*) mask8_sab); //horizontal pairs swap for vertical min finding + min = _MM_MIN_EPI8 (ab, ab1); // SSE4.1 + min = _mm_shuffle_epi8 (min, *(__m128i*) mask8_odd); //remove repetitive data + return64(min); +} + +_NEON2SSESTORAGE int16x4_t vpmin_s16(int16x4_t a, int16x4_t b); // VPMIN.S16 d0,d0,d0 +_NEON2SSE_INLINE int16x4_t vpmin_s16(int16x4_t a, int16x4_t b) // VPMIN.S16 d0,d0,d0 +{ + //solution may be not optimal compared with the serial one + int16x4_t res64; + __m128i ab, ab1, min; + _NEON2SSE_ALIGN_16 static const int8_t mask16_sab[16] = { 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13}; //each chars pair is considered to be 16 bit number + ab = _mm_unpacklo_epi64 ( _pM128i(a), _pM128i(b)); //ab + ab1 = _mm_shuffle_epi8 (ab, *(__m128i*) mask16_sab); //horizontal pairs swap for vertical max finding, use 8bit fn and the corresponding mask + min = _mm_min_epi16 (ab, ab1); + min = _mm_shuffle_epi8 (min, *(__m128i*) mask8_32_even_odd); //remove repetitive data, only the low part of mask is used + return64(min); +} + +_NEON2SSESTORAGE int32x2_t vpmin_s32(int32x2_t a, int32x2_t b); // VPMIN.S32 d0,d0,d0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int32x2_t vpmin_s32(int32x2_t a, int32x2_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + //serial solution looks faster than SIMD one + int32x2_t res; + res.m64_i32[0] = (a.m64_i32[0] > a.m64_i32[1]) ? a.m64_i32[1] : a.m64_i32[0]; + res.m64_i32[1] = (b.m64_i32[0] > b.m64_i32[1]) ? 
b.m64_i32[1] : b.m64_i32[0]; + return res; +} + +_NEON2SSESTORAGE uint8x8_t vpmin_u8(uint8x8_t a, uint8x8_t b); // VPMIN.U8 d0,d0,d0 +_NEON2SSE_INLINE uint8x8_t vpmin_u8(uint8x8_t a, uint8x8_t b) // VPMIN.U8 d0,d0,d0 +{ + uint8x8_t res64; + __m128i ab, ab1, min; + _NEON2SSE_ALIGN_16 static const uint8_t mask8_sab[16] = { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}; + _NEON2SSE_ALIGN_16 static const uint8_t mask8_odd[16] = { 1, 3, 5, 7, 9, 11, 13, 15, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + ab = _mm_unpacklo_epi64 ( _pM128i(a), _pM128i(b)); //ab + ab1 = _mm_shuffle_epi8 (ab, *(__m128i*) mask8_sab); //horizontal pairs swap for vertical max finding + min = _mm_min_epu8 (ab, ab1); // SSE4.1 + min = _mm_shuffle_epi8 (min, *(__m128i*) mask8_odd); //remove repetitive data + return64(min); +} + +_NEON2SSESTORAGE uint16x4_t vpmin_u16(uint16x4_t a, uint16x4_t b); // VPMIN.s16 d0,d0,d0 +_NEON2SSE_INLINE uint16x4_t vpmin_u16(uint16x4_t a, uint16x4_t b) // VPMIN.s16 d0,d0,d0 +{ + //solution may be not optimal compared with the serial one + uint16x4_t res64; + __m128i ab, ab1, min; + _NEON2SSE_ALIGN_16 static const uint8_t mask16_sab[16] = { 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13}; //each chars pair is considered to be 16 bit number + ab = _mm_unpacklo_epi64 ( _pM128i(a), _pM128i(b)); //ab + ab1 = _mm_shuffle_epi8 (ab, *(__m128i*) mask16_sab); //horizontal pairs swap for vertical min finding, use 8bit fn and the corresponding mask + min = _MM_MIN_EPU16 (ab, ab1); + min = _mm_shuffle_epi8 (min, *(__m128i*) mask8_32_even_odd); //remove repetitive data, only the low part of mask is used + return64(min); +} + +_NEON2SSESTORAGE uint32x2_t vpmin_u32(uint32x2_t a, uint32x2_t b); // VPMIN.U32 d0,d0,d0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint32x2_t vpmin_u32(uint32x2_t a, uint32x2_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + //serial solution looks faster than SIMD one + uint32x2_t res; + res.m64_u32[0] = (a.m64_u32[0] > a.m64_u32[1]) ? a.m64_u32[1] : a.m64_u32[0]; + res.m64_u32[1] = (b.m64_u32[0] > b.m64_u32[1]) ? b.m64_u32[1] : b.m64_u32[0]; + return res; +} + +_NEON2SSESTORAGE float32x2_t vpmin_f32(float32x2_t a, float32x2_t b); // VPMIN.F32 d0,d0,d0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(float32x2_t vpmin_f32(float32x2_t a, float32x2_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + //serial solution looks faster than SIMD one + float32x2_t res; + res.m64_f32[0] = (a.m64_f32[0] > a.m64_f32[1]) ? a.m64_f32[1] : a.m64_f32[0]; + res.m64_f32[1] = (b.m64_f32[0] > b.m64_f32[1]) ? b.m64_f32[1] : b.m64_f32[0]; + return res; +} + +//*************************************************************** +//*********** Reciprocal/Sqrt ************************************ +//*************************************************************** +//****************** Reciprocal estimate ******************************* +//the ARM NEON and x86 SIMD results may be slightly different +_NEON2SSESTORAGE float32x2_t vrecpe_f32(float32x2_t a); // VRECPE.F32 d0,d0 +_NEON2SSE_INLINE float32x2_t vrecpe_f32(float32x2_t a) //use low 64 bits +{ + float32x4_t res; + __m64_128 res64; + res = _mm_rcp_ps(_pM128(a)); + _M64f(res64, res); + return res64; +} + +_NEON2SSESTORAGE uint32x2_t vrecpe_u32(uint32x2_t a); // VRECPE.U32 d0,d0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint32x2_t vrecpe_u32(uint32x2_t a), _NEON2SSE_REASON_SLOW_SERIAL) +{ + //Input is fixed point number!!! 
+
+_NEON2SSESTORAGE uint32x2_t vrecpe_u32(uint32x2_t a); // VRECPE.U32 d0,d0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint32x2_t vrecpe_u32(uint32x2_t a), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    //Input is a fixed point number! No reciprocal for ints is available in IA32
+    uint32x2_t res;
+    float resf, r;
+    int i, q, s;
+    for (i =0; i<2; i++){
+        if((a.m64_u32[i] & 0x80000000) == 0) {
+            res.m64_u32[i] = 0xffffffff;
+        }else{
+            resf = (float) (a.m64_u32[i] * (0.5f / (uint32_t)(1 << 31)));
+            q = (int)(resf * 512.0f); /* a in units of 1/512 rounded down */
+            r = (float)(1.0f / (((float)q + 0.5f) / 512.0f)); /* reciprocal r */
+            s = (int)(256.0f * r + 0.5f); /* r in units of 1/256 rounded to nearest */
+            r = (float)s / 256.0f;
+            res.m64_u32[i] = (uint32_t)(r * (uint32_t)(1 << 31));
+        }
+    }
+    return res;
+}
+
+_NEON2SSE_GLOBAL float32x4_t vrecpeq_f32(float32x4_t a); // VRECPE.F32 q0,q0
+#define vrecpeq_f32 _mm_rcp_ps
+
+
+_NEON2SSESTORAGE uint32x4_t vrecpeq_u32(uint32x4_t a); // VRECPE.U32 q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint32x4_t vrecpeq_u32(uint32x4_t a), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    //Input is a fixed point number!
+    //We implement the recip_estimate function as described in the ARMv7 reference manual (VRECPE instruction) but use float instead of double
+    _NEON2SSE_ALIGN_16 uint32_t atmp[4];
+    _NEON2SSE_ALIGN_16 uint32_t res[4];
+    _NEON2SSE_ALIGN_16 static const uint32_t c80000000[4] = {0x80000000,0x80000000, 0x80000000,0x80000000};
+    float resf, r;
+    int i, q, s;
+    __m128i res128, mask, zero;
+    _mm_store_si128((__m128i*)atmp, a);
+    zero = _mm_setzero_si128();
+    for (i =0; i<4; i++){
+        resf = (atmp[i] * (0.5f / (uint32_t) (1 << 31))); // 2.3283064365386963E-10 ~(0.5f / (uint32_t) (1 << 31))
+        q = (int)(resf * 512.0f); /* a in units of 1/512 rounded down */
+        r = 1.0f / (((float)q + 0.5f) / 512.0f); /* reciprocal r */
+        s = (int)(256.0f * r + 0.5f); /* r in units of 1/256 rounded to nearest */
+        r = (float)s / 256.0f;
+        res[i] = (uint32_t) (r * (((uint32_t)1) << 31) );
+    }
+    res128 = _mm_load_si128((__m128i*)res);
+    mask = _mm_and_si128(a, *(__m128i*)c80000000);
+    mask = _mm_cmpeq_epi32(zero, mask); //0xffffffff if atmp[i] <= 0x7fffffff
+    return _mm_or_si128(res128, mask);
+}
+
+//**********Reciprocal square root estimate ****************
+//**********************************************************
+//no reciprocal square root for ints is available in IA32, nor an unsigned int to float4 lanes conversion, so a serial solution looks faster
+//but the particular implementation for vrsqrte_u32 may vary for various ARM compilers
+//the ARM NEON and x86 SIMD results may be slightly different
+_NEON2SSESTORAGE float32x2_t vrsqrte_f32(float32x2_t a); // VRSQRTE.F32 d0,d0
+_NEON2SSE_INLINE float32x2_t vrsqrte_f32(float32x2_t a) //use low 64 bits
+{
+    float32x4_t res;
+    __m64_128 res64;
+    res = _mm_rsqrt_ps(_pM128(a));
+    _M64f(res64, res);
+    return res64;
+}
+
+_NEON2SSESTORAGE uint32x2_t vrsqrte_u32(uint32x2_t a); // VRSQRTE.U32 d0,d0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint32x2_t vrsqrte_u32(uint32x2_t a), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    // Input is a fixed point number!
+    // We implement the recip_sqrt_estimate function as described in the ARMv7
+    // reference manual (VRSQRTE instruction), but results may be slightly different
+    // from the ARM implementation due to _mm_rsqrt_ps precision
+    uint32x2_t res;
+    __m64_128 res64[2];
+    int i;
+    _NEON2SSE_ALIGN_16 float coeff[2];
+    for (i = 0; i < 2; i++) {
+        // Generate double-precision value = operand * 2^(-32). This has zero sign
+        // bit, with:
+        //     exponent = 1022 or 1021 = double-precision representation of 2^(-1) or 2^(-2)
+        //     fraction taken from operand, excluding its most significant one or two bits.
+ uint64_t dp_operand; + if (a.m64_u32[i] & 0x80000000) { + dp_operand = + (0x3feLL << 52) | (((uint64_t)a.m64_u32[i] & 0x7FFFFFFF) << 21); + } else { + dp_operand = + (0x3fdLL << 52) | (((uint64_t)a.m64_u32[i] & 0x3FFFFFFF) << 22); + } + res64[i].m64_u64[0] = dp_operand; + coeff[i] = (res64[i].m64_d64[0] < 0.5) ? 512.0f : 256.0f; /* range 0.25 <= resf < 0.5 or range 0.5 <= resf < 1.0*/ + } + __m128 coeff_f = _mm_load_ps(coeff); + __m128d q0_d = _mm_mul_pd(_mm_loadu_pd(&res64[0].m64_d64[0]), _mm_cvtps_pd(coeff_f)); + __m128i q0_i = _mm_cvttpd_epi32(q0_d); + __m128 c05_f = _mm_set1_ps(0.5); + __m128 r_f = _mm_div_ps(_mm_add_ps(_mm_cvtepi32_ps(q0_i), c05_f), coeff_f); + __m128 rsqrt_f = _mm_rsqrt_ps(r_f); + __m128 c256_f = _mm_set1_ps(256.0); + __m128 s_f = _mm_add_ps(_mm_mul_ps(rsqrt_f, c256_f), c05_f); +#ifdef USE_SSE4 + s_f = _mm_floor_ps(s_f); +#else + s_f = _mm_cvtepi32_ps(_mm_cvttps_epi32(s_f)); +#endif + s_f = _mm_div_ps(s_f, c256_f); + _M64f(res64[0], s_f); + + for (i = 0; i < 2; i++) { + if ((a.m64_u32[i] & 0xc0000000) == 0) { // a <=0x3fffffff + res.m64_u32[i] = 0xffffffff; + } else { + res.m64_u32[i] = (uint32_t)(res64[0].m64_f32[i] * (((uint32_t)1) << 31)); + } + } + return res; +} + +_NEON2SSE_GLOBAL float32x4_t vrsqrteq_f32(float32x4_t a); // VRSQRTE.F32 q0,q0 +#define vrsqrteq_f32 _mm_rsqrt_ps + +_NEON2SSESTORAGE uint32x4_t vrsqrteq_u32(uint32x4_t a); // VRSQRTE.U32 q0,q0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint32x4_t vrsqrteq_u32(uint32x4_t a), _NEON2SSE_REASON_SLOW_SERIAL) +{ + // Input is fixed point number!!! + // We implement the recip_sqrt_estimate function as described in ARMv7 + // reference manual (VRSQRTE instruction) But results may be slightly different + // from ARM implementation due to _mm_rsqrt_ps precision + int i; + _NEON2SSE_ALIGN_16 uint32_t atmp[4], res[4]; + _NEON2SSE_ALIGN_16 float coeff[4], rr[4]; + char* coeff_f2_c = (char*)&coeff[2]; + __m64_128 res64[4]; + _mm_store_si128((__m128i *)atmp, a); + for (i = 0; i < 4; i++) { + // Generate double-precision value = operand * 2^(-32). This has zero sign + // bit, with: + // exponent = 1022 or 1021 = double-precision representation of 2^(-1) + // or 2^(-2) fraction taken from operand, excluding its most significant + // one or two bits. + uint64_t dp_operand; + if (atmp[i] & 0x80000000) { + dp_operand = (0x3feLL << 52) | (((uint64_t)atmp[i] & 0x7FFFFFFF) << 21); + } else { + dp_operand = (0x3fdLL << 52) | (((uint64_t)atmp[i] & 0x3FFFFFFF) << 22); + } + res64[i].m64_u64[0] = dp_operand; + coeff[i] = (res64[i].m64_d64[0] < 0.5) ? 
512.0f : 256.0f; /* range 0.25 <= resf < 0.5 or range 0.5 <= resf < 1.0*/ + } + __m128 c05_f = _mm_set1_ps(0.5); + __m128 coeff_f = _mm_load_ps(coeff); + __m128d q0_d = _mm_mul_pd(_mm_loadu_pd(&res64[0].m64_d64[0]), _mm_cvtps_pd(coeff_f)); + __m128i q0_i = _mm_cvttpd_epi32(q0_d); + + __m128 coeff_f2 = _M128(_pM128i(*coeff_f2_c)); + q0_d = _mm_mul_pd(_mm_loadu_pd(&res64[2].m64_d64[0]), _mm_cvtps_pd(coeff_f2)); + __m128i q0_i2 = _mm_cvttpd_epi32(q0_d); + coeff_f = _M128(_mm_unpacklo_epi64(_M128i(coeff_f), _M128i(coeff_f2))); + q0_i = _mm_unpacklo_epi64(q0_i, q0_i2); + + __m128 r_f = _mm_div_ps(_mm_add_ps(_mm_cvtepi32_ps(q0_i), c05_f), coeff_f); + __m128 rsqrt_f = _mm_rsqrt_ps(r_f); + __m128 c256_f = _mm_set1_ps(256.0); + __m128 s_f = _mm_add_ps(_mm_mul_ps(rsqrt_f, c256_f), c05_f); +#ifdef USE_SSE4 + s_f = _mm_floor_ps(s_f); +#else + s_f = _mm_cvtepi32_ps(_mm_cvttps_epi32(s_f)); +#endif + s_f = _mm_div_ps(s_f, c256_f); + _mm_store_ps(rr, s_f); + + for (i = 0; i < 4; i++) { + if ((atmp[i] & 0xc0000000) == 0) { // a <=0x3fffffff + res[i] = 0xffffffff; + } else { + res[i] = (uint32_t)(rr[i] * (((uint32_t)1) << 31)); + } + } + return _mm_load_si128((__m128i *)res); +} + +//************ Reciprocal estimate/step and 1/sqrt estimate/step *************************** +//****************************************************************************************** +//******VRECPS (Vector Reciprocal Step) *************************************************** +//multiplies the elements of one vector by the corresponding elements of another vector, +//subtracts each of the results from 2, and places the final results into the elements of the destination vector. + +_NEON2SSESTORAGE float32x2_t vrecps_f32(float32x2_t a, float32x2_t b); // VRECPS.F32 d0, d0, d0 +_NEON2SSE_INLINE float32x2_t vrecps_f32(float32x2_t a, float32x2_t b) +{ + float32x4_t res; + __m64_128 res64; + res = vrecpsq_f32(_pM128(a), _pM128(b)); + _M64f(res64, res); + return res64; +} + +_NEON2SSESTORAGE float32x4_t vrecpsq_f32(float32x4_t a, float32x4_t b); // VRECPS.F32 q0, q0, q0 +_NEON2SSE_INLINE float32x4_t vrecpsq_f32(float32x4_t a, float32x4_t b) // VRECPS.F32 q0, q0, q0 +{ + __m128 f2, mul; + f2 = _mm_set1_ps(2.); + mul = _mm_mul_ps(a,b); + return _mm_sub_ps(f2,mul); +} + +//*****************VRSQRTS (Vector Reciprocal Square Root Step) ***************************** +//multiplies the elements of one vector by the corresponding elements of another vector, +//subtracts each of the results from 3, divides these results by two, and places the final results into the elements of the destination vector. 
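+
+//A minimal refinement sketch (illustration only, not part of the header):
+//the usual idiom pairs vrsqrte with vrsqrts (defined below) Newton-Raphson
+//steps, e.g. for float32x4_t v:
+//    float32x4_t x = vrsqrteq_f32(v);                      //rough 1/sqrt(v) estimate
+//    x = vmulq_f32(vrsqrtsq_f32(vmulq_f32(v, x), x), x);   //x * (3 - v*x*x)/2
+//    x = vmulq_f32(vrsqrtsq_f32(vmulq_f32(v, x), x), x);   //second step, near-exact result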
+
+_NEON2SSESTORAGE float32x2_t vrsqrts_f32(float32x2_t a, float32x2_t b); // VRSQRTS.F32 d0, d0, d0
+_NEON2SSE_INLINE float32x2_t vrsqrts_f32(float32x2_t a, float32x2_t b)
+{
+    float32x2_t res;
+    res.m64_f32[0] = (3 - a.m64_f32[0] * b.m64_f32[0]) / 2;
+    res.m64_f32[1] = (3 - a.m64_f32[1] * b.m64_f32[1]) / 2;
+    return res;
+}
+
+_NEON2SSESTORAGE float32x4_t vrsqrtsq_f32(float32x4_t a, float32x4_t b); // VRSQRTS.F32 q0, q0, q0
+_NEON2SSE_INLINE float32x4_t vrsqrtsq_f32(float32x4_t a, float32x4_t b) // VRSQRTS.F32 q0, q0, q0
+{
+    __m128 f3, f05, mul;
+    f3 = _mm_set1_ps(3.f);
+    f05 = _mm_set1_ps(0.5f);
+    mul = _mm_mul_ps(a,b);
+    f3 = _mm_sub_ps(f3,mul);
+    return _mm_mul_ps (f3, f05);
+}
+//********************************************************************************************
+//***************************** Shifts by signed variable ***********************************
+//********************************************************************************************
+//***** Vector shift left: Vr[i] := Va[i] << Vb[i] (negative values shift right) ***********************
+//********************************************************************************************
+//No such operations in IA32 SIMD unfortunately, constant shift only available, so need to do the serial solution
+//helper macro. It matches ARM implementation for big shifts
+#define SERIAL_SHIFT(TYPE, INTERNAL_TYPE, LENMAX, LEN) \
+        _NEON2SSE_ALIGN_16 TYPE atmp[LENMAX], res[LENMAX]; _NEON2SSE_ALIGN_16 INTERNAL_TYPE btmp[LENMAX]; int i, lanesize = sizeof(INTERNAL_TYPE) << 3; \
+        _mm_store_si128((__m128i*)atmp, a); _mm_store_si128((__m128i*)btmp, b); \
+        for (i = 0; i<LEN; i++) { \
+        if( (btmp[i] >= lanesize)||(btmp[i] <= -lanesize) ) res[i] = 0; \
+        else res[i] = (btmp[i] >=0) ? atmp[i] << btmp[i] : atmp[i] >> (-btmp[i]); } \
+        return _mm_load_si128((__m128i*)res);
+
+#define SERIAL_SHIFT_64(TYPE, SIGN, LEN) \
+        int ## TYPE ## x ## LEN ## _t res; int i, lanesize = sizeof(int ## TYPE ## _t) << 3; \
+        for (i = 0; i<LEN; i++) { \
+        if( (b.m64_i ## TYPE[i] >= lanesize)||(b.m64_i ## TYPE[i] <= -lanesize) ) res.m64_ ## SIGN ## TYPE[i] = 0; \
+        else res.m64_ ## SIGN ## TYPE[i] = (b.m64_i ## TYPE[i] >=0) ? a.m64_ ## SIGN ## TYPE[i] << b.m64_i ## TYPE[i] : a.m64_ ## SIGN ## TYPE[i] >> (-b.m64_i ## TYPE[i]); } \
+        return res;
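+
+//A minimal usage sketch (illustration only, not part of the header): the sign
+//of each lane of b selects the shift direction, e.g. for int16x4_t a, b:
+//    //a = {64, 64, 64, 64}, b = {2, -2, 0, -1}
+//    int16x4_t r = vshl_s16(a, b); //r = {256, 16, 64, 32}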
+
+_NEON2SSESTORAGE int8x8_t vshl_s8(int8x8_t a, int8x8_t b); // VSHL.S8 d0,d0,d0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int8x8_t vshl_s8(int8x8_t a, int8x8_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_SHIFT_64(8, i, 8)
+}
+
+_NEON2SSESTORAGE int16x4_t vshl_s16(int16x4_t a, int16x4_t b); // VSHL.S16 d0,d0,d0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int16x4_t vshl_s16(int16x4_t a, int16x4_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_SHIFT_64(16, i, 4)
+}
+
+_NEON2SSESTORAGE int32x2_t vshl_s32(int32x2_t a, int32x2_t b); // VSHL.S32 d0,d0,d0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int32x2_t vshl_s32(int32x2_t a, int32x2_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_SHIFT_64(32, i, 2)
+}
+
+_NEON2SSESTORAGE int64x1_t vshl_s64(int64x1_t a, int64x1_t b); // VSHL.S64 d0,d0,d0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x1_t vshl_s64(int64x1_t a, int64x1_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_SHIFT_64(64, i, 1)
+}
+
+_NEON2SSESTORAGE uint8x8_t vshl_u8(uint8x8_t a, int8x8_t b); // VSHL.U8 d0,d0,d0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint8x8_t vshl_u8(uint8x8_t a, int8x8_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_SHIFT_64(8, u, 8)
+}
+
+_NEON2SSESTORAGE uint16x4_t vshl_u16(uint16x4_t a, int16x4_t b); // VSHL.s16 d0,d0,d0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint16x4_t vshl_u16(uint16x4_t a, int16x4_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_SHIFT_64(16, u, 4)
+}
+
+_NEON2SSESTORAGE uint32x2_t vshl_u32(uint32x2_t a, int32x2_t b); // VSHL.U32 d0,d0,d0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint32x2_t vshl_u32(uint32x2_t a, int32x2_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_SHIFT_64(32, u, 2)
+}
+
+_NEON2SSESTORAGE uint64x1_t vshl_u64(uint64x1_t a, int64x1_t b); // VSHL.U64 d0,d0,d0
+_NEON2SSE_INLINE uint64x1_t vshl_u64(uint64x1_t a, int64x1_t b) //if we use the SERIAL_SHIFT macro need to have the special processing for large numbers
+{
+    SERIAL_SHIFT_64(64, u, 1)
+}
+
+_NEON2SSESTORAGE int8x16_t vshlq_s8(int8x16_t a, int8x16_t b); // VSHL.S8 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int8x16_t vshlq_s8(int8x16_t a, int8x16_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_SHIFT(int8_t, int8_t, 16, 16)
+}
+
+_NEON2SSESTORAGE int16x8_t vshlq_s16(int16x8_t a, int16x8_t b); // VSHL.S16 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int16x8_t vshlq_s16(int16x8_t a, int16x8_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+#ifdef USE_AVX2
+    __m256i mask, shl, shr, a32, b32, b_abs;
+    __m128i shl_1, shl_2;
+    a32 = _mm256_cvtepi16_epi32(a);
+    b32 = _mm256_cvtepi16_epi32(b);
+    if (_mm256_movemask_ps(_M256(b32)) == 0)
+        shl = _mm256_sllv_epi32(a32, b32);
+    else {
+        b_abs = _mm256_abs_epi32(b32);
+        mask = _mm256_cmpeq_epi32(b32, b_abs);
+        shl = _mm256_sllv_epi32(a32, b_abs);
+        shl = _mm256_and_si256(mask, shl);
+        shr = _mm256_srav_epi32(a32, b_abs);
+        shr = _mm256_andnot_si256(mask, shr);
+        shl = _mm256_or_si256(shl, shr);
+    }
+    shl = _mm256_shuffle_epi8(shl, *(__m256i*)mask8_32_even_odd); //return to 16 bit
+    shl_1 = _mm256_castsi256_si128(shl);
+    shl_2 = _mm256_extractf128_si256(shl, 1);
+    return _mm_unpacklo_epi64(shl_1, shl_2);
+#else
+    SERIAL_SHIFT(int16_t, int16_t, 8, 8)
+#endif
+}
+
+_NEON2SSESTORAGE int32x4_t vshlq_s32(int32x4_t a, int32x4_t b); // VSHL.S32 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int32x4_t vshlq_s32(int32x4_t a, int32x4_t b),
_NEON2SSE_REASON_SLOW_SERIAL) +{ +#ifdef USE_AVX2 + //if all elements of b are positive, let's use the corresponding AVX function directly + if (_mm_movemask_ps(_M128(b)) == 0) + return _mm_sllv_epi32(a, b); + else { + __m128i mask, shl, shr, b_abs; + b_abs = _mm_abs_epi32(b); + mask = _mm_cmpeq_epi32(b, b_abs); + shl = _mm_sllv_epi32(a, b_abs); + shl = _mm_and_si128(mask, shl); + shr = _mm_srav_epi32(a, b_abs); + shr = _mm_andnot_si128(mask, shr); + return _mm_or_si128(shl, shr); + } +#else + SERIAL_SHIFT(int32_t, int32_t, 4, 4) +#endif +} + +_NEON2SSESTORAGE int64x2_t vshlq_s64(int64x2_t a, int64x2_t b); // VSHL.S64 q0,q0,q0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x2_t vshlq_s64(int64x2_t a, int64x2_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + SERIAL_SHIFT(int64_t, int64_t, 2, 2) +} + +_NEON2SSESTORAGE uint8x16_t vshlq_u8(uint8x16_t a, int8x16_t b); // VSHL.U8 q0,q0,q0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint8x16_t vshlq_u8(uint8x16_t a, int8x16_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + SERIAL_SHIFT(uint8_t, int8_t, 16, 16) +} + +_NEON2SSESTORAGE uint16x8_t vshlq_u16(uint16x8_t a, int16x8_t b); // VSHL.s16 q0,q0,q0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint16x8_t vshlq_u16(uint16x8_t a, int16x8_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ +#ifdef USE_AVX2 + __m256i mask, shl, shr, a32, b32, b_abs; + __m128i shl_1, shl_2; + a32 = _mm256_cvtepu16_epi32(a); + b32 = _mm256_cvtepi16_epi32(b); + if (_mm256_movemask_ps(_M256(b32)) == 0) + shl = _mm256_sllv_epi32(a32, b32); + else { + b_abs = _mm256_abs_epi32(b32); + mask = _mm256_cmpeq_epi32(b32, b_abs); + shl = _mm256_sllv_epi32(a32, b_abs); + shl = _mm256_and_si256(mask, shl); + shr = _mm256_srlv_epi32(a32, b_abs); + shr = _mm256_andnot_si256(mask, shr); + shl = _mm256_or_si256(shl, shr); + } + shl = _mm256_shuffle_epi8(shl, *(__m256i*)mask8_32_even_odd); //return to 16 bit + shl_1 = _mm256_castsi256_si128(shl); + shl_2 = _mm256_extractf128_si256(shl, 1); + return _mm_unpacklo_epi64(shl_1, shl_2); +#else + SERIAL_SHIFT(uint16_t, int16_t, 8, 8) +#endif +} + +_NEON2SSESTORAGE uint32x4_t vshlq_u32(uint32x4_t a, int32x4_t b); // VSHL.U32 q0,q0,q0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint32x4_t vshlq_u32(uint32x4_t a, int32x4_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ +#ifdef USE_AVX2 + if (_mm_movemask_ps(_M128(b)) == 0) + return _mm_sllv_epi32(a, b); + else { + __m128i mask, shl, shr, b_abs; + b_abs = _mm_abs_epi32(b); + mask = _mm_cmpeq_epi32(b, b_abs); + shl = _mm_sllv_epi32(a, b_abs); + shl = _mm_and_si128(mask, shl); + shr = _mm_srlv_epi32(a, b_abs); + shr = _mm_andnot_si128(mask, shr); + return _mm_or_si128(shl, shr); + } +#else + SERIAL_SHIFT(uint32_t, int32_t, 4, 4) +#endif +} + +_NEON2SSESTORAGE uint64x2_t vshlq_u64(uint64x2_t a, int64x2_t b); // VSHL.U64 q0,q0,q0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING( uint64x2_t vshlq_u64(uint64x2_t a, int64x2_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + SERIAL_SHIFT(uint64_t, int64_t, 2, 2) +} + + +//*********** Vector saturating shift left: (negative values shift right) ********************** +//******************************************************************************************** +//No such operations in IA32 SIMD available yet, constant shift only available, so need to do the serial solution +#define SERIAL_SATURATING_SHIFT_SIGNED(TYPE, LENMAX, LEN) \ + _NEON2SSE_ALIGN_16 TYPE atmp[LENMAX], res[LENMAX], btmp[LENMAX]; TYPE limit; int i; \ + int lanesize_1 = (sizeof(TYPE) << 3) - 1; \ + _mm_store_si128((__m128i*)atmp, a); _mm_store_si128((__m128i*)btmp, b); \ 
+        for (i = 0; i<LEN; i++) { \
+        if (atmp[i] ==0) res[i] = 0; \
+        else{ \
+            if(btmp[i] <0) res[i] = atmp[i] >> (-btmp[i]); \
+            else{ \
+                if (btmp[i]>lanesize_1) { \
+                    res[i] = ((_UNSIGNED_T(TYPE))atmp[i] >> lanesize_1 ) + ((TYPE)1 << lanesize_1) - 1; \
+                }else{ \
+                    limit = (TYPE)1 << (lanesize_1 - btmp[i]); \
+                    if((atmp[i] >= limit)||(atmp[i] <= -limit)) \
+                        res[i] = ((_UNSIGNED_T(TYPE))atmp[i] >> lanesize_1 ) + ((TYPE)1 << lanesize_1) - 1; \
+                    else res[i] = atmp[i] << btmp[i]; }}}} \
+        return _mm_load_si128((__m128i*)res);
+
+#define SERIAL_SATURATING_SHIFT_UNSIGNED(TYPE, LENMAX, LEN) \
+        _NEON2SSE_ALIGN_16 _UNSIGNED_T(TYPE) atmp[LENMAX], res[LENMAX]; _NEON2SSE_ALIGN_16 TYPE btmp[LENMAX]; _UNSIGNED_T(TYPE) limit; int i; \
+        TYPE lanesize = (sizeof(TYPE) << 3); \
+        _mm_store_si128((__m128i*)atmp, a); _mm_store_si128((__m128i*)btmp, b); \
+        for (i = 0; i<LEN; i++) { \
+        if (atmp[i] ==0) res[i] = 0; \
+        else{ \
+            if(btmp[i] <0) res[i] = atmp[i] >> (-btmp[i]); \
+            else{ \
+                if (btmp[i]>lanesize) res[i] = (_UNSIGNED_T(TYPE))(~0ll); \
+                else{ \
+                    limit = (TYPE) 1 << (lanesize - btmp[i]); \
+                    res[i] = ( atmp[i] >= limit) ? (_UNSIGNED_T(TYPE))(~0ll) : atmp[i] << btmp[i]; }}}} \
+        return _mm_load_si128((__m128i*)res);
+
+#define SERIAL_SATURATING_SHIFT_SIGNED_64(TYPE, LEN) \
+        int ## TYPE ## x ## LEN ## _t res; int ## TYPE ## _t limit; int i; \
+        int lanesize_1 = (sizeof( int ## TYPE ## _t) << 3) - 1; \
+        for (i = 0; i<LEN; i++) { \
+        if (a.m64_i ## TYPE[i] ==0) res.m64_i ## TYPE[i] = 0; \
+        else{ \
+            if(b.m64_i ## TYPE[i] <0) res.m64_i ## TYPE[i] = a.m64_i ## TYPE[i] >> (-(b.m64_i ## TYPE[i])); \
+            else{ \
+                if (b.m64_i ## TYPE[i]>lanesize_1) { \
+                    res.m64_i ## TYPE[i] = ((_UNSIGNED_T(int ## TYPE ## _t))a.m64_i ## TYPE[i] >> lanesize_1 ) + ((int ## TYPE ## _t) 1 << lanesize_1) - 1; \
+                }else{ \
+                    limit = (int ## TYPE ## _t) 1 << (lanesize_1 - b.m64_i ## TYPE[i]); \
+                    if((a.m64_i ## TYPE[i] >= limit)||(a.m64_i ## TYPE[i] <= -limit)) \
+                        res.m64_i ## TYPE[i] = ((_UNSIGNED_T(int ## TYPE ## _t))a.m64_i ## TYPE[i] >> lanesize_1 ) + ((int ## TYPE ## _t) 1 << lanesize_1) - 1; \
+                    else res.m64_i ## TYPE[i] = a.m64_i ## TYPE[i] << b.m64_i ## TYPE[i]; }}}} \
+        return res;
+
+#define SERIAL_SATURATING_SHIFT_UNSIGNED_64(TYPE, LEN) \
+        int ## TYPE ## x ## LEN ## _t res; _UNSIGNED_T(int ## TYPE ## _t) limit; int i; \
+        int ## TYPE ## _t lanesize = (sizeof(int ## TYPE ## _t) << 3); \
+        for (i = 0; i<LEN; i++) { \
+        if (a.m64_u ## TYPE[i] ==0) res.m64_u ## TYPE[i] = 0; \
+        else{ \
+            if(b.m64_i ## TYPE[i] <0) res.m64_u ## TYPE[i] = a.m64_u ## TYPE[i] >> (-(b.m64_i ## TYPE[i])); \
+            else{ \
+                if (b.m64_i ## TYPE[i]>lanesize) res.m64_u ## TYPE[i] = (_UNSIGNED_T(int ## TYPE ## _t))(~0ll); \
+                else{ \
+                    limit = (int ## TYPE ## _t) 1 << (lanesize - b.m64_i ## TYPE[i]); \
+                    res.m64_u ## TYPE[i] = ( a.m64_u ## TYPE[i] >= limit) ? (_UNSIGNED_T(int ## TYPE ## _t))(~0ll) : a.m64_u ## TYPE[i] << b.m64_u ## TYPE[i]; }}}} \
+        return res;
+
+_NEON2SSESTORAGE int8x8_t vqshl_s8(int8x8_t a, int8x8_t b); // VQSHL.S8 d0,d0,d0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int8x8_t vqshl_s8(int8x8_t a, int8x8_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_SATURATING_SHIFT_SIGNED_64(8,8)
+}
+
+_NEON2SSESTORAGE int16x4_t vqshl_s16(int16x4_t a, int16x4_t b); // VQSHL.S16 d0,d0,d0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int16x4_t vqshl_s16(int16x4_t a, int16x4_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_SATURATING_SHIFT_SIGNED_64(16,4)
+}
+
+_NEON2SSESTORAGE int32x2_t vqshl_s32(int32x2_t a, int32x2_t b); // VQSHL.S32 d0,d0,d0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int32x2_t vqshl_s32(int32x2_t a, int32x2_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_SATURATING_SHIFT_SIGNED_64(32,2)
+}
+
+_NEON2SSESTORAGE int64x1_t vqshl_s64(int64x1_t a, int64x1_t b); // VQSHL.S64 d0,d0,d0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x1_t vqshl_s64(int64x1_t a, int64x1_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_SATURATING_SHIFT_SIGNED_64(64,1)
+}
+
+_NEON2SSESTORAGE uint8x8_t vqshl_u8(uint8x8_t a, int8x8_t b); // VQSHL.U8 d0,d0,d0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint8x8_t vqshl_u8(uint8x8_t a, int8x8_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_SATURATING_SHIFT_UNSIGNED_64(8,8)
+}
+
+_NEON2SSESTORAGE uint16x4_t vqshl_u16(uint16x4_t a, int16x4_t b); // VQSHL.s16 d0,d0,d0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint16x4_t vqshl_u16(uint16x4_t a, int16x4_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_SATURATING_SHIFT_UNSIGNED_64(16,4)
+}
+
+_NEON2SSESTORAGE uint32x2_t vqshl_u32(uint32x2_t a, int32x2_t b); // VQSHL.U32 d0,d0,d0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint32x2_t vqshl_u32(uint32x2_t a, int32x2_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_SATURATING_SHIFT_UNSIGNED_64(32,2)
+}
+
+_NEON2SSESTORAGE uint64x1_t vqshl_u64(uint64x1_t a, int64x1_t b); // VQSHL.U64 d0,d0,d0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint64x1_t vqshl_u64(uint64x1_t a, int64x1_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_SATURATING_SHIFT_UNSIGNED_64(64,1)
+}
+
+_NEON2SSESTORAGE int8x16_t vqshlq_s8(int8x16_t a, int8x16_t b); // VQSHL.S8 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int8x16_t vqshlq_s8(int8x16_t a, int8x16_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_SATURATING_SHIFT_SIGNED(int8_t, 16, 16)
+}
+
+_NEON2SSESTORAGE int16x8_t vqshlq_s16(int16x8_t a, int16x8_t b); // VQSHL.S16 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int16x8_t vqshlq_s16(int16x8_t a, int16x8_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_SATURATING_SHIFT_SIGNED(int16_t, 8, 8)
+}
+
+_NEON2SSESTORAGE int32x4_t vqshlq_s32(int32x4_t a, int32x4_t b); // VQSHL.S32 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int32x4_t vqshlq_s32(int32x4_t a, int32x4_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_SATURATING_SHIFT_SIGNED(int32_t, 4, 4)
+}
+
+_NEON2SSESTORAGE int64x2_t vqshlq_s64(int64x2_t a, int64x2_t b); // VQSHL.S64 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x2_t vqshlq_s64(int64x2_t a, int64x2_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_SATURATING_SHIFT_SIGNED(int64_t, 2, 2)
+}
+
+_NEON2SSESTORAGE uint8x16_t vqshlq_u8(uint8x16_t a, int8x16_t b); // VQSHL.U8 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint8x16_t vqshlq_u8(uint8x16_t a, int8x16_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_SATURATING_SHIFT_UNSIGNED(int8_t, 16, 16)
+}
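+
+//A minimal usage sketch (illustration only, not part of the header): positive
+//shifts saturate instead of wrapping, e.g. for int8x8_t a, b:
+//    //a = {100, -100, 1, ...}, b = {2, 2, 2, ...}
+//    int8x8_t r = vqshl_s8(a, b); //r = {127, -128, 4, ...}, 100<<2 and -100<<2 saturate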
+
+_NEON2SSESTORAGE uint16x8_t vqshlq_u16(uint16x8_t a, int16x8_t b); // VQSHL.s16 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint16x8_t vqshlq_u16(uint16x8_t a, int16x8_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_SATURATING_SHIFT_UNSIGNED(int16_t, 8, 8)
+}
+
+_NEON2SSESTORAGE uint32x4_t vqshlq_u32(uint32x4_t a, int32x4_t b); // VQSHL.U32 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint32x4_t vqshlq_u32(uint32x4_t a, int32x4_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_SATURATING_SHIFT_UNSIGNED(int32_t, 4, 4)
+}
+
+_NEON2SSESTORAGE uint64x2_t vqshlq_u64(uint64x2_t a, int64x2_t b); // VQSHL.U64 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint64x2_t vqshlq_u64(uint64x2_t a, int64x2_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_SATURATING_SHIFT_UNSIGNED(int64_t, 2, 2)
+}
+
+
+//******** Vector rounding shift left: (negative values shift right) **********
+//****************************************************************************
+//No such operations in IA32 SIMD available yet, constant shift only available, so need to do the serial solution
+//rounding makes sense for right shifts only.
+#define SERIAL_ROUNDING_SHIFT(TYPE, INTERNAL_TYPE, LENMAX, LEN) \
+        _NEON2SSE_ALIGN_16 TYPE atmp[LENMAX], res[LENMAX]; _NEON2SSE_ALIGN_16 INTERNAL_TYPE btmp[LENMAX]; INTERNAL_TYPE i, lanesize = sizeof(INTERNAL_TYPE) << 3; \
+        _mm_store_si128((__m128i*)atmp, a); _mm_store_si128((__m128i*)btmp, b); \
+        for (i = 0; i<LEN; i++) { \
+        if(btmp[i] >= 0) { \
+            if(btmp[i] >= lanesize) res[i] = 0; \
+            else res[i] = (atmp[i] << btmp[i]); \
+        }else{ \
+            res[i] = (btmp[i] < -lanesize) ? 0 : \
+                     (btmp[i] == -lanesize) ? (atmp[i] & ((INTERNAL_TYPE)1 << (-btmp[i] - 1))) >> (-btmp[i] - 1) : \
+                     (atmp[i] >> (-btmp[i])) + ( (atmp[i] & ((INTERNAL_TYPE)1 << (-btmp[i] - 1))) >> (-btmp[i] - 1) ); }} \
+        return _mm_load_si128((__m128i*)res);
+
+
+#define SERIAL_ROUNDING_SHIFT_64(TYPE, SIGN, LEN) \
+        int ## TYPE ## x ## LEN ## _t res; int i; int lanesize = sizeof(int ## TYPE ## _t) << 3; \
+        for (i = 0; i<LEN; i++) { \
+        if(b.m64_i ## TYPE[i] >= 0) { \
+            if(b.m64_i ## TYPE[i] >= lanesize) res.m64_ ## SIGN ## TYPE[i] = 0; \
+            else res.m64_ ## SIGN ## TYPE[i] = (a.m64_ ## SIGN ## TYPE[i] << b.m64_i ## TYPE[i]); \
+        }else{ \
+            res.m64_ ## SIGN ## TYPE[i] = (b.m64_i ## TYPE[i] < -lanesize) ? 0 : \
+                                          (b.m64_i ## TYPE[i] == -lanesize) ?
(a.m64_ ## SIGN ## TYPE[i] & ((int ## TYPE ## _t) 1 << (-(b.m64_i ## TYPE[i]) - 1))) >> (-(b.m64_i ## TYPE[i]) - 1) : \ + (a.m64_ ## SIGN ## TYPE[i] >> (-(b.m64_i ## TYPE[i]))) + ( (a.m64_ ## SIGN ## TYPE[i] & ((int ## TYPE ## _t) 1 << (-(b.m64_i ## TYPE[i]) - 1))) >> (-(b.m64_i ## TYPE[i]) - 1) ); }} \ + return res; + + +_NEON2SSESTORAGE int8x8_t vrshl_s8(int8x8_t a, int8x8_t b); // VRSHL.S8 d0,d0,d0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int8x8_t vrshl_s8(int8x8_t a, int8x8_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + SERIAL_ROUNDING_SHIFT_64(8,i,8) +} + +_NEON2SSESTORAGE int16x4_t vrshl_s16(int16x4_t a, int16x4_t b); // VRSHL.S16 d0,d0,d0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int16x4_t vrshl_s16(int16x4_t a, int16x4_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + SERIAL_ROUNDING_SHIFT_64(16,i,4) +} + +_NEON2SSESTORAGE int32x2_t vrshl_s32(int32x2_t a, int32x2_t b); // VRSHL.S32 d0,d0,d0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int32x2_t vrshl_s32(int32x2_t a, int32x2_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + SERIAL_ROUNDING_SHIFT_64(32,i,2) +} + +_NEON2SSESTORAGE int64x1_t vrshl_s64(int64x1_t a, int64x1_t b); // VRSHL.S64 d0,d0,d0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x1_t vrshl_s64(int64x1_t a, int64x1_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + SERIAL_ROUNDING_SHIFT_64(64,i,1) +} + +_NEON2SSESTORAGE uint8x8_t vrshl_u8(uint8x8_t a, int8x8_t b); // VRSHL.U8 d0,d0,d0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint8x8_t vrshl_u8(uint8x8_t a, int8x8_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + SERIAL_ROUNDING_SHIFT_64(8,u,8) +} + +_NEON2SSESTORAGE uint16x4_t vrshl_u16(uint16x4_t a, int16x4_t b); // VRSHL.s16 d0,d0,d0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint16x4_t vrshl_u16(uint16x4_t a, int16x4_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + SERIAL_ROUNDING_SHIFT_64(16,u,4) +} + +_NEON2SSESTORAGE uint32x2_t vrshl_u32(uint32x2_t a, int32x2_t b); // VRSHL.U32 d0,d0,d0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint32x2_t vrshl_u32(uint32x2_t a, int32x2_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + SERIAL_ROUNDING_SHIFT_64(32,u,2) +} + +_NEON2SSESTORAGE uint64x1_t vrshl_u64(uint64x1_t a, int64x1_t b); // VRSHL.U64 d0,d0,d0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint64x1_t vrshl_u64(uint64x1_t a, int64x1_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + SERIAL_ROUNDING_SHIFT_64(64,u,1) +} + +_NEON2SSESTORAGE int8x16_t vrshlq_s8(int8x16_t a, int8x16_t b); // VRSHL.S8 q0,q0,q0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int8x16_t vrshlq_s8(int8x16_t a, int8x16_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + SERIAL_ROUNDING_SHIFT(int8_t, int8_t, 16, 16) +} + +_NEON2SSESTORAGE int16x8_t vrshlq_s16(int16x8_t a, int16x8_t b); // VRSHL.S16 q0,q0,q0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int16x8_t vrshlq_s16(int16x8_t a, int16x8_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + SERIAL_ROUNDING_SHIFT(int16_t, int16_t, 8, 8) +} + +_NEON2SSESTORAGE int32x4_t vrshlq_s32(int32x4_t a, int32x4_t b); // VRSHL.S32 q0,q0,q0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int32x4_t vrshlq_s32(int32x4_t a, int32x4_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + SERIAL_ROUNDING_SHIFT(int32_t, int32_t, 4, 4) +} + +_NEON2SSESTORAGE int64x2_t vrshlq_s64(int64x2_t a, int64x2_t b); // VRSHL.S64 q0,q0,q0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x2_t vrshlq_s64(int64x2_t a, int64x2_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + SERIAL_ROUNDING_SHIFT(int64_t, int64_t, 2, 2) +} + +_NEON2SSESTORAGE uint8x16_t vrshlq_u8(uint8x16_t a, int8x16_t b); // VRSHL.U8 q0,q0,q0 +_NEON2SSE_INLINE 
_NEON2SSE_PERFORMANCE_WARNING(uint8x16_t vrshlq_u8(uint8x16_t a, int8x16_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_ROUNDING_SHIFT(uint8_t, int8_t, 16, 16)
+}
+
+_NEON2SSESTORAGE uint16x8_t vrshlq_u16(uint16x8_t a, int16x8_t b); // VRSHL.s16 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint16x8_t vrshlq_u16(uint16x8_t a, int16x8_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_ROUNDING_SHIFT(uint16_t, int16_t, 8, 8)
+}
+
+_NEON2SSESTORAGE uint32x4_t vrshlq_u32(uint32x4_t a, int32x4_t b); // VRSHL.U32 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint32x4_t vrshlq_u32(uint32x4_t a, int32x4_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_ROUNDING_SHIFT(uint32_t, int32_t, 4, 4)
+}
+
+_NEON2SSESTORAGE uint64x2_t vrshlq_u64(uint64x2_t a, int64x2_t b); // VRSHL.U64 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint64x2_t vrshlq_u64(uint64x2_t a, int64x2_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_ROUNDING_SHIFT(uint64_t, int64_t, 2, 2)
+}
+
+
+//********** Vector saturating rounding shift left: (negative values shift right) ****************
+//*************************************************************************************************
+//No such operations in IA32 SIMD unfortunately, constant shift only available, so need to do the serial solution
+//Saturation happens for left shifts only while rounding makes sense for right shifts only.
+#define SERIAL_SATURATING_ROUNDING_SHIFT_SIGNED(TYPE, LENMAX, LEN) \
+        _NEON2SSE_ALIGN_16 TYPE atmp[LENMAX], res[LENMAX], btmp[LENMAX]; TYPE limit; int i; \
+        int lanesize_1 = (sizeof(TYPE) << 3) - 1; \
+        _mm_store_si128((__m128i*)atmp, a); _mm_store_si128((__m128i*)btmp, b); \
+        for (i = 0; i<LEN; i++) { \
+        if (atmp[i] ==0) res[i] = 0; \
+        else{ \
+            if(btmp[i] <0) res[i] = (atmp[i] >> (-btmp[i])) + ( (atmp[i] & ((TYPE)1 << (-btmp[i] - 1))) >> (-btmp[i] - 1) ); \
+            else{ \
+                if (btmp[i]>lanesize_1) { \
+                    res[i] = ((_UNSIGNED_T(TYPE))atmp[i] >> lanesize_1 ) + ((TYPE)1 << lanesize_1) - 1; \
+                }else{ \
+                    limit = (TYPE)1 << (lanesize_1 - btmp[i]); \
+                    if((atmp[i] >= limit)||(atmp[i] <= -limit)) \
+                        res[i] = ((_UNSIGNED_T(TYPE))atmp[i] >> lanesize_1 ) + ((TYPE)1 << lanesize_1) - 1; \
+                    else res[i] = atmp[i] << btmp[i]; }}}} \
+        return _mm_load_si128((__m128i*)res);
+
+#define SERIAL_SATURATING_ROUNDING_SHIFT_UNSIGNED(TYPE, LENMAX, LEN) \
+        _NEON2SSE_ALIGN_16 _UNSIGNED_T(TYPE) atmp[LENMAX], res[LENMAX]; _NEON2SSE_ALIGN_16 TYPE btmp[LENMAX]; _UNSIGNED_T(TYPE) limit; int i; \
+        int lanesize = (sizeof(TYPE) << 3); \
+        _mm_store_si128((__m128i*)atmp, a); _mm_store_si128((__m128i*)btmp, b); \
+        for (i = 0; i<LEN; i++) { \
+        if (atmp[i] ==0) res[i] = 0; \
+        else{ \
+            if(btmp[i] <0) res[i] = (atmp[i] >> (-btmp[i])) + ( (atmp[i] & ((TYPE)1 << (-btmp[i] - 1))) >> (-btmp[i] - 1) ); \
+            else{ \
+                if (btmp[i]>lanesize) res[i] = (_UNSIGNED_T(TYPE))(~0ll); \
+                else{ \
+                    limit = (TYPE) 1 << (lanesize - btmp[i]); \
+                    res[i] = ( atmp[i] >= limit) ? (_UNSIGNED_T(TYPE))(~0ll) : atmp[i] << btmp[i]; }}}} \
+        return _mm_load_si128((__m128i*)res);
+
+#define SERIAL_SATURATING_ROUNDING_SHIFT_SIGNED_64(TYPE, LEN) \
+        __m64_128 res; int ## TYPE ## _t limit; int i; \
+        int lanesize_1 = (sizeof(int ## TYPE ## _t ) << 3) - 1; \
+        for (i = 0; i<LEN; i++) { \
+        if (a.m64_i ## TYPE[i] ==0) res.m64_i ## TYPE[i] = 0; \
+        else{ \
+            if(b.m64_i ## TYPE[i] <0) res.m64_i ## TYPE[i] = (a.m64_i ## TYPE[i] >> (-(b.m64_i ## TYPE[i]))) + ( (a.m64_i ## TYPE[i] & ((int ## TYPE ## _t ) 1 << (-(b.m64_i ## TYPE[i]) - 1))) >> (-(b.m64_i ## TYPE[i]) - 1) ); \
+            else{ \
+                if (b.m64_i ## TYPE[i]>lanesize_1) { \
+                    res.m64_i ## TYPE[i] = ((_UNSIGNED_T(int ## TYPE ## _t ))a.m64_i ## TYPE[i] >> lanesize_1 ) + ((int ## TYPE ## _t ) 1 << lanesize_1) - 1; \
+                }else{ \
+                    limit = (int ## TYPE ## _t ) 1 << (lanesize_1 - b.m64_i ## TYPE[i]); \
+                    if((a.m64_i ## TYPE[i] >= limit)||(a.m64_i ## TYPE[i] <= -limit)) \
+                        res.m64_i ## TYPE[i] = ((_UNSIGNED_T(int ## TYPE ## _t ))a.m64_i ## TYPE[i] >> lanesize_1 ) + ((int ## TYPE ## _t ) 1 << lanesize_1) - 1; \
+                    else res.m64_i ## TYPE[i] = a.m64_i ## TYPE[i] << b.m64_i ## TYPE[i]; }}}} \
+        return res;
+
+#define SERIAL_SATURATING_ROUNDING_SHIFT_UNSIGNED_64(TYPE, LEN) \
+        __m64_128 res; _UNSIGNED_T(int ## TYPE ## _t) limit; int i; \
+        int lanesize = (sizeof(int ## TYPE ## _t) << 3); \
+        for (i = 0; i<LEN; i++) { \
+        if (a.m64_u ## TYPE[i] ==0) res.m64_u ## TYPE[i] = 0; \
+        else{ \
+            if(b.m64_i ## TYPE[i] <0) res.m64_u ## TYPE[i] = (a.m64_u ## TYPE[i] >> (-(b.m64_i ## TYPE[i]))) + ( (a.m64_u ## TYPE[i] & ((int ## TYPE ## _t) 1 << (-(b.m64_i ## TYPE[i]) - 1))) >> (-(b.m64_i ## TYPE[i]) - 1) ); \
+            else{ \
+                if (b.m64_i ## TYPE[i]>lanesize) res.m64_u ## TYPE[i] = (_UNSIGNED_T(int ## TYPE ## _t))(~0ll); \
+                else{ \
+                    limit = (int ## TYPE ## _t) 1 << (lanesize - b.m64_i ## TYPE[i]); \
+                    res.m64_u ## TYPE[i] = ( a.m64_u ## TYPE[i] >= limit) ? (_UNSIGNED_T(int ## TYPE ## _t))(~0ll) : a.m64_u ## TYPE[i] << b.m64_i ## TYPE[i]; }}}} \
+        return res;
+
+_NEON2SSESTORAGE int8x8_t vqrshl_s8(int8x8_t a, int8x8_t b); // VQRSHL.S8 d0,d0,d0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int8x8_t vqrshl_s8(int8x8_t a, int8x8_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_SATURATING_ROUNDING_SHIFT_SIGNED_64(8,8)
+}
+
+_NEON2SSESTORAGE int16x4_t vqrshl_s16(int16x4_t a, int16x4_t b); // VQRSHL.S16 d0,d0,d0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int16x4_t vqrshl_s16(int16x4_t a, int16x4_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_SATURATING_ROUNDING_SHIFT_SIGNED_64(16,4)
+}
+
+_NEON2SSESTORAGE int32x2_t vqrshl_s32(int32x2_t a, int32x2_t b); // VQRSHL.S32 d0,d0,d0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int32x2_t vqrshl_s32(int32x2_t a, int32x2_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_SATURATING_ROUNDING_SHIFT_SIGNED_64(32,2)
+}
+
+_NEON2SSESTORAGE int64x1_t vqrshl_s64(int64x1_t a, int64x1_t b); // VQRSHL.S64 d0,d0,d0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x1_t vqrshl_s64(int64x1_t a, int64x1_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_SATURATING_ROUNDING_SHIFT_SIGNED_64(64,1)
+}
+
+_NEON2SSESTORAGE uint8x8_t vqrshl_u8(uint8x8_t a, int8x8_t b); // VQRSHL.U8 d0,d0,d0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint8x8_t vqrshl_u8(uint8x8_t a, int8x8_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_SATURATING_ROUNDING_SHIFT_UNSIGNED_64(8,8)
+}
+
+_NEON2SSESTORAGE uint16x4_t vqrshl_u16(uint16x4_t a, int16x4_t b); // VQRSHL.s16 d0,d0,d0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint16x4_t vqrshl_u16(uint16x4_t a, int16x4_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    SERIAL_SATURATING_ROUNDING_SHIFT_UNSIGNED_64(16,4)
+}
+
+_NEON2SSESTORAGE uint32x2_t vqrshl_u32(uint32x2_t a, int32x2_t b); // VQRSHL.U32 d0,d0,d0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint32x2_t vqrshl_u32(uint32x2_t a, int32x2_t b),
_NEON2SSE_REASON_SLOW_SERIAL) +{ + SERIAL_SATURATING_ROUNDING_SHIFT_UNSIGNED_64(32,2) +} + +_NEON2SSESTORAGE uint64x1_t vqrshl_u64(uint64x1_t a, int64x1_t b); // VQRSHL.U64 d0,d0,d0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint64x1_t vqrshl_u64(uint64x1_t a, int64x1_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + SERIAL_SATURATING_ROUNDING_SHIFT_UNSIGNED_64(64,1) +} + +_NEON2SSESTORAGE int8x16_t vqrshlq_s8(int8x16_t a, int8x16_t b); // VQRSHL.S8 q0,q0,q0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int8x16_t vqrshlq_s8(int8x16_t a, int8x16_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + SERIAL_SATURATING_ROUNDING_SHIFT_SIGNED(int8_t, 16, 16) +} + +_NEON2SSESTORAGE int16x8_t vqrshlq_s16(int16x8_t a, int16x8_t b); // VQRSHL.S16 q0,q0,q0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int16x8_t vqrshlq_s16(int16x8_t a, int16x8_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + SERIAL_SATURATING_ROUNDING_SHIFT_SIGNED(int16_t, 8, 8) +} + +_NEON2SSESTORAGE int32x4_t vqrshlq_s32(int32x4_t a, int32x4_t b); // VQRSHL.S32 q0,q0,q0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int32x4_t vqrshlq_s32(int32x4_t a, int32x4_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + SERIAL_SATURATING_ROUNDING_SHIFT_SIGNED(int32_t, 4, 4) +} + +_NEON2SSESTORAGE int64x2_t vqrshlq_s64(int64x2_t a, int64x2_t b); // VQRSHL.S64 q0,q0,q0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x2_t vqrshlq_s64(int64x2_t a, int64x2_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + SERIAL_SATURATING_ROUNDING_SHIFT_SIGNED(int64_t, 2, 2) +} + +_NEON2SSESTORAGE uint8x16_t vqrshlq_u8(uint8x16_t a, int8x16_t b); // VQRSHL.U8 q0,q0,q0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint8x16_t vqrshlq_u8(uint8x16_t a, int8x16_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + SERIAL_SATURATING_ROUNDING_SHIFT_UNSIGNED(int8_t, 16, 16) +} + +_NEON2SSESTORAGE uint16x8_t vqrshlq_u16(uint16x8_t a, int16x8_t b); // VQRSHL.s16 q0,q0,q0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint16x8_t vqrshlq_u16(uint16x8_t a, int16x8_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + SERIAL_SATURATING_ROUNDING_SHIFT_UNSIGNED(int16_t, 8, 8) +} + +_NEON2SSESTORAGE uint32x4_t vqrshlq_u32(uint32x4_t a, int32x4_t b); // VQRSHL.U32 q0,q0,q0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint32x4_t vqrshlq_u32(uint32x4_t a, int32x4_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + SERIAL_SATURATING_ROUNDING_SHIFT_UNSIGNED(int32_t, 4, 4) +} + +_NEON2SSESTORAGE uint64x2_t vqrshlq_u64(uint64x2_t a, int64x2_t b); // VQRSHL.U64 q0,q0,q0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint64x2_t vqrshlq_u64(uint64x2_t a, int64x2_t b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + SERIAL_SATURATING_ROUNDING_SHIFT_UNSIGNED(int64_t, 2, 2) +} + +// ********************************************************************************* +// ***************************** Shifts by a constant ***************************** +// ********************************************************************************* +//**************** Vector shift right by constant************************************* +//************************************************************************************ +_NEON2SSESTORAGE int8x8_t vshr_n_s8(int8x8_t a, __constrange(1,8) int b); // VSHR.S8 d0,d0,#8 +_NEON2SSE_INLINE int8x8_t vshr_n_s8(int8x8_t a, __constrange(1,8) int b) // VSHR.S8 d0,d0,#8 +{ + //no 8 bit shift available, go to 16 bit + int8x8_t res64; + __m128i r; + r = _MM_CVTEPI8_EPI16 (_pM128i(a)); //SSE 4.1 + r = _mm_srai_epi16 (r, b); //SSE2 + r = _mm_packs_epi16 (r,r); //we need 64 bits only + return64(r); +} + +_NEON2SSESTORAGE int16x4_t vshr_n_s16(int16x4_t a, 
__constrange(1,16) int b); // VSHR.S16 d0,d0,#16 +_NEON2SSE_INLINE int16x4_t vshr_n_s16(int16x4_t a, __constrange(1,16) int b) +{ + int16x4_t res64; + return64(_mm_srai_epi16(_pM128i(a), b)); +} + + +_NEON2SSESTORAGE int32x2_t vshr_n_s32(int32x2_t a, __constrange(1,32) int b); // VSHR.S32 d0,d0,#32 +_NEON2SSE_INLINE int32x2_t vshr_n_s32(int32x2_t a, __constrange(1,32) int b) +{ + int32x2_t res64; + return64(_mm_srai_epi32(_pM128i(a), b)); +} + +_NEON2SSESTORAGE int64x1_t vshr_n_s64(int64x1_t a, __constrange(1,64) int b); // VSHR.S64 d0,d0,#64 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x1_t vshr_n_s64(int64x1_t a, __constrange(1,64) int b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + //no arithmetic shift for 64bit values, serial solution used + int64x1_t res; + if(b>=64) res.m64_i64[0] = 0; + else res.m64_i64[0] = (*(int64_t*)&a) >> b; + return res; +} + +_NEON2SSESTORAGE uint8x8_t vshr_n_u8(uint8x8_t a, __constrange(1,8) int b); // VSHR.U8 d0,d0,#8 +_NEON2SSE_INLINE uint8x8_t vshr_n_u8(uint8x8_t a, __constrange(1,8) int b) // VSHR.U8 d0,d0,#8 +{ + //no 8 bit shift available, go to 16 bit + uint8x8_t res64; + __m128i r; + r = _MM_CVTEPU8_EPI16 (_pM128i(a)); //SSE 4.1 + r = _mm_srli_epi16 (r, b); //for unsigned variables we use the logical shift not arithmetical one + r = _mm_packus_epi16 (r,r); //we need 64 bits only + return64(r); +} + +_NEON2SSESTORAGE uint16x4_t vshr_n_u16(uint16x4_t a, __constrange(1,16) int b); // VSHR.s16 d0,d0,#16 +_NEON2SSE_INLINE uint16x4_t vshr_n_u16(uint16x4_t a, __constrange(1,16) int b) +{ + uint16x4_t res64; + return64(_mm_srli_epi16(_pM128i(a), b)); +} + + +_NEON2SSESTORAGE uint32x2_t vshr_n_u32(uint32x2_t a, __constrange(1,32) int b); // VSHR.U32 d0,d0,#32 +_NEON2SSE_INLINE uint32x2_t vshr_n_u32(uint32x2_t a, __constrange(1,32) int b) +{ + uint32x2_t res64; + return64(_mm_srli_epi32(_pM128i(a), b)); +} + + +_NEON2SSESTORAGE uint64x1_t vshr_n_u64(uint64x1_t a, __constrange(1,64) int b); // VSHR.U64 d0,d0,#64 +_NEON2SSE_INLINE uint64x1_t vshr_n_u64(uint64x1_t a, __constrange(1,64) int b) +{ + uint64x1_t res64; + return64(_mm_srli_epi64(_pM128i(a), b)); +} + + +_NEON2SSESTORAGE int8x16_t vshrq_n_s8(int8x16_t a, __constrange(1,8) int b); // VSHR.S8 q0,q0,#8 +_NEON2SSE_INLINE int8x16_t vshrq_n_s8(int8x16_t a, __constrange(1,8) int b) // VSHR.S8 q0,q0,#8 +{ + //no 8 bit shift available, go to 16 bit trick + __m128i zero, mask0, a_sign, r, a_sign_mask; + _NEON2SSE_ALIGN_16 static const int16_t mask0_16[9] = {0x0000, 0x0080, 0x00c0, 0x00e0, 0x00f0, 0x00f8, 0x00fc, 0x00fe, 0x00ff}; + zero = _mm_setzero_si128(); + mask0 = _mm_set1_epi16(mask0_16[b]); //to mask the bits to be "spoiled" by 16 bit shift + a_sign = _mm_cmpgt_epi8 (zero, a); //ff if a<0 or zero if a>0 + r = _mm_srai_epi16 (a, b); + a_sign_mask = _mm_and_si128 (mask0, a_sign); + r = _mm_andnot_si128 (mask0, r); + return _mm_or_si128 (r, a_sign_mask); +} + +_NEON2SSE_GLOBAL int16x8_t vshrq_n_s16(int16x8_t a, __constrange(1,16) int b); // VSHR.S16 q0,q0,#16 +#define vshrq_n_s16 _mm_srai_epi16 + +_NEON2SSE_GLOBAL int32x4_t vshrq_n_s32(int32x4_t a, __constrange(1,32) int b); // VSHR.S32 q0,q0,#32 +#define vshrq_n_s32 _mm_srai_epi32 + +_NEON2SSESTORAGE int64x2_t vshrq_n_s64(int64x2_t a, __constrange(1,64) int b); // VSHR.S64 q0,q0,#64 +_NEON2SSE_INLINE int64x2_t vshrq_n_s64(int64x2_t a, __constrange(1,64) int b) +{ + //SIMD implementation may be not optimal due to 64 bit arithmetic shift absence in x86 SIMD + __m128i c1, signmask,a0, res64; + _NEON2SSE_ALIGN_16 static const uint64_t mask[] = 
{0x8000000000000000, 0x8000000000000000}; + c1 = _mm_cmpeq_epi32(a,a); //0xffffffffffffffff + signmask = _mm_slli_epi64 (c1, (64 - b)); + a0 = _mm_or_si128(a, *(__m128i*)mask); //get the first bit + a0 = _MM_CMPEQ_EPI64 (a, a0); + signmask = _mm_and_si128(a0, signmask); + res64 = _mm_srli_epi64 (a, b); + return _mm_or_si128(res64, signmask); +} + +_NEON2SSESTORAGE uint8x16_t vshrq_n_u8(uint8x16_t a, __constrange(1,8) int b); // VSHR.U8 q0,q0,#8 +_NEON2SSE_INLINE uint8x16_t vshrq_n_u8(uint8x16_t a, __constrange(1,8) int b) // VSHR.U8 q0,q0,#8 +{ + //no 8 bit shift available, need the special trick + __m128i mask0, r; + _NEON2SSE_ALIGN_16 static const uint16_t mask10_16[9] = {0xffff, 0xff7f, 0xff3f, 0xff1f, 0xff0f, 0xff07, 0xff03, 0xff01, 0xff00}; + mask0 = _mm_set1_epi16(mask10_16[b]); //to mask the bits to be "spoiled" by 16 bit shift + r = _mm_srli_epi16 ( a, b); + return _mm_and_si128 (r, mask0); +} + +_NEON2SSE_GLOBAL uint16x8_t vshrq_n_u16(uint16x8_t a, __constrange(1,16) int b); // VSHR.s16 q0,q0,#16 +#define vshrq_n_u16 _mm_srli_epi16 + +_NEON2SSE_GLOBAL uint32x4_t vshrq_n_u32(uint32x4_t a, __constrange(1,32) int b); // VSHR.U32 q0,q0,#32 +#define vshrq_n_u32 _mm_srli_epi32 + +_NEON2SSE_GLOBAL uint64x2_t vshrq_n_u64(uint64x2_t a, __constrange(1,64) int b); // VSHR.U64 q0,q0,#64 +#define vshrq_n_u64 _mm_srli_epi64 + +//*************************** Vector shift left by constant ************************* +//********************************************************************************* +_NEON2SSESTORAGE int8x8_t vshl_n_s8(int8x8_t a, __constrange(0,7) int b); // VSHL.I8 d0,d0,#0 +_NEON2SSE_INLINE int8x8_t vshl_n_s8(int8x8_t a, __constrange(0,7) int b) // VSHL.I8 d0,d0,#0 +{ + //no 8 bit shift available, go to 16 bit + int8x8_t res64; + __m128i r; + r = _MM_CVTEPI8_EPI16 (_pM128i(a)); //SSE 4.1 + r = _mm_slli_epi16 (r, b); //SSE2 + r = _mm_shuffle_epi8 (r, *(__m128i*) mask8_16_even_odd); //return to 8 bit, we need 64 bits only + return64(r); +} + +_NEON2SSESTORAGE int16x4_t vshl_n_s16(int16x4_t a, __constrange(0,15) int b); // VSHL.I16 d0,d0,#0 +_NEON2SSE_INLINE int16x4_t vshl_n_s16(int16x4_t a, __constrange(0,15) int b) +{ + int16x4_t res64; + return64(_mm_slli_epi16(_pM128i(a), b)); +} + + +_NEON2SSESTORAGE int32x2_t vshl_n_s32(int32x2_t a, __constrange(0,31) int b); // VSHL.I32 d0,d0,#0 +_NEON2SSE_INLINE int32x2_t vshl_n_s32(int32x2_t a, __constrange(0,31) int b) +{ + int32x2_t res64; + return64(_mm_slli_epi32(_pM128i(a), b)); +} + + +_NEON2SSESTORAGE int64x1_t vshl_n_s64(int64x1_t a, __constrange(0,63) int b); // VSHL.I64 d0,d0,#0 +_NEON2SSE_INLINE int64x1_t vshl_n_s64(int64x1_t a, __constrange(0,63) int b) +{ + int64x1_t res64; + return64(_mm_slli_epi64(_pM128i(a), b)); +} + + +_NEON2SSESTORAGE uint8x8_t vshl_n_u8(uint8x8_t a, __constrange(0,7) int b); // VSHL.I8 d0,d0,#0 +_NEON2SSE_INLINE uint8x8_t vshl_n_u8(uint8x8_t a, __constrange(0,7) int b) +{ + //no 8 bit shift available, go to 16 bit + uint8x8_t res64; + __m128i mask8; + __m128i r; + mask8 = _mm_set1_epi16(0xff); + r = _MM_CVTEPU8_EPI16 (_pM128i(a)); //SSE 4.1 + r = _mm_slli_epi16 (r, b); //SSE2 + r = _mm_and_si128(r, mask8); //to avoid saturation + r = _mm_packus_epi16 (r,r); //we need 64 bits only + return64(r); +} + +_NEON2SSE_GLOBAL uint16x4_t vshl_n_u16(uint16x4_t a, __constrange(0,15) int b); // VSHL.I16 d0,d0,#0 +#define vshl_n_u16 vshl_n_s16 + + +_NEON2SSE_GLOBAL uint32x2_t vshl_n_u32(uint32x2_t a, __constrange(0,31) int b); // VSHL.I32 d0,d0,#0 +#define vshl_n_u32 vshl_n_s32 + +_NEON2SSE_GLOBAL uint64x1_t 
vshl_n_u64(uint64x1_t a, __constrange(0,63) int b); // VSHL.I64 d0,d0,#0 +#define vshl_n_u64 vshl_n_s64 + +_NEON2SSE_GLOBAL int8x16_t vshlq_n_s8(int8x16_t a, __constrange(0,7) int b); // VSHL.I8 q0,q0,#0 +#define vshlq_n_s8 vshlq_n_u8 + +_NEON2SSE_GLOBAL int16x8_t vshlq_n_s16(int16x8_t a, __constrange(0,15) int b); // VSHL.I16 q0,q0,#0 +#define vshlq_n_s16 _mm_slli_epi16 + +_NEON2SSE_GLOBAL int32x4_t vshlq_n_s32(int32x4_t a, __constrange(0,31) int b); // VSHL.I32 q0,q0,#0 +#define vshlq_n_s32 _mm_slli_epi32 + +_NEON2SSE_GLOBAL int64x2_t vshlq_n_s64(int64x2_t a, __constrange(0,63) int b); // VSHL.I64 q0,q0,#0 +#define vshlq_n_s64 _mm_slli_epi64 + +_NEON2SSESTORAGE uint8x16_t vshlq_n_u8(uint8x16_t a, __constrange(0,7) int b); // VSHL.I8 q0,q0,#0 +_NEON2SSE_INLINE uint8x16_t vshlq_n_u8(uint8x16_t a, __constrange(0,7) int b) +{ + //no 8 bit shift available, need the special trick + __m128i mask0, r; + _NEON2SSE_ALIGN_16 static const uint16_t mask10_16[9] = {0xffff, 0xfeff, 0xfcff, 0xf8ff, 0xf0ff, 0xe0ff, 0xc0ff, 0x80ff, 0xff}; + mask0 = _mm_set1_epi16(mask10_16[b]); //to mask the bits to be "spoiled" by 16 bit shift + r = _mm_slli_epi16 ( a, b); + return _mm_and_si128 (r, mask0); +} + +_NEON2SSE_GLOBAL uint16x8_t vshlq_n_u16(uint16x8_t a, __constrange(0,15) int b); // VSHL.I16 q0,q0,#0 +#define vshlq_n_u16 vshlq_n_s16 + +_NEON2SSE_GLOBAL uint32x4_t vshlq_n_u32(uint32x4_t a, __constrange(0,31) int b); // VSHL.I32 q0,q0,#0 +#define vshlq_n_u32 vshlq_n_s32 + +_NEON2SSE_GLOBAL uint64x2_t vshlq_n_u64(uint64x2_t a, __constrange(0,63) int b); // VSHL.I64 q0,q0,#0 +#define vshlq_n_u64 vshlq_n_s64 + +//************* Vector rounding shift right by constant ****************** +//************************************************************************* +//No corresponding x86 intrinsics exist, need to do some tricks +_NEON2SSESTORAGE int8x8_t vrshr_n_s8(int8x8_t a, __constrange(1,8) int b); // VRSHR.S8 d0,d0,#8 +_NEON2SSE_INLINE int8x8_t vrshr_n_s8(int8x8_t a, __constrange(1,8) int b) // VRSHR.S8 d0,d0,#8 +{ + //no 8 bit shift available, go to 16 bit + int8x8_t res64; + __m128i r, maskb; + r = _MM_CVTEPI8_EPI16 (_pM128i(a)); //SSE 4.1 + maskb = _mm_slli_epi16 (r, (16 - b)); //to get rounding (b-1)th bit + maskb = _mm_srli_epi16 (maskb, 15); //1 or 0 + r = _mm_srai_epi16 (r, b); + r = _mm_add_epi16 (r, maskb); //actual rounding + r = _mm_packs_epi16 (r,r); ////we need 64 bits only + return64(r); +} + +_NEON2SSESTORAGE int16x4_t vrshr_n_s16(int16x4_t a, __constrange(1,16) int b); // VRSHR.S16 d0,d0,#16 +_NEON2SSE_INLINE int16x4_t vrshr_n_s16(int16x4_t a, __constrange(1,16) int b) +{ + int16x4_t res64; + return64(vrshrq_n_s16(_pM128i(a), b)); +} + + +_NEON2SSESTORAGE int32x2_t vrshr_n_s32(int32x2_t a, __constrange(1,32) int b); // VRSHR.S32 d0,d0,#32 +_NEON2SSE_INLINE int32x2_t vrshr_n_s32(int32x2_t a, __constrange(1,32) int b) +{ + int32x2_t res64; + return64(vrshrq_n_s32(_pM128i(a), b)); +} + + +_NEON2SSESTORAGE int64x1_t vrshr_n_s64(int64x1_t a, __constrange(1,64) int b); // VRSHR.S64 d0,d0,#64 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x1_t vrshr_n_s64(int64x1_t a, __constrange(1,64) int b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + //serial solution is faster + int64x1_t res; + int64_t a_i64 = *( int64_t*)&a; + if(b==64) { + res.m64_i64[0] = 0; //for some compilers rounding happens and we need to use(a_i64 & _SIGNBIT64)>>63; + } else { + int64_t maskb = a_i64 & (( int64_t)1 << (b - 1)); + res.m64_i64[0] = (a_i64 >> b) + (maskb >> (b - 1)); + } + return res; +} + +_NEON2SSESTORAGE uint8x8_t 
vrshr_n_u8(uint8x8_t a, __constrange(1,8) int b); // VRSHR.U8 d0,d0,#8 +_NEON2SSE_INLINE uint8x8_t vrshr_n_u8(uint8x8_t a, __constrange(1,8) int b) // VRSHR.U8 d0,d0,#8 +{ + //no 8 bit shift available, go to 16 bit, solution may be not optimal compared with the serial one + uint8x8_t res64; + __m128i r, maskb; + r = _MM_CVTEPU8_EPI16 (_pM128i(a)); //SSE 4.1 + maskb = _mm_slli_epi16 (r, (16 - b)); //to get rounding (b-1)th bit + maskb = _mm_srli_epi16 (maskb, 15); //1 or 0 + r = _mm_srli_epi16 (r, b); + r = _mm_add_epi16 (r, maskb); //actual rounding + r = _mm_packus_epi16 (r,r); ////we need 64 bits only + return64(r); +} + +_NEON2SSESTORAGE uint16x4_t vrshr_n_u16(uint16x4_t a, __constrange(1,16) int b); // VRSHR.s16 d0,d0,#16 +_NEON2SSE_INLINE uint16x4_t vrshr_n_u16(uint16x4_t a, __constrange(1,16) int b) +{ + uint16x4_t res64; + return64(vrshrq_n_u16(_pM128i(a), b)); +} + + +_NEON2SSESTORAGE uint32x2_t vrshr_n_u32(uint32x2_t a, __constrange(1,32) int b); // VRSHR.U32 d0,d0,#32 +_NEON2SSE_INLINE uint32x2_t vrshr_n_u32(uint32x2_t a, __constrange(1,32) int b) +{ + uint32x2_t res64; + return64(vrshrq_n_u32(_pM128i(a), b)); +} + + +_NEON2SSESTORAGE uint64x1_t vrshr_n_u64(uint64x1_t a, __constrange(1,64) int b); // VRSHR.U64 d0,d0,#64 +_NEON2SSE_INLINE uint64x1_t vrshr_n_u64(uint64x1_t a, __constrange(1,64) int b) +{ + uint64x1_t res64; + return64(vrshrq_n_u64(_pM128i(a), b)); +} + +_NEON2SSESTORAGE int8x16_t vrshrq_n_s8(int8x16_t a, __constrange(1,8) int b); // VRSHR.S8 q0,q0,#8 +_NEON2SSE_INLINE int8x16_t vrshrq_n_s8(int8x16_t a, __constrange(1,8) int b) // VRSHR.S8 q0,q0,#8 +{ + //no 8 bit shift available, go to 16 bit trick + __m128i r, mask1, maskb; + _NEON2SSE_ALIGN_16 static const uint16_t mask2b[9] = {0x0000, 0x0101, 0x0202, 0x0404, 0x0808, 0x1010, 0x2020, 0x4040, 0x8080}; // 2^b-th bit set to 1 + r = vshrq_n_s8 (a, b); + mask1 = _mm_set1_epi16(mask2b[b]); // 2^b-th bit set to 1 for 16bit, need it for rounding + maskb = _mm_and_si128(a, mask1); //get b or 0 for rounding + maskb = _mm_srli_epi16 (maskb, b - 1); // to add 1 + return _mm_add_epi8(r, maskb); //actual rounding +} + +_NEON2SSESTORAGE int16x8_t vrshrq_n_s16(int16x8_t a, __constrange(1,16) int b); // VRSHR.S16 q0,q0,#16 +_NEON2SSE_INLINE int16x8_t vrshrq_n_s16(int16x8_t a, __constrange(1,16) int b) // VRSHR.S16 q0,q0,#16 +{ + __m128i maskb, r; + maskb = _mm_slli_epi16(a, (16 - b)); //to get rounding (b-1)th bit + maskb = _mm_srli_epi16(maskb, 15); //1 or 0 + r = _mm_srai_epi16 (a, b); + return _mm_add_epi16 (r, maskb); //actual rounding +} + +_NEON2SSESTORAGE int32x4_t vrshrq_n_s32(int32x4_t a, __constrange(1,32) int b); // VRSHR.S32 q0,q0,#32 +_NEON2SSE_INLINE int32x4_t vrshrq_n_s32(int32x4_t a, __constrange(1,32) int b) // VRSHR.S32 q0,q0,#32 +{ + __m128i maskb, r; + maskb = _mm_slli_epi32 (a, (32 - b)); //to get rounding (b-1)th bit + maskb = _mm_srli_epi32 (maskb,31); //1 or 0 + r = _mm_srai_epi32(a, b); + return _mm_add_epi32 (r, maskb); //actual rounding +} + +_NEON2SSESTORAGE int64x2_t vrshrq_n_s64(int64x2_t a, __constrange(1,64) int b); // VRSHR.S64 q0,q0,#64 +_NEON2SSE_INLINE int64x2_t vrshrq_n_s64(int64x2_t a, __constrange(1,64) int b) +{ + //solution may be not optimal compared with a serial one + __m128i maskb; + int64x2_t r; + maskb = _mm_slli_epi64 (a, (64 - b)); //to get rounding (b-1)th bit + maskb = _mm_srli_epi64 (maskb,63); //1 or 0 + r = vshrq_n_s64(a, b); + return _mm_add_epi64 (r, maskb); //actual rounding +} + +_NEON2SSESTORAGE uint8x16_t vrshrq_n_u8(uint8x16_t a, __constrange(1,8) int b); // VRSHR.U8 
q0,q0,#8 +_NEON2SSE_INLINE uint8x16_t vrshrq_n_u8(uint8x16_t a, __constrange(1,8) int b) // VRSHR.U8 q0,q0,#8 +{ + //no 8 bit shift available, go to 16 bit trick + __m128i r, mask1, maskb; + _NEON2SSE_ALIGN_16 static const uint16_t mask2b[9] = {0x0000, 0x0101, 0x0202, 0x0404, 0x0808, 0x1010, 0x2020, 0x4040, 0x8080}; // 2^b-th bit set to 1 + r = vshrq_n_u8 (a, b); + mask1 = _mm_set1_epi16(mask2b[b]); // 2^b-th bit set to 1 for 16bit, need it for rounding + maskb = _mm_and_si128(a, mask1); //get b or 0 for rounding + maskb = _mm_srli_epi16 (maskb, b - 1); // to add 1 + return _mm_add_epi8(r, maskb); //actual rounding +} + +_NEON2SSESTORAGE uint16x8_t vrshrq_n_u16(uint16x8_t a, __constrange(1,16) int b); // VRSHR.s16 q0,q0,#16 +_NEON2SSE_INLINE uint16x8_t vrshrq_n_u16(uint16x8_t a, __constrange(1,16) int b) // VRSHR.S16 q0,q0,#16 +{ + __m128i maskb, r; + maskb = _mm_slli_epi16(a, (16 - b)); //to get rounding (b-1)th bit + maskb = _mm_srli_epi16(maskb, 15); //1 or 0 + r = _mm_srli_epi16 (a, b); + return _mm_add_epi16 (r, maskb); //actual rounding +} + +_NEON2SSESTORAGE uint32x4_t vrshrq_n_u32(uint32x4_t a, __constrange(1,32) int b); // VRSHR.U32 q0,q0,#32 +_NEON2SSE_INLINE uint32x4_t vrshrq_n_u32(uint32x4_t a, __constrange(1,32) int b) // VRSHR.S32 q0,q0,#32 +{ + __m128i maskb, r; + maskb = _mm_slli_epi32 (a, (32 - b)); //to get rounding (b-1)th bit + maskb = _mm_srli_epi32 (maskb,31); //1 or 0 + r = _mm_srli_epi32(a, b); + return _mm_add_epi32 (r, maskb); //actual rounding +} + +_NEON2SSESTORAGE uint64x2_t vrshrq_n_u64(uint64x2_t a, __constrange(1,64) int b); // VRSHR.U64 q0,q0,#64 +_NEON2SSE_INLINE uint64x2_t vrshrq_n_u64(uint64x2_t a, __constrange(1,64) int b) +{ + //solution may be not optimal compared with a serial one + __m128i maskb, r; + maskb = _mm_slli_epi64 (a, (64 - b)); //to get rounding (b-1)th bit + maskb = _mm_srli_epi64 (maskb,63); //1 or 0 + r = _mm_srli_epi64(a, b); + return _mm_add_epi64 (r, maskb); //actual rounding +} + +//************* Vector shift right by constant and accumulate ********* +//********************************************************************* +_NEON2SSESTORAGE int8x8_t vsra_n_s8(int8x8_t a, int8x8_t b, __constrange(1,8) int c); // VSRA.S8 d0,d0,#8 +_NEON2SSE_INLINE int8x8_t vsra_n_s8(int8x8_t a, int8x8_t b, __constrange(1,8) int c) // VSRA.S8 d0,d0,#8 +{ + int8x8_t shift; + shift = vshr_n_s8(b, c); + return vadd_s8( a, shift); +} + +_NEON2SSESTORAGE int16x4_t vsra_n_s16(int16x4_t a, int16x4_t b, __constrange(1,16) int c); // VSRA.S16 d0,d0,#16 +_NEON2SSE_INLINE int16x4_t vsra_n_s16(int16x4_t a, int16x4_t b, __constrange(1,16) int c) // VSRA.S16 d0,d0,#16 +{ + int16x4_t shift; + shift = vshr_n_s16( b, c); + return vadd_s16(a, shift); +} + +_NEON2SSESTORAGE int32x2_t vsra_n_s32(int32x2_t a, int32x2_t b, __constrange(1,32) int c); // VSRA.S32 d0,d0,#32 +_NEON2SSE_INLINE int32x2_t vsra_n_s32(int32x2_t a, int32x2_t b, __constrange(1,32) int c) // VSRA.S32 d0,d0,#32 +{ + //may be not optimal compared with the serial execution + int32x2_t shift; + shift = vshr_n_s32(b, c); + return vadd_s32( a, shift); +} + +_NEON2SSESTORAGE int64x1_t vsra_n_s64(int64x1_t a, int64x1_t b, __constrange(1,64) int c); // VSRA.S64 d0,d0,#64 +_NEON2SSE_INLINE int64x1_t vsra_n_s64(int64x1_t a, int64x1_t b, __constrange(1,64) int c) +{ + //may be not optimal compared with a serial solution + int64x1_t shift; + shift = vshr_n_s64(b, c); + return vadd_s64( a, shift); +} + +_NEON2SSESTORAGE uint8x8_t vsra_n_u8(uint8x8_t a, uint8x8_t b, __constrange(1,8) int c); // VSRA.U8 d0,d0,#8 
+_NEON2SSE_INLINE uint8x8_t vsra_n_u8(uint8x8_t a, uint8x8_t b, __constrange(1,8) int c) // VSRA.U8 d0,d0,#8 +{ + uint8x8_t shift; + shift = vshr_n_u8(b, c); + return vadd_u8(a, shift); +} + +_NEON2SSESTORAGE uint16x4_t vsra_n_u16(uint16x4_t a, uint16x4_t b, __constrange(1,16) int c); // VSRA.s16 d0,d0,#16 +_NEON2SSE_INLINE uint16x4_t vsra_n_u16(uint16x4_t a, uint16x4_t b, __constrange(1,16) int c) // VSRA.s16 d0,d0,#16 +{ + uint16x4_t shift; + shift = vshr_n_u16(b, c); + return vadd_u16(a,shift); +} + +_NEON2SSESTORAGE uint32x2_t vsra_n_u32(uint32x2_t a, uint32x2_t b, __constrange(1,32) int c); // VSRA.U32 d0,d0,#32 +_NEON2SSE_INLINE uint32x2_t vsra_n_u32(uint32x2_t a, uint32x2_t b, __constrange(1,32) int c) // VSRA.U32 d0,d0,#32 +{ + //may be not optimal compared with the serial execution + uint32x2_t shift; + shift = vshr_n_u32(b, c); + return vadd_u32( a, shift); +} + +_NEON2SSESTORAGE uint64x1_t vsra_n_u64(uint64x1_t a, uint64x1_t b, __constrange(1,64) int c); // VSRA.U64 d0,d0,#64 +_NEON2SSE_INLINE uint64x1_t vsra_n_u64(uint64x1_t a, uint64x1_t b, __constrange(1,64) int c) // VSRA.U64 d0,d0,#64 +{ + //may be not optimal compared with the serial execution + uint64x1_t shift; + shift = vshr_n_u64(b, c); + return vadd_u64(a, shift); +} + +_NEON2SSESTORAGE int8x16_t vsraq_n_s8(int8x16_t a, int8x16_t b, __constrange(1,8) int c); // VSRA.S8 q0,q0,#8 +_NEON2SSE_INLINE int8x16_t vsraq_n_s8(int8x16_t a, int8x16_t b, __constrange(1,8) int c) // VSRA.S8 q0,q0,#8 +{ + int8x16_t shift; + shift = vshrq_n_s8(b, c); + return vaddq_s8(a, shift); +} + +_NEON2SSESTORAGE int16x8_t vsraq_n_s16(int16x8_t a, int16x8_t b, __constrange(1,16) int c); // VSRA.S16 q0,q0,#16 +_NEON2SSE_INLINE int16x8_t vsraq_n_s16(int16x8_t a, int16x8_t b, __constrange(1,16) int c) // VSRA.S16 q0,q0,#16 +{ + int16x8_t shift; + shift = vshrq_n_s16(b, c); + return vaddq_s16(a, shift); +} + +_NEON2SSESTORAGE int32x4_t vsraq_n_s32(int32x4_t a, int32x4_t b, __constrange(1,32) int c); // VSRA.S32 q0,q0,#32 +_NEON2SSE_INLINE int32x4_t vsraq_n_s32(int32x4_t a, int32x4_t b, __constrange(1,32) int c) // VSRA.S32 q0,q0,#32 +{ + int32x4_t shift; + shift = vshrq_n_s32(b, c); + return vaddq_s32(a, shift); +} + +_NEON2SSESTORAGE int64x2_t vsraq_n_s64(int64x2_t a, int64x2_t b, __constrange(1,64) int c); // VSRA.S64 q0,q0,#64 +_NEON2SSE_INLINE int64x2_t vsraq_n_s64(int64x2_t a, int64x2_t b, __constrange(1,64) int c) // VSRA.S64 q0,q0,#64 +{ + int64x2_t shift; + shift = vshrq_n_s64(b, c); + return vaddq_s64( a, shift); +} + +_NEON2SSESTORAGE uint8x16_t vsraq_n_u8(uint8x16_t a, uint8x16_t b, __constrange(1,8) int c); // VSRA.U8 q0,q0,#8 +_NEON2SSE_INLINE uint8x16_t vsraq_n_u8(uint8x16_t a, uint8x16_t b, __constrange(1,8) int c) // VSRA.U8 q0,q0,#8 +{ + uint8x16_t shift; + shift = vshrq_n_u8(b, c); + return vaddq_u8(a, shift); +} + +_NEON2SSESTORAGE uint16x8_t vsraq_n_u16(uint16x8_t a, uint16x8_t b, __constrange(1,16) int c); // VSRA.s16 q0,q0,#16 +_NEON2SSE_INLINE uint16x8_t vsraq_n_u16(uint16x8_t a, uint16x8_t b, __constrange(1,16) int c) // VSRA.s16 q0,q0,#16 +{ + uint16x8_t shift; + shift = vshrq_n_u16(b, c); + return vaddq_u16(a, shift); +} + +_NEON2SSESTORAGE uint32x4_t vsraq_n_u32(uint32x4_t a, uint32x4_t b, __constrange(1,32) int c); // VSRA.U32 q0,q0,#32 +_NEON2SSE_INLINE uint32x4_t vsraq_n_u32(uint32x4_t a, uint32x4_t b, __constrange(1,32) int c) // VSRA.U32 q0,q0,#32 +{ + uint32x4_t shift; + shift = vshrq_n_u32(b, c); + return vaddq_u32(a, shift); +} + +_NEON2SSESTORAGE uint64x2_t vsraq_n_u64(uint64x2_t a, uint64x2_t b, 
__constrange(1,64) int c); // VSRA.U64 q0,q0,#64 +_NEON2SSE_INLINE uint64x2_t vsraq_n_u64(uint64x2_t a, uint64x2_t b, __constrange(1,64) int c) // VSRA.U64 q0,q0,#64 +{ + uint64x2_t shift; + shift = vshrq_n_u64(b, c); + return vaddq_u64(a, shift); +} + +//************* Vector rounding shift right by constant and accumulate **************************** +//************************************************************************************************ +_NEON2SSESTORAGE int8x8_t vrsra_n_s8(int8x8_t a, int8x8_t b, __constrange(1,8) int c); // VRSRA.S8 d0,d0,#8 +_NEON2SSE_INLINE int8x8_t vrsra_n_s8(int8x8_t a, int8x8_t b, __constrange(1,8) int c) // VRSRA.S8 d0,d0,#8 +{ + int8x8_t shift; + shift = vrshr_n_s8(b, c); + return vadd_s8( a, shift); +} + +_NEON2SSESTORAGE int16x4_t vrsra_n_s16(int16x4_t a, int16x4_t b, __constrange(1,16) int c); // VRSRA.S16 d0,d0,#16 +_NEON2SSE_INLINE int16x4_t vrsra_n_s16(int16x4_t a, int16x4_t b, __constrange(1,16) int c) // VRSRA.S16 d0,d0,#16 +{ + int16x4_t shift; + shift = vrshr_n_s16( b, c); + return vadd_s16(a, shift); +} + +_NEON2SSESTORAGE int32x2_t vrsra_n_s32(int32x2_t a, int32x2_t b, __constrange(1,32) int c); // VRSRA.S32 d0,d0,#32 +_NEON2SSE_INLINE int32x2_t vrsra_n_s32(int32x2_t a, int32x2_t b, __constrange(1,32) int c) // VRSRA.S32 d0,d0,#32 +{ + //may be not optimal compared with the serial execution + int32x2_t shift; + shift = vrshr_n_s32(b, c); + return vadd_s32( a, shift); +} + +_NEON2SSESTORAGE int64x1_t vrsra_n_s64(int64x1_t a, int64x1_t b, __constrange(1,64) int c); // VRSRA.S64 d0,d0,#64 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x1_t vrsra_n_s64(int64x1_t a, int64x1_t b, __constrange(1,64) int c), _NEON2SSE_REASON_SLOW_SERIAL) //serial solution +{ + int64x1_t shift; + shift = vrshr_n_s64(b, c); + return vadd_s64( a, shift); +} + +_NEON2SSESTORAGE uint8x8_t vrsra_n_u8(uint8x8_t a, uint8x8_t b, __constrange(1,8) int c); // VRSRA.U8 d0,d0,#8 +_NEON2SSE_INLINE uint8x8_t vrsra_n_u8(uint8x8_t a, uint8x8_t b, __constrange(1,8) int c) // VRSRA.U8 d0,d0,#8 +{ + uint8x8_t shift; + shift = vrshr_n_u8(b, c); + return vadd_u8(a, shift); +} + +_NEON2SSESTORAGE uint16x4_t vrsra_n_u16(uint16x4_t a, uint16x4_t b, __constrange(1,16) int c); // VRSRA.s16 d0,d0,#16 +_NEON2SSE_INLINE uint16x4_t vrsra_n_u16(uint16x4_t a, uint16x4_t b, __constrange(1,16) int c) // VRSRA.s16 d0,d0,#16 +{ + uint16x4_t shift; + shift = vrshr_n_u16(b, c); + return vadd_u16(a,shift); +} + +_NEON2SSESTORAGE uint32x2_t vrsra_n_u32(uint32x2_t a, uint32x2_t b, __constrange(1,32) int c); // VRSRA.U32 d0,d0,#32 +_NEON2SSE_INLINE uint32x2_t vrsra_n_u32(uint32x2_t a, uint32x2_t b, __constrange(1,32) int c) // VRSRA.U32 d0,d0,#32 +{ + //may be not optimal compared with the serial execution + uint32x2_t shift; + shift = vrshr_n_u32(b, c); + return vadd_u32( a, shift); +} + +_NEON2SSESTORAGE uint64x1_t vrsra_n_u64(uint64x1_t a, uint64x1_t b, __constrange(1,64) int c); // VRSRA.U64 d0,d0,#64 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint64x1_t vrsra_n_u64(uint64x1_t a, uint64x1_t b, __constrange(1,64) int c), _NEON2SSE_REASON_SLOW_SERIAL) //serial solution +{ + //may be not optimal compared with the serial execution + uint64x1_t shift; + shift = vrshr_n_u64(b, c); + return vadd_u64( a, shift); +} + +_NEON2SSESTORAGE int8x16_t vrsraq_n_s8(int8x16_t a, int8x16_t b, __constrange(1,8) int c); // VRSRA.S8 q0,q0,#8 +_NEON2SSE_INLINE int8x16_t vrsraq_n_s8(int8x16_t a, int8x16_t b, __constrange(1,8) int c) // VRSRA.S8 q0,q0,#8 +{ + int8x16_t shift; + shift = vrshrq_n_s8(b, c); + return 
vaddq_s8(a, shift); +} + +_NEON2SSESTORAGE int16x8_t vrsraq_n_s16(int16x8_t a, int16x8_t b, __constrange(1,16) int c); // VRSRA.S16 q0,q0,#16 +_NEON2SSE_INLINE int16x8_t vrsraq_n_s16(int16x8_t a, int16x8_t b, __constrange(1,16) int c) // VRSRA.S16 q0,q0,#16 +{ + int16x8_t shift; + shift = vrshrq_n_s16(b, c); + return vaddq_s16(a, shift); +} + +_NEON2SSESTORAGE int32x4_t vrsraq_n_s32(int32x4_t a, int32x4_t b, __constrange(1,32) int c); // VRSRA.S32 q0,q0,#32 +_NEON2SSE_INLINE int32x4_t vrsraq_n_s32(int32x4_t a, int32x4_t b, __constrange(1,32) int c) // VRSRA.S32 q0,q0,#32 +{ + int32x4_t shift; + shift = vrshrq_n_s32(b, c); + return vaddq_s32(a, shift); +} + +_NEON2SSESTORAGE int64x2_t vrsraq_n_s64(int64x2_t a, int64x2_t b, __constrange(1,64) int c); // VRSRA.S64 q0,q0,#64 +_NEON2SSE_INLINE int64x2_t vrsraq_n_s64(int64x2_t a, int64x2_t b, __constrange(1,64) int c) +{ + int64x2_t shift; + shift = vrshrq_n_s64(b, c); + return vaddq_s64(a, shift); +} + +_NEON2SSESTORAGE uint8x16_t vrsraq_n_u8(uint8x16_t a, uint8x16_t b, __constrange(1,8) int c); // VRSRA.U8 q0,q0,#8 +_NEON2SSE_INLINE uint8x16_t vrsraq_n_u8(uint8x16_t a, uint8x16_t b, __constrange(1,8) int c) // VRSRA.U8 q0,q0,#8 +{ + uint8x16_t shift; + shift = vrshrq_n_u8(b, c); + return vaddq_u8(a, shift); +} + +_NEON2SSESTORAGE uint16x8_t vrsraq_n_u16(uint16x8_t a, uint16x8_t b, __constrange(1,16) int c); // VRSRA.s16 q0,q0,#16 +_NEON2SSE_INLINE uint16x8_t vrsraq_n_u16(uint16x8_t a, uint16x8_t b, __constrange(1,16) int c) // VRSRA.s16 q0,q0,#16 +{ + uint16x8_t shift; + shift = vrshrq_n_u16(b, c); + return vaddq_u16(a, shift); +} + +_NEON2SSESTORAGE uint32x4_t vrsraq_n_u32(uint32x4_t a, uint32x4_t b, __constrange(1,32) int c); // VRSRA.U32 q0,q0,#32 +_NEON2SSE_INLINE uint32x4_t vrsraq_n_u32(uint32x4_t a, uint32x4_t b, __constrange(1,32) int c) // VRSRA.U32 q0,q0,#32 +{ + uint32x4_t shift; + shift = vrshrq_n_u32(b, c); + return vaddq_u32(a, shift); +} + +_NEON2SSESTORAGE uint64x2_t vrsraq_n_u64(uint64x2_t a, uint64x2_t b, __constrange(1,64) int c); // VRSRA.U64 q0,q0,#64 +_NEON2SSE_INLINE uint64x2_t vrsraq_n_u64(uint64x2_t a, uint64x2_t b, __constrange(1,64) int c) +{ + uint64x2_t shift; + shift = vrshrq_n_u64(b, c); + return vaddq_u64(a, shift); +} + +//**********************Vector saturating shift left by constant ***************************** +//******************************************************************************************** +//we don't check const ranges assuming they are met +_NEON2SSESTORAGE int8x8_t vqshl_n_s8(int8x8_t a, __constrange(0,7) int b); // VQSHL.S8 d0,d0,#0 +_NEON2SSE_INLINE int8x8_t vqshl_n_s8(int8x8_t a, __constrange(0,7) int b) // VQSHL.S8 d0,d0,#0 +{ + //no 8 bit shift available in IA32 SIMD, go to 16 bit. 
It also provides the auto saturation (in packs function) + int8x8_t res64; + __m128i a128, r128; + a128 = _MM_CVTEPI8_EPI16 (_pM128i(a)); //SSE 4.1 + r128 = _mm_slli_epi16 (a128, b); + r128 = _mm_packs_epi16 (r128,r128); //saturated s8, use 64 low bits only + return64(r128); +} + +_NEON2SSESTORAGE int16x4_t vqshl_n_s16(int16x4_t a, __constrange(0,15) int b); // VQSHL.S16 d0,d0,#0 +_NEON2SSE_INLINE int16x4_t vqshl_n_s16(int16x4_t a, __constrange(0,15) int b) // VQSHL.S16 d0,d0,#0 +{ + // go to 32 bit to get the auto saturation (in packs function) + int16x4_t res64; + __m128i a128, r128; + a128 = _MM_CVTEPI16_EPI32 (_pM128i(a)); //SSE 4.1 + r128 = _mm_slli_epi32 (a128, b); //shift_res + r128 = _mm_packs_epi32 (r128,r128); //saturated s16, use 64 low bits only + return64(r128); +} + +_NEON2SSESTORAGE int32x2_t vqshl_n_s32(int32x2_t a, __constrange(0,31) int b); // VQSHL.S32 d0,d0,#0 +_NEON2SSE_INLINE int32x2_t vqshl_n_s32(int32x2_t a, __constrange(0,31) int b) +{ + //serial execution may be faster + int32x2_t res64; + return64(vqshlq_n_s32 (_pM128i(a), b)); +} + + +_NEON2SSESTORAGE int64x1_t vqshl_n_s64(int64x1_t a, __constrange(0,63) int b); // VQSHL.S64 d0,d0,#0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x1_t vqshl_n_s64(int64x1_t a, __constrange(0,63) int b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + // no effective SIMD solution here + int64x1_t res; + int64_t bmask; + int64_t a_i64 = *( int64_t*)&a; + bmask = ( int64_t)1 << (63 - b); //positive + if (a_i64 >= bmask) { + res.m64_i64[0] = ~(_SIGNBIT64); + } else { + res.m64_i64[0] = (a_i64 <= -bmask) ? (int64_t)_SIGNBIT64 : a_i64 << b; + } + return res; +} + + +_NEON2SSESTORAGE uint8x8_t vqshl_n_u8(uint8x8_t a, __constrange(0,7) int b); // VQSHL.U8 d0,d0,#0 +_NEON2SSE_INLINE uint8x8_t vqshl_n_u8(uint8x8_t a, __constrange(0,7) int b) // VQSHL.U8 d0,d0,#0 +{ + //no 8 bit shift available in IA32 SIMD, go to 16 bit + uint8x8_t res64; + __m128i a128, r128; + a128 = _MM_CVTEPU8_EPI16 (_pM128i(a)); //SSE 4.1 + r128 = _mm_slli_epi16 (a128, b); //shift_res + r128 = _mm_packus_epi16 (r128,r128); //saturated u8, use 64 low bits only + return64(r128); +} + +_NEON2SSESTORAGE uint16x4_t vqshl_n_u16(uint16x4_t a, __constrange(0,15) int b); // VQSHL.s16 d0,d0,#0 +_NEON2SSE_INLINE uint16x4_t vqshl_n_u16(uint16x4_t a, __constrange(0,15) int b) // VQSHL.s16 d0,d0,#0 +{ + // go to 32 bit to get the auto saturation (in packus function) + uint16x4_t res64; + __m128i a128, r128; + a128 = _MM_CVTEPU16_EPI32 (_pM128i(a)); //SSE 4.1 + r128 = _mm_slli_epi32 (a128, b); //shift_res + r128 = _MM_PACKUS1_EPI32 (r128); //saturated s16 + return64(r128); +} + +_NEON2SSESTORAGE uint32x2_t vqshl_n_u32(uint32x2_t a, __constrange(0,31) int b); // VQSHL.U32 d0,d0,#0 +_NEON2SSE_INLINE uint32x2_t vqshl_n_u32(uint32x2_t a, __constrange(0,31) int b) +{ + uint32x2_t res64; + return64(vqshlq_n_u32(_pM128i(a), b)); +} + +_NEON2SSESTORAGE uint64x1_t vqshl_n_u64(uint64x1_t a, __constrange(0,63) int b); // VQSHL.U64 d0,d0,#0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint64x1_t vqshl_n_u64(uint64x1_t a, __constrange(0,63) int b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + // no effective SIMD solution here + uint64x1_t res; + uint64_t bmask; + uint64_t a_i64 = *(uint64_t*)&a; + bmask = ( uint64_t)1 << (64 - b); + res.m64_u64[0] = (a_i64 >= bmask)&&(b>0) ? 
0xffffffffffffffff : a_i64 << b; //if b=0 we are fine with any a + return res; +} + +_NEON2SSESTORAGE int8x16_t vqshlq_n_s8(int8x16_t a, __constrange(0,7) int b); // VQSHL.S8 q0,q0,#0 +_NEON2SSE_INLINE int8x16_t vqshlq_n_s8(int8x16_t a, __constrange(0,7) int b) // VQSHL.S8 q0,q0,#0 +{ + // go to 16 bit to get the auto saturation (in packs function) + __m128i a128, r128_1, r128_2; + a128 = _MM_CVTEPI8_EPI16 (a); //SSE 4.1 + r128_1 = _mm_slli_epi16 (a128, b); + //swap hi and low part of a128 to process the remaining data + a128 = _mm_shuffle_epi32 (a, _SWAP_HI_LOW32); + a128 = _MM_CVTEPI8_EPI16 (a128); + r128_2 = _mm_slli_epi16 (a128, b); + return _mm_packs_epi16 (r128_1, r128_2); //saturated s8 +} + +_NEON2SSESTORAGE int16x8_t vqshlq_n_s16(int16x8_t a, __constrange(0,15) int b); // VQSHL.S16 q0,q0,#0 +_NEON2SSE_INLINE int16x8_t vqshlq_n_s16(int16x8_t a, __constrange(0,15) int b) // VQSHL.S16 q0,q0,#0 +{ + // manual saturation solution looks LESS optimal than 32 bits conversion one + // go to 32 bit to get the auto saturation (in packs function) + __m128i a128, r128_1, r128_2; + a128 = _MM_CVTEPI16_EPI32 (a); //SSE 4.1 + r128_1 = _mm_slli_epi32 (a128, b); //shift_res + //swap hi and low part of a128 to process the remaining data + a128 = _mm_shuffle_epi32 (a, _SWAP_HI_LOW32); + a128 = _MM_CVTEPI16_EPI32 (a128); + r128_2 = _mm_slli_epi32 (a128, b); + return _mm_packs_epi32 (r128_1, r128_2); //saturated s16 +} + +_NEON2SSESTORAGE int32x4_t vqshlq_n_s32(int32x4_t a, __constrange(0,31) int b); // VQSHL.S32 q0,q0,#0 +_NEON2SSE_INLINE int32x4_t vqshlq_n_s32(int32x4_t a, __constrange(0,31) int b) // VQSHL.S32 q0,q0,#0 +{ + // no 64 bit saturation option available, special tricks necessary + __m128i c1, maskA, saturation_mask, c7ffffff_mask, shift_res, shift_res_mask; + c1 = _mm_cmpeq_epi32(a,a); //0xff..ff + maskA = _mm_srli_epi32(c1, b + 1); //mask for positive numbers (32-b+1) zeros and b-1 ones + saturation_mask = _mm_cmpgt_epi32 (a, maskA); //0xff...ff if we need saturation, 0 otherwise + c7ffffff_mask = _mm_srli_epi32(saturation_mask, 1); //saturated to 0x7f..ff when needed and zeros if not + shift_res = _mm_slli_epi32 (a, b); + shift_res_mask = _mm_andnot_si128(saturation_mask, shift_res); + //result with positive numbers saturated + shift_res = _mm_or_si128 (c7ffffff_mask, shift_res_mask); + //treat negative numbers + maskA = _mm_slli_epi32(c1, 31 - b); //mask for negative numbers b-1 ones and (32-b+1) zeros + saturation_mask = _mm_cmpgt_epi32 (maskA,a); //0xff...ff if we need saturation, 0 otherwise + c7ffffff_mask = _mm_slli_epi32(saturation_mask, 31); //saturated to 0x80..00 when needed and zeros if not + shift_res_mask = _mm_andnot_si128(saturation_mask, shift_res); + return _mm_or_si128 (c7ffffff_mask, shift_res_mask); +} + +_NEON2SSESTORAGE int64x2_t vqshlq_n_s64(int64x2_t a, __constrange(0,63) int b); // VQSHL.S64 q0,q0,#0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x2_t vqshlq_n_s64(int64x2_t a, __constrange(0,63) int b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + // no effective SIMD solution here + _NEON2SSE_ALIGN_16 int64_t atmp[2], res[2]; + int64_t bmask; + int i; + bmask = ( int64_t)1 << (63 - b); //positive + _mm_store_si128((__m128i*)atmp, a); + for (i = 0; i<2; i++) { + if (atmp[i] >= bmask) { + res[i] = ~(_SIGNBIT64); + } else { + res[i] = (atmp[i] <= -bmask) ? 
(int64_t)_SIGNBIT64 : atmp[i] << b; + } + } + return _mm_load_si128((__m128i*)res); +} + +_NEON2SSESTORAGE uint8x16_t vqshlq_n_u8(uint8x16_t a, __constrange(0,7) int b); // VQSHL.U8 q0,q0,#0 +_NEON2SSE_INLINE uint8x16_t vqshlq_n_u8(uint8x16_t a, __constrange(0,7) int b) // VQSHL.U8 q0,q0,#0 +{ + // go to 16 bit to get the auto saturation (in packs function) + __m128i a128, r128_1, r128_2; + a128 = _MM_CVTEPU8_EPI16 (a); //SSE 4.1 + r128_1 = _mm_slli_epi16 (a128, b); + //swap hi and low part of a128 to process the remaining data + a128 = _mm_shuffle_epi32 (a, _SWAP_HI_LOW32); + a128 = _MM_CVTEPU8_EPI16 (a128); + r128_2 = _mm_slli_epi16 (a128, b); + return _mm_packus_epi16 (r128_1, r128_2); //saturated u8 +} + +_NEON2SSESTORAGE uint16x8_t vqshlq_n_u16(uint16x8_t a, __constrange(0,15) int b); // VQSHL.s16 q0,q0,#0 +_NEON2SSE_INLINE uint16x8_t vqshlq_n_u16(uint16x8_t a, __constrange(0,15) int b) // VQSHL.s16 q0,q0,#0 +{ + // manual saturation solution looks more optimal than 32 bits conversion one + __m128i cb, c8000, a_signed, saturation_mask, shift_res; + cb = _mm_set1_epi16((1 << (16 - b)) - 1 - 0x8000 ); + c8000 = _mm_set1_epi16 (-32768); // (int16_t)0x8000 +//no unsigned shorts comparison in SSE, only signed available, so need the trick + a_signed = _mm_sub_epi16(a, c8000); //go to signed + saturation_mask = _mm_cmpgt_epi16 (a_signed, cb); + shift_res = _mm_slli_epi16 (a, b); + return _mm_or_si128 (shift_res, saturation_mask); +} + +_NEON2SSESTORAGE uint32x4_t vqshlq_n_u32(uint32x4_t a, __constrange(0,31) int b); // VQSHL.U32 q0,q0,#0 +_NEON2SSE_INLINE uint32x4_t vqshlq_n_u32(uint32x4_t a, __constrange(0,31) int b) // VQSHL.U32 q0,q0,#0 +{ + // manual saturation solution, no 64 bit saturation option, the serial version may be faster + __m128i cb, c80000000, a_signed, saturation_mask, shift_res; + cb = _mm_set1_epi32((1 << (32 - b)) - 1 - 0x80000000 ); + c80000000 = _mm_set1_epi32 (0x80000000); +//no unsigned ints comparison in SSE, only signed available, so need the trick + a_signed = _mm_sub_epi32(a, c80000000); //go to signed + saturation_mask = _mm_cmpgt_epi32 (a_signed, cb); + shift_res = _mm_slli_epi32 (a, b); + return _mm_or_si128 (shift_res, saturation_mask); +} + +_NEON2SSESTORAGE uint64x2_t vqshlq_n_u64(uint64x2_t a, __constrange(0,63) int b); // VQSHL.U64 q0,q0,#0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint64x2_t vqshlq_n_u64(uint64x2_t a, __constrange(0,63) int b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + // no effective SIMD solution here + _NEON2SSE_ALIGN_16 uint64_t atmp[2], res[2]; + uint64_t bmask; + int i; + bmask = ( uint64_t)1 << (64 - b); + _mm_store_si128((__m128i*)atmp, a); + for (i = 0; i<2; i++) { + res[i] = (atmp[i] >= bmask)&&(b>0) ? 0xffffffffffffffff : atmp[i] << b; //if b=0 we are fine with any a + } + return _mm_load_si128((__m128i*)res); +} + +//**************Vector signed->unsigned saturating shift left by constant ************* +//************************************************************************************* +_NEON2SSESTORAGE uint8x8_t vqshlu_n_s8(int8x8_t a, __constrange(0,7) int b); // VQSHLU.S8 d0,d0,#0 +_NEON2SSE_INLINE uint8x8_t vqshlu_n_s8(int8x8_t a, __constrange(0,7) int b) // VQSHLU.S8 d0,d0,#0 +{ + //no 8 bit shift available in IA32 SIMD, go to 16 bit. 
It also provides the auto saturation (in packs function) + uint8x8_t res64; + __m128i a128, r128; + a128 = _MM_CVTEPI8_EPI16 (_pM128i(a)); //SSE 4.1 + r128 = _mm_slli_epi16 (a128, b); + r128 = _mm_packus_epi16 (r128,r128); //saturated u8, use 64 low bits only + return64(r128); +} + +_NEON2SSESTORAGE uint16x4_t vqshlu_n_s16(int16x4_t a, __constrange(0,15) int b); // VQSHLU.S16 d0,d0,#0 +_NEON2SSE_INLINE uint16x4_t vqshlu_n_s16(int16x4_t a, __constrange(0,15) int b) // VQSHLU.S16 d0,d0,#0 +{ + uint16x4_t res64; + __m128i a128, r128; + a128 = _MM_CVTEPI16_EPI32 (_pM128i(a)); //SSE 4.1 + r128 = _mm_slli_epi32 (a128, b); //shift_res + r128 = _MM_PACKUS1_EPI32 (r128); //saturated s16, use 64 low bits only + return64(r128); +} + +_NEON2SSESTORAGE uint32x2_t vqshlu_n_s32(int32x2_t a, __constrange(0,31) int b); // VQSHLU.S32 d0,d0,#0 +_NEON2SSE_INLINE int32x2_t vqshlu_n_s32(int32x2_t a, __constrange(0,31) int b) +{ + int32x2_t res64; + return64( vqshluq_n_s32(_pM128i(a), b)); +} + +_NEON2SSESTORAGE uint64x1_t vqshlu_n_s64(int64x1_t a, __constrange(0,63) int b); // VQSHLU.S64 d0,d0,#0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint64x1_t vqshlu_n_s64(int64x1_t a, __constrange(0,63) int b), _NEON2SSE_REASON_SLOW_SERIAL) // no effective SIMD solution here, serial execution looks faster +{ + uint64x1_t res; + uint64_t limit; + if (a.m64_i64[0]<=0) { + res.m64_u64[0] = 0; + } else { + limit = (uint64_t) 1 << (64 - b); + res.m64_u64[0] = ( ((uint64_t)a.m64_i64[0]) >= limit) ? ~((uint64_t)0) : (uint64_t)a.m64_i64[0] << b; + } + return res; +} + +_NEON2SSESTORAGE uint8x16_t vqshluq_n_s8(int8x16_t a, __constrange(0,7) int b); // VQSHLU.S8 q0,q0,#0 +_NEON2SSE_INLINE uint8x16_t vqshluq_n_s8(int8x16_t a, __constrange(0,7) int b) // VQSHLU.S8 q0,q0,#0 +{ + __m128i a128, r128_1, r128_2; + a128 = _MM_CVTEPI8_EPI16 (a); //SSE 4.1 + r128_1 = _mm_slli_epi16 (a128, b); + //swap hi and low part of a128 to process the remaining data + a128 = _mm_shuffle_epi32 (a, _SWAP_HI_LOW32); + a128 = _MM_CVTEPI8_EPI16 (a128); + r128_2 = _mm_slli_epi16 (a128, b); + return _mm_packus_epi16 (r128_1, r128_2); //saturated u8 +} + +_NEON2SSESTORAGE uint16x8_t vqshluq_n_s16(int16x8_t a, __constrange(0,15) int b); // VQSHLU.S16 q0,q0,#0 +_NEON2SSE_INLINE uint16x8_t vqshluq_n_s16(int16x8_t a, __constrange(0,15) int b) // VQSHLU.S16 q0,q0,#0 +{ + // manual saturation solution looks LESS optimal than 32 bits conversion one + __m128i a128, r128_1, r128_2; + a128 = _MM_CVTEPI16_EPI32 (a); //SSE 4.1 + r128_1 = _mm_slli_epi32 (a128, b); //shift_res + //swap hi and low part of a128 to process the remaining data + a128 = _mm_shuffle_epi32 (a, _SWAP_HI_LOW32); + a128 = _MM_CVTEPI16_EPI32 (a128); + r128_2 = _mm_slli_epi32 (a128, b); + return _MM_PACKUS_EPI32 (r128_1, r128_2); //saturated s16 +} + +_NEON2SSESTORAGE uint32x4_t vqshluq_n_s32(int32x4_t a, __constrange(0,31) int b); // VQSHLU.S32 q0,q0,#0 +_NEON2SSE_INLINE uint32x4_t vqshluq_n_s32(int32x4_t a, __constrange(0,31) int b) // VQSHLU.S32 q0,q0,#0 +{ + //solution may be not optimal compared with the serial one + __m128i zero, maskA, maskGT0, a0, a_masked, a_shift; + zero = _mm_setzero_si128(); + maskA = _mm_cmpeq_epi32(a, a); + maskA = _mm_slli_epi32(maskA,(32 - b)); // b ones and (32-b)zeros + //saturate negative numbers to zero + maskGT0 = _mm_cmpgt_epi32 (a, zero); // //0xffffffff if positive number and zero otherwise (negative numbers) + a0 = _mm_and_si128 (a, maskGT0); //negative are zeros now + //saturate positive to 0xffffffff + a_masked = _mm_and_si128 (a0, maskA); + a_masked = 
_mm_cmpgt_epi32 (a_masked, zero); //0xffffffff if saturation necessary 0 otherwise + a_shift = _mm_slli_epi32 (a0, b); + return _mm_or_si128 (a_shift, a_masked); //actual saturation +} + +_NEON2SSESTORAGE uint64x2_t vqshluq_n_s64(int64x2_t a, __constrange(0,63) int b); // VQSHLU.S64 q0,q0,#0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint64x2_t vqshluq_n_s64(int64x2_t a, __constrange(0,63) int b), _NEON2SSE_REASON_SLOW_SERIAL) +{ + // no effective SIMD solution here, serial execution looks faster + _NEON2SSE_ALIGN_16 int64_t atmp[2]; + _NEON2SSE_ALIGN_16 uint64_t res[2]; + uint64_t limit; + int i; + _mm_store_si128((__m128i*)atmp, a); + for (i = 0; i<2; i++) { + if (atmp[i]<=0) { + res[i] = 0; + } else { + limit = (uint64_t) 1 << (64 - b); + res[i] = ( ((uint64_t)atmp[i]) >= limit) ? ~((uint64_t)0) : (uint64_t)atmp[i] << b; + } + } + return _mm_load_si128((__m128i*)res); +} + +//************** Vector narrowing shift right by constant ************** +//********************************************************************** +_NEON2SSESTORAGE int8x8_t vshrn_n_s16(int16x8_t a, __constrange(1,8) int b); // VSHRN.I16 d0,q0,#8 +_NEON2SSE_INLINE int8x8_t vshrn_n_s16(int16x8_t a, __constrange(1,8) int b) // VSHRN.I16 d0,q0,#8 +{ + int8x8_t res64; + __m128i r16; + r16 = vshrq_n_s16(a,b); + r16 = _mm_shuffle_epi8 (r16, *(__m128i*) mask8_16_even_odd); //narrow, use low 64 bits only. Impossible to use _mm_packs because of negative saturation problems + return64(r16); +} + +_NEON2SSESTORAGE int16x4_t vshrn_n_s32(int32x4_t a, __constrange(1,16) int b); // VSHRN.I32 d0,q0,#16 +_NEON2SSE_INLINE int16x4_t vshrn_n_s32(int32x4_t a, __constrange(1,16) int b) // VSHRN.I32 d0,q0,#16 +{ + int16x4_t res64; + __m128i r32; + r32 = vshrq_n_s32(a,b); + r32 = _mm_shuffle_epi8 (r32, *(__m128i*) mask8_32_even_odd); //narrow, use low 64 bits only. 
Impossible to use _mm_packs because of negative saturation problems + return64(r32); +} + +_NEON2SSESTORAGE int32x2_t vshrn_n_s64(int64x2_t a, __constrange(1,32) int b); // VSHRN.I64 d0,q0,#32 +_NEON2SSE_INLINE int32x2_t vshrn_n_s64(int64x2_t a, __constrange(1,32) int b) +{ + int32x2_t res64; + __m128i r64; + r64 = vshrq_n_s64(a,b); + r64 = _mm_shuffle_epi32(r64, 0 | (2 << 2) | (1 << 4) | (3 << 6)); //shuffle the data to get 2 32-bits + return64(r64); +} + +_NEON2SSESTORAGE uint8x8_t vshrn_n_u16(uint16x8_t a, __constrange(1,8) int b); // VSHRN.I16 d0,q0,#8 +_NEON2SSE_INLINE uint8x8_t vshrn_n_u16(uint16x8_t a, __constrange(1,8) int b) // VSHRN.I16 d0,q0,#8 +{ + uint8x8_t res64; + __m128i mask, r16; + mask = _mm_set1_epi16(0xff); + r16 = vshrq_n_u16(a,b); //after right shift b>=1 unsigned var fits into signed range, so we could use _mm_packus_epi16 (signed 16 to unsigned 8) + r16 = _mm_and_si128(r16, mask); //to avoid saturation + r16 = _mm_packus_epi16 (r16,r16); //narrow, use low 64 bits only + return64(r16); +} + +_NEON2SSESTORAGE uint16x4_t vshrn_n_u32(uint32x4_t a, __constrange(1,16) int b); // VSHRN.I32 d0,q0,#16 +_NEON2SSE_INLINE uint16x4_t vshrn_n_u32(uint32x4_t a, __constrange(1,16) int b) // VSHRN.I32 d0,q0,#16 +{ + uint16x4_t res64; + __m128i mask, r32; + mask = _mm_set1_epi32(0xffff); + r32 = vshrq_n_u32(a,b); //after right shift b>=1 unsigned var fits into signed range, so we could use _MM_PACKUS_EPI32 (signed 32 to unsigned 16) + r32 = _mm_and_si128(r32, mask); //to avoid saturation + r32 = _MM_PACKUS1_EPI32 (r32); //saturate and narrow, use low 64 bits only + return64(r32); +} + +_NEON2SSESTORAGE uint32x2_t vshrn_n_u64(uint64x2_t a, __constrange(1,32) int b); // VSHRN.I64 d0,q0,#32 +_NEON2SSE_INLINE uint32x2_t vshrn_n_u64(uint64x2_t a, __constrange(1,32) int b) +{ + uint32x2_t res64; + __m128i r64; + r64 = vshrq_n_u64(a,b); + r64 = _mm_shuffle_epi32(r64, 0 | (2 << 2) | (1 << 4) | (3 << 6)); //shuffle the data to get 2 32-bits + return64(r64); +} + +//************** Vector signed->unsigned narrowing saturating shift right by constant ******** +//********************************************************************************************* +_NEON2SSESTORAGE uint8x8_t vqshrun_n_s16(int16x8_t a, __constrange(1,8) int b); // VQSHRUN.S16 d0,q0,#8 +_NEON2SSE_INLINE uint8x8_t vqshrun_n_s16(int16x8_t a, __constrange(1,8) int b) // VQSHRUN.S16 d0,q0,#8 +{ + uint8x8_t res64; + __m128i r16; + r16 = vshrq_n_s16(a,b); + r16 = _mm_packus_epi16 (r16,r16); //saturate and narrow (signed to unsigned), use low 64 bits only + return64(r16); +} + +_NEON2SSESTORAGE uint16x4_t vqshrun_n_s32(int32x4_t a, __constrange(1,16) int b); // VQSHRUN.S32 d0,q0,#16 +_NEON2SSE_INLINE uint16x4_t vqshrun_n_s32(int32x4_t a, __constrange(1,16) int b) // VQSHRUN.S32 d0,q0,#16 +{ + uint16x4_t res64; + __m128i r32; + r32 = vshrq_n_s32(a,b); + r32 = _MM_PACKUS1_EPI32 (r32); //saturate and narrow(signed to unsigned), use low 64 bits only + return64(r32); +} + +_NEON2SSESTORAGE uint32x2_t vqshrun_n_s64(int64x2_t a, __constrange(1,32) int b); // VQSHRUN.S64 d0,q0,#32 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint32x2_t vqshrun_n_s64(int64x2_t a, __constrange(1,32) int b), _NEON2SSE_REASON_SLOW_SERIAL) //serial solution is faster +{ + _NEON2SSE_ALIGN_16 int64_t atmp[2]; + uint32x2_t res; + int64_t res64; + _mm_store_si128((__m128i*)atmp, a); + if (atmp[0] < 0) { + res.m64_u32[0] = 0; + } else { + res64 = (atmp[0] >> b); + res.m64_u32[0] = (res64 > (int64_t)0xffffffff) ? 
0xffffffff : (uint32_t) res64; + } + if (atmp[1] < 0) { + res.m64_u32[1] = 0; + } else { + res64 = (atmp[1] >> b); + res.m64_u32[1] = (res64 > (int64_t)0xffffffff) ? 0xffffffff : (uint32_t)res64; + } + return res; +} + +//**** Vector signed->unsigned rounding narrowing saturating shift right by constant ***** +_NEON2SSESTORAGE uint8x8_t vqrshrun_n_s16(int16x8_t a, __constrange(1,8) int b); // VQRSHRUN.S16 d0,q0,#8 +_NEON2SSE_INLINE uint8x8_t vqrshrun_n_s16(int16x8_t a, __constrange(1,8) int b) // VQRSHRUN.S16 d0,q0,#8 +{ + //solution may be not optimal compared with the serial one + __m128i r16; + uint8x8_t res64; + r16 = vrshrq_n_s16(a,b); + r16 = _mm_packus_epi16 (r16,r16); //saturate and narrow (signed to unsigned), use low 64 bits only + return64(r16); +} + +_NEON2SSESTORAGE uint16x4_t vqrshrun_n_s32(int32x4_t a, __constrange(1,16) int b); // VQRSHRUN.S32 d0,q0,#16 +_NEON2SSE_INLINE uint16x4_t vqrshrun_n_s32(int32x4_t a, __constrange(1,16) int b) // VQRSHRUN.S32 d0,q0,#16 +{ + //solution may be not optimal compared with the serial one + __m128i r32; + uint16x4_t res64; + r32 = vrshrq_n_s32(a,b); + r32 = _MM_PACKUS1_EPI32 (r32); //saturate and narrow (signed to unsigned), use low 64 bits only + return64(r32); +} + +_NEON2SSESTORAGE uint32x2_t vqrshrun_n_s64(int64x2_t a, __constrange(1,32) int b); // VQRSHRUN.S64 d0,q0,#32 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint32x2_t vqrshrun_n_s64(int64x2_t a, __constrange(1,32) int b), _NEON2SSE_REASON_SLOW_SERIAL) //serial solution is faster +{ + _NEON2SSE_ALIGN_16 int64_t atmp[2]; + uint32x2_t res; + int64_t res64; + _mm_store_si128((__m128i*)atmp, a); + if (atmp[0] < 0) { + res.m64_u32[0] = 0; + } else { + res64 = (atmp[0] >> b) + ( (atmp[0] & ((int64_t)1 << (b - 1))) >> (b - 1) ); + res.m64_u32[0] = (uint32_t) ((res64 > (int64_t)0xffffffff ) ? 0xffffffff : res64); + } + if (atmp[1] < 0) { + res.m64_u32[1] = 0; + } else { + res64 = (atmp[1] >> b) + ( (atmp[1] & ((int64_t)1 << (b - 1))) >> (b - 1) ); + res.m64_u32[1] = (uint32_t)((res64 > (int64_t)0xffffffff ) ? 
0xffffffff : res64); + } + return res; +} + +//***** Vector narrowing saturating shift right by constant ****** +//***************************************************************** +_NEON2SSESTORAGE int8x8_t vqshrn_n_s16(int16x8_t a, __constrange(1,8) int b); // VQSHRN.S16 d0,q0,#8 +_NEON2SSE_INLINE int8x8_t vqshrn_n_s16(int16x8_t a, __constrange(1,8) int b) // VQSHRN.S16 d0,q0,#8 +{ + int8x8_t res64; + __m128i r16; + r16 = vshrq_n_s16(a,b); + r16 = _mm_packs_epi16 (r16,r16); //saturate and narrow, use low 64 bits only + return64(r16); +} + +_NEON2SSESTORAGE int16x4_t vqshrn_n_s32(int32x4_t a, __constrange(1,16) int b); // VQSHRN.S32 d0,q0,#16 +_NEON2SSE_INLINE int16x4_t vqshrn_n_s32(int32x4_t a, __constrange(1,16) int b) // VQSHRN.S32 d0,q0,#16 +{ + int16x4_t res64; + __m128i r32; + r32 = vshrq_n_s32(a,b); + r32 = _mm_packs_epi32 (r32,r32); //saturate and narrow, use low 64 bits only + return64(r32); +} + +_NEON2SSESTORAGE int32x2_t vqshrn_n_s64(int64x2_t a, __constrange(1,32) int b); // VQSHRN.S64 d0,q0,#32 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int32x2_t vqshrn_n_s64(int64x2_t a, __constrange(1,32) int b), _NEON2SSE_REASON_SLOW_UNEFFECTIVE) +{ + //no optimal SIMD solution found + _NEON2SSE_ALIGN_16 int64_t res64[2], atmp[2]; + int32x2_t res; + _mm_store_si128((__m128i*)atmp, a); + res64[0] = (atmp[0] >> b); + res64[1] = (atmp[1] >> b); + if(res64[0]>SINT_MAX) res64[0] = SINT_MAX; + if(res64[0]<SINT_MIN) res64[0] = SINT_MIN; + if(res64[1]>SINT_MAX) res64[1] = SINT_MAX; + if(res64[1]<SINT_MIN) res64[1] = SINT_MIN; + res.m64_i32[0] = (int32_t)res64[0]; + res.m64_i32[1] = (int32_t)res64[1]; + return res; +} + +_NEON2SSESTORAGE uint8x8_t vqshrn_n_u16(uint16x8_t a, __constrange(1,8) int b); // VQSHRN.s16 d0,q0,#8 +_NEON2SSE_INLINE uint8x8_t vqshrn_n_u16(uint16x8_t a, __constrange(1,8) int b) // VQSHRN.s16 d0,q0,#8 +{ + uint8x8_t res64; + __m128i r16; + r16 = vshrq_n_u16(a,b); //after right shift b>=1 unsigned var fits into signed range, so we could use _mm_packus_epi16 (signed 16 to unsigned 8) + r16 = _mm_packus_epi16 (r16,r16); //saturate and narrow, use low 64 bits only + return64(r16); +} + +_NEON2SSESTORAGE uint16x4_t vqshrn_n_u32(uint32x4_t a, __constrange(1,16) int b); // VQSHRN.U32 d0,q0,#16 +_NEON2SSE_INLINE uint16x4_t vqshrn_n_u32(uint32x4_t a, __constrange(1,16) int b) // VQSHRN.U32 d0,q0,#16 +{ + uint16x4_t res64; + __m128i r32; + r32 = vshrq_n_u32(a,b); //after right shift b>=1 unsigned var fits into signed range, so we could use _MM_PACKUS_EPI32 (signed 32 to unsigned 16) + r32 = _MM_PACKUS1_EPI32 (r32); //saturate and narrow, use low 64 bits only + return64(r32); +} + +_NEON2SSESTORAGE uint32x2_t vqshrn_n_u64(uint64x2_t a, __constrange(1,32) int b); // VQSHRN.U64 d0,q0,#32 +_NEON2SSE_INLINE uint32x2_t vqshrn_n_u64(uint64x2_t a, __constrange(1,32) int b) +{ + //serial solution may be faster + uint32x2_t res64; + __m128i r64, res_hi, zero; + zero = _mm_setzero_si128(); + r64 = vshrq_n_u64(a,b); + res_hi = _mm_srli_epi64(r64, 32); + res_hi = _mm_cmpgt_epi32(res_hi, zero); + r64 = _mm_or_si128(r64, res_hi); + r64 = _mm_shuffle_epi32(r64, 0 | (2 << 2) | (1 << 4) | (3 << 6)); //shuffle the data to get 2 32-bits + return64(r64); +} + + +//********* Vector rounding narrowing shift right by constant ************************* +//**************************************************************************************** +_NEON2SSESTORAGE int8x8_t vrshrn_n_s16(int16x8_t a, __constrange(1,8) int b); // VRSHRN.I16 d0,q0,#8 +_NEON2SSE_INLINE int8x8_t vrshrn_n_s16(int16x8_t a, __constrange(1,8) int b) // VRSHRN.I16 d0,q0,#8 +{ + int8x8_t res64; + __m128i r16; + r16 = vrshrq_n_s16(a,b); + r16 = _mm_shuffle_epi8 (r16, *(__m128i*) mask8_16_even_odd); //narrow, use low 64 bits only.
Impossible to use _mm_packs because of negative saturation problems + return64(r16); +} + +_NEON2SSESTORAGE int16x4_t vrshrn_n_s32(int32x4_t a, __constrange(1,16) int b); // VRSHRN.I32 d0,q0,#16 +_NEON2SSE_INLINE int16x4_t vrshrn_n_s32(int32x4_t a, __constrange(1,16) int b) // VRSHRN.I32 d0,q0,#16 +{ + int16x4_t res64; + __m128i r32; + r32 = vrshrq_n_s32(a,b); + r32 = _mm_shuffle_epi8 (r32, *(__m128i*) mask8_32_even_odd); //narrow, use low 64 bits only. Impossible to use _mm_packs because of negative saturation problems + return64(r32); +} + +_NEON2SSESTORAGE int32x2_t vrshrn_n_s64(int64x2_t a, __constrange(1,32) int b); // VRSHRN.I64 d0,q0,#32 +_NEON2SSE_INLINE int32x2_t vrshrn_n_s64(int64x2_t a, __constrange(1,32) int b) +{ + int32x2_t res64; + __m128i r64; + r64 = vrshrq_n_s64(a,b); + r64 = _mm_shuffle_epi32(r64, 0 | (2 << 2) | (1 << 4) | (3 << 6)); //shuffle the data to get 2 32-bits + return64(r64); +} + +_NEON2SSESTORAGE uint8x8_t vrshrn_n_u16(uint16x8_t a, __constrange(1,8) int b); // VRSHRN.I16 d0,q0,#8 +_NEON2SSE_INLINE uint8x8_t vrshrn_n_u16(uint16x8_t a, __constrange(1,8) int b) // VRSHRN.I16 d0,q0,#8 +{ + uint8x8_t res64; + __m128i mask, r16; + mask = _mm_set1_epi16(0xff); + r16 = vrshrq_n_s16(a,b); //after right shift b>=1 unsigned var fits into signed range, so we could use _mm_packus_epi16 (signed 16 to unsigned 8) + r16 = _mm_and_si128(r16, mask); //to avoid saturation + r16 = _mm_packus_epi16 (r16,r16); //saturate and narrow, use low 64 bits only + return64(r16); +} + +_NEON2SSESTORAGE uint16x4_t vrshrn_n_u32(uint32x4_t a, __constrange(1,16) int b); // VRSHRN.I32 d0,q0,#16 +_NEON2SSE_INLINE uint16x4_t vrshrn_n_u32(uint32x4_t a, __constrange(1,16) int b) // VRSHRN.I32 d0,q0,#16 +{ + uint16x4_t res64; + __m128i mask, r32; + mask = _mm_set1_epi32(0xffff); + r32 = vrshrq_n_u32(a,b); //after right shift b>=1 unsigned var fits into signed range, so we could use _MM_PACKUS_EPI32 (signed 32 to unsigned 8) + r32 = _mm_and_si128(r32, mask); //to avoid saturation + r32 = _MM_PACKUS1_EPI32 (r32); //saturate and narrow, use low 64 bits only + return64(r32); +} + +_NEON2SSESTORAGE uint32x2_t vrshrn_n_u64(uint64x2_t a, __constrange(1,32) int b); // VRSHRN.I64 d0,q0,#32 +_NEON2SSE_INLINE uint32x2_t vrshrn_n_u64(uint64x2_t a, __constrange(1,32) int b) //serial solution may be faster +{ + uint32x2_t res64; + __m128i r64; + r64 = vrshrq_n_u64(a,b); + r64 = _mm_shuffle_epi32(r64, 0 | (2 << 2) | (1 << 4) | (3 << 6)); //shuffle the data to get 2 32-bits + return64(r64); +} + +//************* Vector rounding narrowing saturating shift right by constant ************ +//**************************************************************************************** +_NEON2SSESTORAGE int8x8_t vqrshrn_n_s16(int16x8_t a, __constrange(1,8) int b); // VQRSHRN.S16 d0,q0,#8 +_NEON2SSE_INLINE int8x8_t vqrshrn_n_s16(int16x8_t a, __constrange(1,8) int b) // VQRSHRN.S16 d0,q0,#8 +{ + int8x8_t res64; + __m128i r16; + r16 = vrshrq_n_s16(a,b); + r16 = _mm_packs_epi16 (r16,r16); //saturate and narrow, use low 64 bits only + return64(r16); +} + +_NEON2SSESTORAGE int16x4_t vqrshrn_n_s32(int32x4_t a, __constrange(1,16) int b); // VQRSHRN.S32 d0,q0,#16 +_NEON2SSE_INLINE int16x4_t vqrshrn_n_s32(int32x4_t a, __constrange(1,16) int b) // VQRSHRN.S32 d0,q0,#16 +{ + int16x4_t res64; + __m128i r32; + r32 = vrshrq_n_s32(a,b); + r32 = _mm_packs_epi32 (r32,r32); //saturate and narrow, use low 64 bits only + return64(r32); +} + +_NEON2SSESTORAGE int32x2_t vqrshrn_n_s64(int64x2_t a, __constrange(1,32) int b); // VQRSHRN.S64 d0,q0,#32 
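Both serial implementations of the 64-to-32-bit narrowing shifts (vqshrn_n_s64 above and vqrshrn_n_s64 below) follow the same clamp-then-narrow pattern. A scalar sketch of that pattern, assuming only <stdint.h> (qrshrn_s64_ref is a hypothetical name, not part of this header):

    #include <stdint.h>
    /* Rounding, saturating narrowing shift right for 1 <= b <= 32. */
    static inline int32_t qrshrn_s64_ref(int64_t a, int b)
    {
        int64_t round_bit = (a >> (b - 1)) & 1; /* same rounding trick as the maskb lines */
        int64_t r = (a >> b) + round_bit;       /* arithmetic shift keeps the sign */
        if (r > INT32_MAX) r = INT32_MAX;       /* the SINT_MAX clamp */
        if (r < INT32_MIN) r = INT32_MIN;       /* the SINT_MIN clamp */
        return (int32_t)r;
    }

Like the header itself, this sketch relies on >> acting as an arithmetic shift for signed operands, which is implementation-defined in C but holds on the compilers this header targets.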
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int32x2_t vqrshrn_n_s64(int64x2_t a, __constrange(1,32) int b), _NEON2SSE_REASON_SLOW_UNEFFECTIVE) +{ + //no optimal SIMD solution found + _NEON2SSE_ALIGN_16 int64_t res64[2], atmp[2], maskb[2]; + int32x2_t res; + _mm_store_si128((__m128i*)atmp, a); + maskb[0] = atmp[0] & (( int64_t)1 << (b - 1)); + res64[0] = (atmp[0] >> b) + (maskb[0] >> (b - 1)); //rounded result + maskb[1] = atmp[1] & (( int64_t)1 << (b - 1)); + res64[1] = (atmp[1] >> b) + (maskb[1] >> (b - 1)); //rounded result + if(res64[0]>SINT_MAX) res64[0] = SINT_MAX; + if(res64[0]<SINT_MIN) res64[0] = SINT_MIN; + if(res64[1]>SINT_MAX) res64[1] = SINT_MAX; + if(res64[1]<SINT_MIN) res64[1] = SINT_MIN; + res.m64_i32[0] = (int32_t)res64[0]; + res.m64_i32[1] = (int32_t)res64[1]; + return res; +} + +_NEON2SSESTORAGE uint8x8_t vqrshrn_n_u16(uint16x8_t a, __constrange(1,8) int b); // VQRSHRN.s16 d0,q0,#8 +_NEON2SSE_INLINE uint8x8_t vqrshrn_n_u16(uint16x8_t a, __constrange(1,8) int b) // VQRSHRN.s16 d0,q0,#8 +{ + uint8x8_t res64; + __m128i r16; + r16 = vrshrq_n_u16(a,b); //after right shift b>=1 unsigned var fits into signed range, so we could use _mm_packus_epi16 (signed 16 to unsigned 8) + r16 = _mm_packus_epi16 (r16,r16); //saturate and narrow, use low 64 bits only + return64(r16); +} + +_NEON2SSESTORAGE uint16x4_t vqrshrn_n_u32(uint32x4_t a, __constrange(1,16) int b); // VQRSHRN.U32 d0,q0,#16 +_NEON2SSE_INLINE uint16x4_t vqrshrn_n_u32(uint32x4_t a, __constrange(1,16) int b) // VQRSHRN.U32 d0,q0,#16 +{ + uint16x4_t res64; + __m128i r32; + r32 = vrshrq_n_u32(a,b); //after right shift b>=1 unsigned var fits into signed range, so we could use _MM_PACKUS_EPI32 (signed 32 to unsigned 16) + r32 = _MM_PACKUS1_EPI32 (r32); //saturate and narrow, use low 64 bits only + return64(r32); +} + +_NEON2SSESTORAGE uint32x2_t vqrshrn_n_u64(uint64x2_t a, __constrange(1,32) int b); // VQRSHRN.U64 d0,q0,#32 +_NEON2SSE_INLINE uint32x2_t vqrshrn_n_u64(uint64x2_t a, __constrange(1,32) int b) +{ + //serial solution may be faster + uint32x2_t res64; + __m128i r64, res_hi, zero; + zero = _mm_setzero_si128(); + r64 = vrshrq_n_u64(a,b); + res_hi = _mm_srli_epi64(r64, 32); + res_hi = _mm_cmpgt_epi32(res_hi, zero); + r64 = _mm_or_si128(r64, res_hi); + r64 = _mm_shuffle_epi32(r64, 0 | (2 << 2) | (1 << 4) | (3 << 6)); //shuffle the data to get 2 32-bits + return64(r64); +} + +//************** Vector widening shift left by constant **************** +//************************************************************************ +_NEON2SSESTORAGE int16x8_t vshll_n_s8(int8x8_t a, __constrange(0,8) int b); // VSHLL.S8 q0,d0,#0 +_NEON2SSE_INLINE int16x8_t vshll_n_s8(int8x8_t a, __constrange(0,8) int b) // VSHLL.S8 q0,d0,#0 +{ + __m128i r; + r = _MM_CVTEPI8_EPI16 (_pM128i(a)); //SSE 4.1 + return _mm_slli_epi16 (r, b); +} + +_NEON2SSESTORAGE int32x4_t vshll_n_s16(int16x4_t a, __constrange(0,16) int b); // VSHLL.S16 q0,d0,#0 +_NEON2SSE_INLINE int32x4_t vshll_n_s16(int16x4_t a, __constrange(0,16) int b) // VSHLL.S16 q0,d0,#0 +{ + __m128i r; + r = _MM_CVTEPI16_EPI32(_pM128i(a)); //SSE4.1, + return _mm_slli_epi32 (r, b); +} + +_NEON2SSESTORAGE int64x2_t vshll_n_s32(int32x2_t a, __constrange(0,32) int b); // VSHLL.S32 q0,d0,#0 +_NEON2SSE_INLINE int64x2_t vshll_n_s32(int32x2_t a, __constrange(0,32) int b) // VSHLL.S32 q0,d0,#0 +{ + __m128i r; + r = _MM_CVTEPI32_EPI64(_pM128i(a)); //SSE4.1, + return _mm_slli_epi64 (r, b); +} + +_NEON2SSESTORAGE uint16x8_t vshll_n_u8(uint8x8_t a, __constrange(0,8) int b); // VSHLL.U8 q0,d0,#0 +_NEON2SSE_INLINE uint16x8_t vshll_n_u8(uint8x8_t a, __constrange(0,8) int b) // VSHLL.U8 q0,d0,#0 +{ + //no uint8 to uint16 conversion available, manual conversion used + __m128i zero, r; + zero = _mm_setzero_si128 (); + r = _mm_unpacklo_epi8(_pM128i(a), zero); + return _mm_slli_epi16 (r, b); +} + +_NEON2SSESTORAGE uint32x4_t vshll_n_u16(uint16x4_t a, __constrange(0,16) int b); // VSHLL.s16 q0,d0,#0 +_NEON2SSE_INLINE uint32x4_t vshll_n_u16(uint16x4_t a, __constrange(0,16) int b)
// VSHLL.s16 q0,d0,#0 +{ + //no uint16 to uint32 conversion available, manual conversion used + __m128i zero, r; + zero = _mm_setzero_si128 (); + r = _mm_unpacklo_epi16(_pM128i(a), zero); + return _mm_slli_epi32 (r, b); +} + +_NEON2SSESTORAGE uint64x2_t vshll_n_u32(uint32x2_t a, __constrange(0,32) int b); // VSHLL.U32 q0,d0,#0 +_NEON2SSE_INLINE uint64x2_t vshll_n_u32(uint32x2_t a, __constrange(0,32) int b) // VSHLL.U32 q0,d0,#0 +{ + //no uint32 to uint64 conversion available, manual conversion used + __m128i zero, r; + zero = _mm_setzero_si128 (); + r = _mm_unpacklo_epi32(_pM128i(a), zero); + return _mm_slli_epi64 (r, b); +} + +//************************************************************************************ +//**************************** Shifts with insert ************************************ +//************************************************************************************ +//takes each element in a vector, shifts them by an immediate value, +//and inserts the results in the destination vector. Bits shifted out of the each element are lost. + +//**************** Vector shift right and insert ************************************ +//Actually the "c" left bits from "a" are the only bits remained from "a" after the shift. +//All other bits are taken from b shifted. +_NEON2SSESTORAGE int8x8_t vsri_n_s8(int8x8_t a, int8x8_t b, __constrange(1,8) int c); // VSRI.8 d0,d0,#8 +_NEON2SSE_INLINE int8x8_t vsri_n_s8(int8x8_t a, int8x8_t b, __constrange(1,8) int c) +{ + int8x8_t res64; + return64(vsriq_n_s8(_pM128i(a),_pM128i(b), c)); +} + + +_NEON2SSESTORAGE int16x4_t vsri_n_s16(int16x4_t a, int16x4_t b, __constrange(1,16) int c); // VSRI.16 d0,d0,#16 +_NEON2SSE_INLINE int16x4_t vsri_n_s16(int16x4_t a, int16x4_t b, __constrange(1,16) int c) +{ + int16x4_t res64; + return64(vsriq_n_s16(_pM128i(a),_pM128i(b), c)); +} + + +_NEON2SSESTORAGE int32x2_t vsri_n_s32(int32x2_t a, int32x2_t b, __constrange(1,32) int c); // VSRI.32 d0,d0,#32 +_NEON2SSE_INLINE int32x2_t vsri_n_s32(int32x2_t a, int32x2_t b, __constrange(1,32) int c) +{ + int32x2_t res64; + return64(vsriq_n_s32(_pM128i(a),_pM128i(b), c)); +} + + +_NEON2SSESTORAGE int64x1_t vsri_n_s64(int64x1_t a, int64x1_t b, __constrange(1,64) int c); // VSRI.64 d0,d0,#64 +_NEON2SSE_INLINE int64x1_t vsri_n_s64(int64x1_t a, int64x1_t b, __constrange(1,64) int c) +{ + int64x1_t res; + if (c ==64) + res = a; + else{ + res.m64_i64[0] = (b.m64_u64[0] >> c) | ((a.m64_i64[0] >> (64 - c)) << (64 - c)); //treat b as unsigned for shift to get leading zeros + } + return res; +} + +_NEON2SSE_GLOBAL uint8x8_t vsri_n_u8(uint8x8_t a, uint8x8_t b, __constrange(1,8) int c); // VSRI.8 d0,d0,#8 +#define vsri_n_u8 vsri_n_s8 + +_NEON2SSE_GLOBAL uint16x4_t vsri_n_u16(uint16x4_t a, uint16x4_t b, __constrange(1,16) int c); // VSRI.16 d0,d0,#16 +#define vsri_n_u16 vsri_n_s16 + +_NEON2SSE_GLOBAL uint32x2_t vsri_n_u32(uint32x2_t a, uint32x2_t b, __constrange(1,32) int c); // VSRI.32 d0,d0,#32 +#define vsri_n_u32 vsri_n_s32 + + +_NEON2SSE_GLOBAL uint64x1_t vsri_n_u64(uint64x1_t a, uint64x1_t b, __constrange(1,64) int c); // VSRI.64 d0,d0,#64 +#define vsri_n_u64 vsri_n_s64 + +_NEON2SSE_GLOBAL poly8x8_t vsri_n_p8(poly8x8_t a, poly8x8_t b, __constrange(1,8) int c); // VSRI.8 d0,d0,#8 +#define vsri_n_p8 vsri_n_u8 + +_NEON2SSE_GLOBAL poly16x4_t vsri_n_p16(poly16x4_t a, poly16x4_t b, __constrange(1,16) int c); // VSRI.16 d0,d0,#16 +#define vsri_n_p16 vsri_n_u16 + +_NEON2SSESTORAGE int8x16_t vsriq_n_s8(int8x16_t a, int8x16_t b, __constrange(1,8) int c); // VSRI.8 q0,q0,#8 +_NEON2SSE_INLINE 
int8x16_t vsriq_n_s8(int8x16_t a, int8x16_t b, __constrange(1,8) int c) // VSRI.8 q0,q0,#8 +{ + __m128i maskA, a_masked; + uint8x16_t b_shift; + _NEON2SSE_ALIGN_16 static const uint8_t maskLeft[9] = {0x0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff}; //"a" bits mask, 0 bit not used + maskA = _mm_set1_epi8(maskLeft[c]); // c ones and (8-c)zeros + a_masked = _mm_and_si128 (a, maskA); + b_shift = vshrq_n_u8( b, c); // c zeros on the left in b due to logical shift + return _mm_or_si128 (a_masked, b_shift); //combine (insert b into a) +} + +_NEON2SSESTORAGE int16x8_t vsriq_n_s16(int16x8_t a, int16x8_t b, __constrange(1,16) int c); // VSRI.16 q0,q0,#16 +_NEON2SSE_INLINE int16x8_t vsriq_n_s16(int16x8_t a, int16x8_t b, __constrange(1,16) int c) // VSRI.16 q0,q0,#16 +{ + //to cut "c" left bits from a we do shift right and then shift back left providing c right zeros in a + uint16x8_t b_shift; + uint16x8_t a_c; + b_shift = vshrq_n_u16( b, c); // c zeros on the left in b due to logical shift + a_c = vshrq_n_u16( a, (16 - c)); + a_c = _mm_slli_epi16(a_c, (16 - c)); //logical shift provides right "c" bits zeros in a + return _mm_or_si128 (a_c, b_shift); //combine (insert b into a) +} + +_NEON2SSESTORAGE int32x4_t vsriq_n_s32(int32x4_t a, int32x4_t b, __constrange(1,32) int c); // VSRI.32 q0,q0,#32 +_NEON2SSE_INLINE int32x4_t vsriq_n_s32(int32x4_t a, int32x4_t b, __constrange(1,32) int c) // VSRI.32 q0,q0,#32 +{ + //to cut "c" left bits from a we do shift right and then shift back left providing c right zeros in a + uint32x4_t b_shift; + uint32x4_t a_c; + b_shift = vshrq_n_u32( b, c); // c zeros on the left in b due to logical shift + a_c = vshrq_n_u32( a, (32 - c)); + a_c = _mm_slli_epi32(a_c, (32 - c)); //logical shift provides right "c" bits zeros in a + return _mm_or_si128 (a_c, b_shift); //combine (insert b into a) +} + +_NEON2SSESTORAGE int64x2_t vsriq_n_s64(int64x2_t a, int64x2_t b, __constrange(1,64) int c); // VSRI.64 q0,q0,#64 +_NEON2SSE_INLINE int64x2_t vsriq_n_s64(int64x2_t a, int64x2_t b, __constrange(1,64) int c) +{ + //serial solution may be faster + uint64x2_t b_shift; + uint64x2_t a_c; + b_shift = _mm_srli_epi64(b, c); // c zeros on the left in b due to logical shift + a_c = _mm_srli_epi64(a, (64 - c)); + a_c = _mm_slli_epi64(a_c, (64 - c)); //logical shift provides right "c" bits zeros in a + return _mm_or_si128 (a_c, b_shift); //combine (insert b into a) +} + +_NEON2SSE_GLOBAL uint8x16_t vsriq_n_u8(uint8x16_t a, uint8x16_t b, __constrange(1,8) int c); // VSRI.8 q0,q0,#8 +#define vsriq_n_u8 vsriq_n_s8 + +_NEON2SSE_GLOBAL uint16x8_t vsriq_n_u16(uint16x8_t a, uint16x8_t b, __constrange(1,16) int c); // VSRI.16 q0,q0,#16 +#define vsriq_n_u16 vsriq_n_s16 + +_NEON2SSE_GLOBAL uint32x4_t vsriq_n_u32(uint32x4_t a, uint32x4_t b, __constrange(1,32) int c); // VSRI.32 q0,q0,#32 +#define vsriq_n_u32 vsriq_n_s32 + +_NEON2SSE_GLOBAL uint64x2_t vsriq_n_u64(uint64x2_t a, uint64x2_t b, __constrange(1,64) int c); // VSRI.64 q0,q0,#64 +#define vsriq_n_u64 vsriq_n_s64 + +_NEON2SSE_GLOBAL poly8x16_t vsriq_n_p8(poly8x16_t a, poly8x16_t b, __constrange(1,8) int c); // VSRI.8 q0,q0,#8 +#define vsriq_n_p8 vsriq_n_u8 + +_NEON2SSE_GLOBAL poly16x8_t vsriq_n_p16(poly16x8_t a, poly16x8_t b, __constrange(1,16) int c); // VSRI.16 q0,q0,#16 +#define vsriq_n_p16 vsriq_n_u16 + +//***** Vector shift left and insert ********************************************* +//********************************************************************************* +//Actually the "c" right bits from "a" are the only bits remained from "a" 
after the shift. +//All other bits are taken from b shifted. Ending zeros are inserted in b in the shift process. We need to combine "a" and "b shifted". +_NEON2SSESTORAGE int8x8_t vsli_n_s8(int8x8_t a, int8x8_t b, __constrange(0,7) int c); // VSLI.8 d0,d0,#0 +_NEON2SSE_INLINE int8x8_t vsli_n_s8(int8x8_t a, int8x8_t b, __constrange(0,7) int c) +{ + int8x8_t res64; + return64(vsliq_n_s8(_pM128i(a),_pM128i(b), c)); +} + + +_NEON2SSESTORAGE int16x4_t vsli_n_s16(int16x4_t a, int16x4_t b, __constrange(0,15) int c); // VSLI.16 d0,d0,#0 +_NEON2SSE_INLINE int16x4_t vsli_n_s16(int16x4_t a, int16x4_t b, __constrange(0,15) int c) +{ + int16x4_t res64; + return64(vsliq_n_s16(_pM128i(a),_pM128i(b), c)); +} + + +_NEON2SSESTORAGE int32x2_t vsli_n_s32(int32x2_t a, int32x2_t b, __constrange(0,31) int c); // VSLI.32 d0,d0,#0 +_NEON2SSE_INLINE int32x2_t vsli_n_s32(int32x2_t a, int32x2_t b, __constrange(0,31) int c) +{ + int32x2_t res64; + return64(vsliq_n_s32(_pM128i(a),_pM128i(b), c)); +} + +_NEON2SSESTORAGE int64x1_t vsli_n_s64(int64x1_t a, int64x1_t b, __constrange(0,63) int c); // VSLI.64 d0,d0,#0 +_NEON2SSE_INLINE int64x1_t vsli_n_s64(int64x1_t a, int64x1_t b, __constrange(0,63) int c) +{ + int64x1_t res; + res.m64_i64[0] = (b.m64_i64[0] << c) | ((a.m64_u64[0] << (64 - c)) >> (64 - c)); //need to treat a as unsigned to get leading zeros + return res; +} + + +_NEON2SSE_GLOBAL uint8x8_t vsli_n_u8(uint8x8_t a, uint8x8_t b, __constrange(0,7) int c); // VSLI.8 d0,d0,#0 +#define vsli_n_u8 vsli_n_s8 + +_NEON2SSE_GLOBAL uint16x4_t vsli_n_u16(uint16x4_t a, uint16x4_t b, __constrange(0,15) int c); // VSLI.16 d0,d0,#0 +#define vsli_n_u16 vsli_n_s16 + +_NEON2SSE_GLOBAL uint32x2_t vsli_n_u32(uint32x2_t a, uint32x2_t b, __constrange(0,31) int c); // VSLI.32 d0,d0,#0 +#define vsli_n_u32 vsli_n_s32 + +_NEON2SSE_GLOBAL uint64x1_t vsli_n_u64(uint64x1_t a, uint64x1_t b, __constrange(0,63) int c); // VSLI.64 d0,d0,#0 +#define vsli_n_u64 vsli_n_s64 + +_NEON2SSE_GLOBAL poly8x8_t vsli_n_p8(poly8x8_t a, poly8x8_t b, __constrange(0,7) int c); // VSLI.8 d0,d0,#0 +#define vsli_n_p8 vsli_n_u8 + +_NEON2SSE_GLOBAL poly16x4_t vsli_n_p16(poly16x4_t a, poly16x4_t b, __constrange(0,15) int c); // VSLI.16 d0,d0,#0 +#define vsli_n_p16 vsli_n_u16 + +_NEON2SSESTORAGE int8x16_t vsliq_n_s8(int8x16_t a, int8x16_t b, __constrange(0,7) int c); // VSLI.8 q0,q0,#0 +_NEON2SSE_INLINE int8x16_t vsliq_n_s8(int8x16_t a, int8x16_t b, __constrange(0,7) int c) // VSLI.8 q0,q0,#0 +{ + __m128i maskA, a_masked; + int8x16_t b_shift; + _NEON2SSE_ALIGN_16 static const uint8_t maskRight[8] = {0x0, 0x1, 0x3, 0x7, 0x0f, 0x1f, 0x3f, 0x7f}; //"a" bits mask + maskA = _mm_set1_epi8(maskRight[c]); // (8-c)zeros and c ones + b_shift = vshlq_n_s8( b, c); + a_masked = _mm_and_si128 (a, maskA); + return _mm_or_si128 (b_shift, a_masked); //combine (insert b into a) +} + +_NEON2SSESTORAGE int16x8_t vsliq_n_s16(int16x8_t a, int16x8_t b, __constrange(0,15) int c); // VSLI.16 q0,q0,#0 +_NEON2SSE_INLINE int16x8_t vsliq_n_s16(int16x8_t a, int16x8_t b, __constrange(0,15) int c) // VSLI.16 q0,q0,#0 +{ + //to cut "c" right bits from a we do shift left and then logical shift back right providing (16-c)zeros in a + int16x8_t b_shift; + int16x8_t a_c; + b_shift = vshlq_n_s16( b, c); + a_c = vshlq_n_s16( a, (16 - c)); + a_c = _mm_srli_epi16(a_c, (16 - c)); + return _mm_or_si128 (b_shift, a_c); //combine (insert b into a) +} + +_NEON2SSESTORAGE int32x4_t vsliq_n_s32(int32x4_t a, int32x4_t b, __constrange(0,31) int c); // VSLI.32 q0,q0,#0 +_NEON2SSE_INLINE int32x4_t vsliq_n_s32(int32x4_t 
a, int32x4_t b, __constrange(0,31) int c) // VSLI.32 q0,q0,#0 +{ + //solution may be not optimal compared with the serial one + //to cut "c" right bits from a we do shift left and then logical shift back right providing (32-c)zeros in a + int32x4_t b_shift; + int32x4_t a_c; + b_shift = vshlq_n_s32( b, c); + a_c = vshlq_n_s32( a, (32 - c)); + a_c = _mm_srli_epi32(a_c, (32 - c)); + return _mm_or_si128 (b_shift, a_c); //combine (insert b into a) +} + +_NEON2SSESTORAGE int64x2_t vsliq_n_s64(int64x2_t a, int64x2_t b, __constrange(0,63) int c); // VSLI.64 q0,q0,#0 +_NEON2SSE_INLINE int64x2_t vsliq_n_s64(int64x2_t a, int64x2_t b, __constrange(0,63) int c) // VSLI.64 q0,q0,#0 +{ + //solution may be not optimal compared with the serial one + //to cut "c" right bits from a we do shift left and then logical shift back right providing (64-c)zeros in a + int64x2_t b_shift; + int64x2_t a_c; + b_shift = vshlq_n_s64( b, c); + a_c = vshlq_n_s64( a, (64 - c)); + a_c = _mm_srli_epi64(a_c, (64 - c)); + return _mm_or_si128 (b_shift, a_c); //combine (insert b into a) +} + +_NEON2SSE_GLOBAL uint8x16_t vsliq_n_u8(uint8x16_t a, uint8x16_t b, __constrange(0,7) int c); // VSLI.8 q0,q0,#0 +#define vsliq_n_u8 vsliq_n_s8 + +_NEON2SSE_GLOBAL uint16x8_t vsliq_n_u16(uint16x8_t a, uint16x8_t b, __constrange(0,15) int c); // VSLI.16 q0,q0,#0 +#define vsliq_n_u16 vsliq_n_s16 + +_NEON2SSE_GLOBAL uint32x4_t vsliq_n_u32(uint32x4_t a, uint32x4_t b, __constrange(0,31) int c); // VSLI.32 q0,q0,#0 +#define vsliq_n_u32 vsliq_n_s32 + +_NEON2SSE_GLOBAL uint64x2_t vsliq_n_u64(uint64x2_t a, uint64x2_t b, __constrange(0,63) int c); // VSLI.64 q0,q0,#0 +#define vsliq_n_u64 vsliq_n_s64 + +_NEON2SSE_GLOBAL poly8x16_t vsliq_n_p8(poly8x16_t a, poly8x16_t b, __constrange(0,7) int c); // VSLI.8 q0,q0,#0 +#define vsliq_n_p8 vsliq_n_u8 + +_NEON2SSE_GLOBAL poly16x8_t vsliq_n_p16(poly16x8_t a, poly16x8_t b, __constrange(0,15) int c); // VSLI.16 q0,q0,#0 +#define vsliq_n_p16 vsliq_n_u16 + +// *********************************************************************************************** +// ****************** Loads and stores of a single vector *************************************** +// *********************************************************************************************** +//Performs loads and stores of a single vector of some type. +//******************************* Loads ******************************************************** +// *********************************************************************************************** +//We assume ptr is NOT aligned in general case and use __m128i _mm_loadu_si128 ((__m128i*) ptr);. +//also for SSE3 supporting systems the __m128i _mm_lddqu_si128 (__m128i const* p) usage for unaligned access may be advantageous. +// it loads a 32-byte block aligned on a 16-byte boundary and extracts the 16 bytes corresponding to the unaligned access +//If the ptr is aligned then could use __m128i _mm_load_si128 ((__m128i*) ptr) instead; +#define LOAD_SI128(ptr) \ + ( ((uintptr_t)(ptr) & 15) == 0 ) ? 
_mm_load_si128((__m128i*)(ptr)) : _mm_loadu_si128((__m128i*)(ptr)) + +_NEON2SSE_GLOBAL uint8x16_t vld1q_u8(__transfersize(16) uint8_t const * ptr); // VLD1.8 {d0, d1}, [r0] +#define vld1q_u8 LOAD_SI128 + +_NEON2SSE_GLOBAL uint16x8_t vld1q_u16(__transfersize(8) uint16_t const * ptr); // VLD1.16 {d0, d1}, [r0] +#define vld1q_u16 LOAD_SI128 + +_NEON2SSE_GLOBAL uint32x4_t vld1q_u32(__transfersize(4) uint32_t const * ptr); // VLD1.32 {d0, d1}, [r0] +#define vld1q_u32 LOAD_SI128 + +_NEON2SSE_GLOBAL uint64x2_t vld1q_u64(__transfersize(2) uint64_t const * ptr); // VLD1.64 {d0, d1}, [r0] +#define vld1q_u64 LOAD_SI128 + +_NEON2SSE_GLOBAL int8x16_t vld1q_s8(__transfersize(16) int8_t const * ptr); // VLD1.8 {d0, d1}, [r0] +#define vld1q_s8 LOAD_SI128 + +_NEON2SSE_GLOBAL int16x8_t vld1q_s16(__transfersize(8) int16_t const * ptr); // VLD1.16 {d0, d1}, [r0] +#define vld1q_s16 LOAD_SI128 + +_NEON2SSE_GLOBAL int32x4_t vld1q_s32(__transfersize(4) int32_t const * ptr); // VLD1.32 {d0, d1}, [r0] +#define vld1q_s32 LOAD_SI128 + +_NEON2SSE_GLOBAL int64x2_t vld1q_s64(__transfersize(2) int64_t const * ptr); // VLD1.64 {d0, d1}, [r0] +#define vld1q_s64 LOAD_SI128 + +_NEON2SSE_GLOBAL float16x8_t vld1q_f16(__transfersize(8) __fp16 const * ptr); // VLD1.16 {d0, d1}, [r0] +// IA32 SIMD doesn't work with 16bit floats currently, so need to go to 32 bit and then work with two 128bit registers +/* _NEON2SSE_INLINE float16x8_t vld1q_f16(__transfersize(8) __fp16 const * ptr)// VLD1.16 {d0, d1}, [r0] +{__m128 f1 = _mm_set_ps (ptr[3], ptr[2], ptr[1], ptr[0]); +__m128 f2; +f2 = _mm_set_ps (ptr[7], ptr[6], ptr[5], ptr[4]); +}*/ + +_NEON2SSESTORAGE float32x4_t vld1q_f32(__transfersize(4) float32_t const * ptr); // VLD1.32 {d0, d1}, [r0] +_NEON2SSE_INLINE float32x4_t vld1q_f32(__transfersize(4) float32_t const * ptr) +{ + if( (((uintptr_t)(ptr)) & 15 ) == 0 ) //16 bits aligned + return _mm_load_ps(ptr); + else + return _mm_loadu_ps(ptr); +} + +_NEON2SSE_GLOBAL poly8x16_t vld1q_p8(__transfersize(16) poly8_t const * ptr); // VLD1.8 {d0, d1}, [r0] +#define vld1q_p8 LOAD_SI128 + +_NEON2SSE_GLOBAL poly16x8_t vld1q_p16(__transfersize(8) poly16_t const * ptr); // VLD1.16 {d0, d1}, [r0] +#define vld1q_p16 LOAD_SI128 + +_NEON2SSE_GLOBAL uint8x8_t vld1_u8(__transfersize(8) uint8_t const * ptr); // VLD1.8 {d0}, [r0] +#define vld1_u8(ptr) *((__m64_128*)(ptr)) //was _mm_loadl_epi64((__m128i*)(ptr)) + +_NEON2SSE_GLOBAL uint16x4_t vld1_u16(__transfersize(4) uint16_t const * ptr); // VLD1.16 {d0}, [r0] +#define vld1_u16 vld1_u8 + +_NEON2SSE_GLOBAL uint32x2_t vld1_u32(__transfersize(2) uint32_t const * ptr); // VLD1.32 {d0}, [r0] +#define vld1_u32 vld1_u8 + + +_NEON2SSE_GLOBAL uint64x1_t vld1_u64(__transfersize(1) uint64_t const * ptr); // VLD1.64 {d0}, [r0] +#define vld1_u64 vld1_u8 + +_NEON2SSE_GLOBAL int8x8_t vld1_s8(__transfersize(8) int8_t const * ptr); // VLD1.8 {d0}, [r0] +#define vld1_s8 vld1_u8 + +_NEON2SSE_GLOBAL int16x4_t vld1_s16(__transfersize(4) int16_t const * ptr); // VLD1.16 {d0}, [r0] +#define vld1_s16 vld1_u16 + +_NEON2SSE_GLOBAL int32x2_t vld1_s32(__transfersize(2) int32_t const * ptr); // VLD1.32 {d0}, [r0] +#define vld1_s32 vld1_u32 + +_NEON2SSE_GLOBAL int64x1_t vld1_s64(__transfersize(1) int64_t const * ptr); // VLD1.64 {d0}, [r0] +#define vld1_s64 vld1_u64 + +_NEON2SSE_GLOBAL float16x4_t vld1_f16(__transfersize(4) __fp16 const * ptr); // VLD1.16 {d0}, [r0] +// IA32 SIMD doesn't work with 16bit floats currently, so need to go to 32 bit like _mm_set_ps (ptr[3], ptr[2], ptr[1], ptr[0]); + +_NEON2SSESTORAGE float32x2_t 
vld1_f32(__transfersize(2) float32_t const * ptr); // VLD1.32 {d0}, [r0]
+_NEON2SSE_INLINE float32x2_t vld1_f32(__transfersize(2) float32_t const * ptr)
+{
+    float32x2_t res;
+    res.m64_f32[0] = *(ptr);
+    res.m64_f32[1] = *(ptr + 1);
+    return res;
+}
+
+_NEON2SSE_GLOBAL poly8x8_t vld1_p8(__transfersize(8) poly8_t const * ptr); // VLD1.8 {d0}, [r0]
+#define vld1_p8 vld1_u8
+
+_NEON2SSE_GLOBAL poly16x4_t vld1_p16(__transfersize(4) poly16_t const * ptr); // VLD1.16 {d0}, [r0]
+#define vld1_p16 vld1_u16
+
+
+_NEON2SSESTORAGE float64x2_t vld1q_f64(__transfersize(2) float64_t const * ptr); // VLD1.64 {d0, d1}, [r0]
+_NEON2SSE_INLINE float64x2_t vld1q_f64(__transfersize(2) float64_t const * ptr)
+{
+    if ((((uintptr_t)(ptr)) & 15) == 0) //16 bytes aligned
+        return _mm_load_pd(ptr);
+    else
+        return _mm_loadu_pd(ptr);
+}
+
+
+//***********************************************************************************************************
+//******* Lane load functions - insert the data at vector's given position (lane) *************************
+//***********************************************************************************************************
+_NEON2SSE_GLOBAL uint8x16_t vld1q_lane_u8(__transfersize(1) uint8_t const * ptr, uint8x16_t vec, __constrange(0,15) int lane); // VLD1.8 {d0[0]}, [r0]
+#define vld1q_lane_u8(ptr, vec, lane) _MM_INSERT_EPI8(vec, *(ptr), lane)
+
+_NEON2SSE_GLOBAL uint16x8_t vld1q_lane_u16(__transfersize(1) uint16_t const * ptr, uint16x8_t vec, __constrange(0,7) int lane); // VLD1.16 {d0[0]}, [r0]
+#define vld1q_lane_u16(ptr, vec, lane) _MM_INSERT_EPI16(vec, *(ptr), lane)
+
+_NEON2SSE_GLOBAL uint32x4_t vld1q_lane_u32(__transfersize(1) uint32_t const * ptr, uint32x4_t vec, __constrange(0,3) int lane); // VLD1.32 {d0[0]}, [r0]
+#define vld1q_lane_u32(ptr, vec, lane) _MM_INSERT_EPI32(vec, *(ptr), lane)
+
+_NEON2SSE_GLOBAL uint64x2_t vld1q_lane_u64(__transfersize(1) uint64_t const * ptr, uint64x2_t vec, __constrange(0,1) int lane); // VLD1.64 {d0}, [r0]
+#define vld1q_lane_u64(ptr, vec, lane) _MM_INSERT_EPI64(vec, *(ptr), lane)
+
+
+_NEON2SSE_GLOBAL int8x16_t vld1q_lane_s8(__transfersize(1) int8_t const * ptr, int8x16_t vec, __constrange(0,15) int lane); // VLD1.8 {d0[0]}, [r0]
+#define vld1q_lane_s8(ptr, vec, lane) _MM_INSERT_EPI8(vec, *(ptr), lane)
+
+_NEON2SSE_GLOBAL int16x8_t vld1q_lane_s16(__transfersize(1) int16_t const * ptr, int16x8_t vec, __constrange(0,7) int lane); // VLD1.16 {d0[0]}, [r0]
+#define vld1q_lane_s16(ptr, vec, lane) _MM_INSERT_EPI16(vec, *(ptr), lane)
+
+_NEON2SSE_GLOBAL int32x4_t vld1q_lane_s32(__transfersize(1) int32_t const * ptr, int32x4_t vec, __constrange(0,3) int lane); // VLD1.32 {d0[0]}, [r0]
+#define vld1q_lane_s32(ptr, vec, lane) _MM_INSERT_EPI32(vec, *(ptr), lane)
+
+_NEON2SSE_GLOBAL float16x8_t vld1q_lane_f16(__transfersize(1) __fp16 const * ptr, float16x8_t vec, __constrange(0,7) int lane); // VLD1.16 {d0[0]}, [r0]
+//current IA SIMD doesn't support float16
+
+_NEON2SSESTORAGE float32x4_t vld1q_lane_f32(__transfersize(1) float32_t const * ptr, float32x4_t vec, __constrange(0,3) int lane); // VLD1.32 {d0[0]}, [r0]
+_NEON2SSE_INLINE float32x4_t vld1q_lane_f32(__transfersize(1) float32_t const * ptr, float32x4_t vec, __constrange(0,3) int lane)
+{
+    //ptr may not be 16-byte aligned, so we broadcast the scalar instead of doing an aligned vector load
+    __m128 p;
+    p = _mm_set1_ps(*(ptr));
+    return _MM_INSERT_PS(vec, p, _INSERTPS_NDX(0, lane));
+}
+
+_NEON2SSE_GLOBAL int64x2_t vld1q_lane_s64(__transfersize(1) int64_t const * ptr, int64x2_t vec, __constrange(0,1) int lane); // VLD1.64 {d0}, [r0]
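+//A minimal usage sketch of the lane loads (hypothetical values, not part of the API; on x86
+//the 64-bit variant below assumes SSE4.1 and a 64-bit target for _MM_INSERT_EPI64):
+//    int64_t x[2] = {5, 5};
+//    int64_t y = 7;
+//    int64x2_t v = vld1q_s64(x);   //v = {5, 5}
+//    v = vld1q_lane_s64(&y, v, 1); //v = {5, 7}: lane 1 is replaced, lane 0 is preserved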
+#define vld1q_lane_s64(ptr, vec, lane) _MM_INSERT_EPI64(vec, *(ptr), lane) + +_NEON2SSE_GLOBAL poly8x16_t vld1q_lane_p8(__transfersize(1) poly8_t const * ptr, poly8x16_t vec, __constrange(0,15) int lane); // VLD1.8 {d0[0]}, [r0] +#define vld1q_lane_p8(ptr, vec, lane) _MM_INSERT_EPI8(vec, *(ptr), lane) + +_NEON2SSE_GLOBAL poly16x8_t vld1q_lane_p16(__transfersize(1) poly16_t const * ptr, poly16x8_t vec, __constrange(0,7) int lane); // VLD1.16 {d0[0]}, [r0] +#define vld1q_lane_p16(ptr, vec, lane) _MM_INSERT_EPI16(vec, *(ptr), lane) + +_NEON2SSESTORAGE uint8x8_t vld1_lane_u8(__transfersize(1) uint8_t const * ptr, uint8x8_t vec, __constrange(0,7) int lane); // VLD1.8 {d0[0]}, [r0] +_NEON2SSE_INLINE uint8x8_t vld1_lane_u8(__transfersize(1) uint8_t const * ptr, uint8x8_t vec, __constrange(0,7) int lane) +{ + uint8x8_t res; + res = vec; + res.m64_u8[lane] = *(ptr); + return res; +} + +_NEON2SSESTORAGE uint16x4_t vld1_lane_u16(__transfersize(1) uint16_t const * ptr, uint16x4_t vec, __constrange(0,3) int lane); // VLD1.16 {d0[0]}, [r0] +_NEON2SSE_INLINE uint16x4_t vld1_lane_u16(__transfersize(1) uint16_t const * ptr, uint16x4_t vec, __constrange(0,3) int lane) +{ + uint16x4_t res; + res = vec; + res.m64_u16[lane] = *(ptr); + return res; +} + +_NEON2SSESTORAGE uint32x2_t vld1_lane_u32(__transfersize(1) uint32_t const * ptr, uint32x2_t vec, __constrange(0,1) int lane); // VLD1.32 {d0[0]}, [r0] +_NEON2SSE_INLINE uint32x2_t vld1_lane_u32(__transfersize(1) uint32_t const * ptr, uint32x2_t vec, __constrange(0,1) int lane) +{ + uint32x2_t res; + res = vec; + res.m64_u32[lane] = *(ptr); + return res; +} + +_NEON2SSESTORAGE uint64x1_t vld1_lane_u64(__transfersize(1) uint64_t const * ptr, uint64x1_t vec, __constrange(0,0) int lane); // VLD1.64 {d0}, [r0] +_NEON2SSE_INLINE uint64x1_t vld1_lane_u64(__transfersize(1) uint64_t const * ptr, uint64x1_t vec, __constrange(0,0) int lane) +{ + uint64x1_t res; + UNREFERENCED_PARAM(vec); + UNREFERENCED_PARAM(lane); + res.m64_u64[0] = *(ptr); + return res; +} + + +_NEON2SSE_GLOBAL int8x8_t vld1_lane_s8(__transfersize(1) int8_t const * ptr, int8x8_t vec, __constrange(0,7) int lane); // VLD1.8 {d0[0]}, [r0] +#define vld1_lane_s8(ptr, vec, lane) vld1_lane_u8((uint8_t*)ptr, vec, lane) + +_NEON2SSE_GLOBAL int16x4_t vld1_lane_s16(__transfersize(1) int16_t const * ptr, int16x4_t vec, __constrange(0,3) int lane); // VLD1.16 {d0[0]}, [r0] +#define vld1_lane_s16(ptr, vec, lane) vld1_lane_u16((uint16_t*)ptr, vec, lane) + +_NEON2SSE_GLOBAL int32x2_t vld1_lane_s32(__transfersize(1) int32_t const * ptr, int32x2_t vec, __constrange(0,1) int lane); // VLD1.32 {d0[0]}, [r0] +#define vld1_lane_s32(ptr, vec, lane) vld1_lane_u32((uint32_t*)ptr, vec, lane) + +_NEON2SSE_GLOBAL float16x4_t vld1_lane_f16(__transfersize(1) __fp16 const * ptr, float16x4_t vec, __constrange(0,3) int lane); // VLD1.16 {d0[0]}, [r0] +//current IA SIMD doesn't support float16 + +_NEON2SSESTORAGE float32x2_t vld1_lane_f32(__transfersize(1) float32_t const * ptr, float32x2_t vec, __constrange(0,1) int lane); // VLD1.32 {d0[0]}, [r0] +_NEON2SSE_INLINE float32x2_t vld1_lane_f32(__transfersize(1) float32_t const * ptr, float32x2_t vec, __constrange(0,1) int lane) +{ + float32x2_t res; + res = vec; + res.m64_f32[lane] = *(ptr); + return res; +} + +_NEON2SSE_GLOBAL int64x1_t vld1_lane_s64(__transfersize(1) int64_t const * ptr, int64x1_t vec, __constrange(0,0) int lane); // VLD1.64 {d0}, [r0] +#define vld1_lane_s64(ptr, vec, lane) vld1_lane_u64((uint64_t*)ptr, vec, lane) + +_NEON2SSE_GLOBAL poly8x8_t 
vld1_lane_p8(__transfersize(1) poly8_t const * ptr, poly8x8_t vec, __constrange(0,7) int lane); // VLD1.8 {d0[0]}, [r0] +#define vld1_lane_p8 vld1_lane_u8 + +_NEON2SSE_GLOBAL poly16x4_t vld1_lane_p16(__transfersize(1) poly16_t const * ptr, poly16x4_t vec, __constrange(0,3) int lane); // VLD1.16 {d0[0]}, [r0] +#define vld1_lane_p16 vld1_lane_s16 + +// ****************** Load single value ( set all lanes of vector with same value from memory)********************** +// ****************************************************************************************************************** +_NEON2SSE_GLOBAL uint8x16_t vld1q_dup_u8(__transfersize(1) uint8_t const * ptr); // VLD1.8 {d0[]}, [r0] +#define vld1q_dup_u8(ptr) _mm_set1_epi8(*(ptr)) + +_NEON2SSE_GLOBAL uint16x8_t vld1q_dup_u16(__transfersize(1) uint16_t const * ptr); // VLD1.16 {d0[]}, [r0] +#define vld1q_dup_u16(ptr) _mm_set1_epi16(*(ptr)) + +_NEON2SSE_GLOBAL uint32x4_t vld1q_dup_u32(__transfersize(1) uint32_t const * ptr); // VLD1.32 {d0[]}, [r0] +#define vld1q_dup_u32(ptr) _mm_set1_epi32(*(ptr)) + +_NEON2SSESTORAGE uint64x2_t vld1q_dup_u64(__transfersize(1) uint64_t const * ptr); // VLD1.64 {d0}, [r0] +_NEON2SSE_INLINE uint64x2_t vld1q_dup_u64(__transfersize(1) uint64_t const * ptr) +{ + _NEON2SSE_ALIGN_16 uint64_t val[2]; + + val[0] = *(ptr); + val[1] = *(ptr); + + return LOAD_SI128(val); +} + +_NEON2SSE_GLOBAL int8x16_t vld1q_dup_s8(__transfersize(1) int8_t const * ptr); // VLD1.8 {d0[]}, [r0] +#define vld1q_dup_s8(ptr) _mm_set1_epi8(*(ptr)) + +_NEON2SSE_GLOBAL int16x8_t vld1q_dup_s16(__transfersize(1) int16_t const * ptr); // VLD1.16 {d0[]}, [r0] +#define vld1q_dup_s16(ptr) _mm_set1_epi16 (*(ptr)) + +_NEON2SSE_GLOBAL int32x4_t vld1q_dup_s32(__transfersize(1) int32_t const * ptr); // VLD1.32 {d0[]}, [r0] +#define vld1q_dup_s32(ptr) _mm_set1_epi32 (*(ptr)) + +_NEON2SSE_GLOBAL int64x2_t vld1q_dup_s64(__transfersize(1) int64_t const * ptr); // VLD1.64 {d0}, [r0] +#define vld1q_dup_s64(ptr) vld1q_dup_u64((uint64_t*)ptr) + +_NEON2SSE_GLOBAL float16x8_t vld1q_dup_f16(__transfersize(1) __fp16 const * ptr); // VLD1.16 {d0[]}, [r0] +//current IA SIMD doesn't support float16, need to go to 32 bits + +_NEON2SSE_GLOBAL float32x4_t vld1q_dup_f32(__transfersize(1) float32_t const * ptr); // VLD1.32 {d0[]}, [r0] +#define vld1q_dup_f32(ptr) _mm_set1_ps (*(ptr)) + +_NEON2SSE_GLOBAL poly8x16_t vld1q_dup_p8(__transfersize(1) poly8_t const * ptr); // VLD1.8 {d0[]}, [r0] +#define vld1q_dup_p8(ptr) _mm_set1_epi8(*(ptr)) + +_NEON2SSE_GLOBAL poly16x8_t vld1q_dup_p16(__transfersize(1) poly16_t const * ptr); // VLD1.16 {d0[]}, [r0] +#define vld1q_dup_p16(ptr) _mm_set1_epi16 (*(ptr)) + +_NEON2SSESTORAGE uint8x8_t vld1_dup_u8(__transfersize(1) uint8_t const * ptr); // VLD1.8 {d0[]}, [r0] +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint8x8_t vld1_dup_u8(__transfersize(1) uint8_t const * ptr), _NEON2SSE_REASON_SLOW_SERIAL) +{ + uint8x8_t res; + int i; + for(i = 0; i<8; i++) { + res.m64_u8[i] = *(ptr); + } + return res; +} + +_NEON2SSESTORAGE uint16x4_t vld1_dup_u16(__transfersize(1) uint16_t const * ptr); // VLD1.16 {d0[]}, [r0] +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint16x4_t vld1_dup_u16(__transfersize(1) uint16_t const * ptr), _NEON2SSE_REASON_SLOW_SERIAL) +{ + uint16x4_t res; + int i; + for(i = 0; i<4; i++) { + res.m64_u16[i] = *(ptr); + } + return res; +} + +_NEON2SSESTORAGE uint32x2_t vld1_dup_u32(__transfersize(1) uint32_t const * ptr); // VLD1.32 {d0[]}, [r0] +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint32x2_t 
vld1_dup_u32(__transfersize(1) uint32_t const * ptr), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    uint32x2_t res;
+    res.m64_u32[0] = *(ptr);
+    res.m64_u32[1] = *(ptr);
+    return res;
+}
+
+_NEON2SSESTORAGE uint64x1_t vld1_dup_u64(__transfersize(1) uint64_t const * ptr); // VLD1.64 {d0}, [r0]
+_NEON2SSE_INLINE uint64x1_t vld1_dup_u64(__transfersize(1) uint64_t const * ptr)
+{
+    uint64x1_t res;
+    res.m64_u64[0] = *(ptr);
+    return res;
+}
+
+_NEON2SSE_GLOBAL int8x8_t vld1_dup_s8(__transfersize(1) int8_t const * ptr); // VLD1.8 {d0[]}, [r0]
+#define vld1_dup_s8(ptr) vld1_dup_u8((uint8_t*)ptr)
+
+
+_NEON2SSE_GLOBAL int16x4_t vld1_dup_s16(__transfersize(1) int16_t const * ptr); // VLD1.16 {d0[]}, [r0]
+#define vld1_dup_s16(ptr) vld1_dup_u16((uint16_t*)ptr)
+
+
+_NEON2SSE_GLOBAL int32x2_t vld1_dup_s32(__transfersize(1) int32_t const * ptr); // VLD1.32 {d0[]}, [r0]
+#define vld1_dup_s32(ptr) vld1_dup_u32((uint32_t*)ptr)
+
+
+_NEON2SSE_GLOBAL int64x1_t vld1_dup_s64(__transfersize(1) int64_t const * ptr); // VLD1.64 {d0}, [r0]
+#define vld1_dup_s64(ptr) vld1_dup_u64((uint64_t*)ptr)
+
+_NEON2SSE_GLOBAL float16x4_t vld1_dup_f16(__transfersize(1) __fp16 const * ptr); // VLD1.16 {d0[]}, [r0]
+//current IA SIMD doesn't support float16
+
+_NEON2SSESTORAGE float32x2_t vld1_dup_f32(__transfersize(1) float32_t const * ptr); // VLD1.32 {d0[]}, [r0]
+_NEON2SSE_INLINE float32x2_t vld1_dup_f32(__transfersize(1) float32_t const * ptr)
+{
+    float32x2_t res;
+    res.m64_f32[0] = *(ptr);
+    res.m64_f32[1] = res.m64_f32[0];
+    return res; //both lanes hold the same value
+}
+
+_NEON2SSE_GLOBAL poly8x8_t vld1_dup_p8(__transfersize(1) poly8_t const * ptr); // VLD1.8 {d0[]}, [r0]
+#define vld1_dup_p8 vld1_dup_u8
+
+
+_NEON2SSE_GLOBAL poly16x4_t vld1_dup_p16(__transfersize(1) poly16_t const * ptr); // VLD1.16 {d0[]}, [r0]
+#define vld1_dup_p16 vld1_dup_u16
+
+
+//*************************************************************************************
+//********************************* Store **********************************************
+//*************************************************************************************
+// If ptr is 16-byte aligned and you need to store data without cache pollution then use void _mm_stream_si128 ((__m128i*)ptr, val);
+//here we assume that ptr may be NOT 16-byte aligned. If it is aligned, _mm_store_si128 is used, as shown in the following macro
+#define STORE_SI128(ptr, val) \
+    (((uintptr_t)(ptr) & 15) == 0 ) ?
_mm_store_si128 ((__m128i*)(ptr), val) : _mm_storeu_si128 ((__m128i*)(ptr), val);
+
+_NEON2SSE_GLOBAL void vst1q_u8(__transfersize(16) uint8_t * ptr, uint8x16_t val); // VST1.8 {d0, d1}, [r0]
+#define vst1q_u8 STORE_SI128
+
+_NEON2SSE_GLOBAL void vst1q_u16(__transfersize(8) uint16_t * ptr, uint16x8_t val); // VST1.16 {d0, d1}, [r0]
+#define vst1q_u16 STORE_SI128
+
+_NEON2SSE_GLOBAL void vst1q_u32(__transfersize(4) uint32_t * ptr, uint32x4_t val); // VST1.32 {d0, d1}, [r0]
+#define vst1q_u32 STORE_SI128
+
+_NEON2SSE_GLOBAL void vst1q_u64(__transfersize(2) uint64_t * ptr, uint64x2_t val); // VST1.64 {d0, d1}, [r0]
+#define vst1q_u64 STORE_SI128
+
+_NEON2SSE_GLOBAL void vst1q_s8(__transfersize(16) int8_t * ptr, int8x16_t val); // VST1.8 {d0, d1}, [r0]
+#define vst1q_s8 STORE_SI128
+
+_NEON2SSE_GLOBAL void vst1q_s16(__transfersize(8) int16_t * ptr, int16x8_t val); // VST1.16 {d0, d1}, [r0]
+#define vst1q_s16 STORE_SI128
+
+_NEON2SSE_GLOBAL void vst1q_s32(__transfersize(4) int32_t * ptr, int32x4_t val); // VST1.32 {d0, d1}, [r0]
+#define vst1q_s32 STORE_SI128
+
+_NEON2SSE_GLOBAL void vst1q_s64(__transfersize(2) int64_t * ptr, int64x2_t val); // VST1.64 {d0, d1}, [r0]
+#define vst1q_s64 STORE_SI128
+
+_NEON2SSE_GLOBAL void vst1q_f16(__transfersize(8) __fp16 * ptr, float16x8_t val); // VST1.16 {d0, d1}, [r0]
+// IA32 SIMD doesn't work with 16bit floats currently
+
+_NEON2SSESTORAGE void vst1q_f32(__transfersize(4) float32_t * ptr, float32x4_t val); // VST1.32 {d0, d1}, [r0]
+_NEON2SSE_INLINE void vst1q_f32(__transfersize(4) float32_t * ptr, float32x4_t val)
+{
+    if( ((uintptr_t)(ptr) & 15) == 0 ) //16 bytes aligned
+        _mm_store_ps (ptr, val);
+    else
+        _mm_storeu_ps (ptr, val);
+}
+
+_NEON2SSE_GLOBAL void vst1q_p8(__transfersize(16) poly8_t * ptr, poly8x16_t val); // VST1.8 {d0, d1}, [r0]
+#define vst1q_p8 vst1q_u8
+
+_NEON2SSE_GLOBAL void vst1q_p16(__transfersize(8) poly16_t * ptr, poly16x8_t val); // VST1.16 {d0, d1}, [r0]
+#define vst1q_p16 vst1q_u16
+
+_NEON2SSESTORAGE void vst1_u8(__transfersize(8) uint8_t * ptr, uint8x8_t val); // VST1.8 {d0}, [r0]
+_NEON2SSE_INLINE void vst1_u8(__transfersize(8) uint8_t * ptr, uint8x8_t val)
+{
+    int i;
+    for (i = 0; i<8; i++) {
+        *(ptr + i) = ((uint8_t*)&val)[i];
+    }
+    //_mm_storel_epi64((__m128i*)ptr, val);
+    return;
+}
+
+_NEON2SSESTORAGE void vst1_u16(__transfersize(4) uint16_t * ptr, uint16x4_t val); // VST1.16 {d0}, [r0]
+_NEON2SSE_INLINE void vst1_u16(__transfersize(4) uint16_t * ptr, uint16x4_t val)
+{
+    int i;
+    for (i = 0; i<4; i++) {
+        *(ptr + i) = ((uint16_t*)&val)[i];
+    }
+    //_mm_storel_epi64((__m128i*)ptr, val);
+    return;
+}
+
+_NEON2SSESTORAGE void vst1_u32(__transfersize(2) uint32_t * ptr, uint32x2_t val); // VST1.32 {d0}, [r0]
+_NEON2SSE_INLINE void vst1_u32(__transfersize(2) uint32_t * ptr, uint32x2_t val)
+{
+    int i;
+    for (i = 0; i<2; i++) {
+        *(ptr + i) = ((uint32_t*)&val)[i];
+    }
+    //_mm_storel_epi64((__m128i*)ptr, val);
+    return;
+}
+
+_NEON2SSESTORAGE void vst1_u64(__transfersize(1) uint64_t * ptr, uint64x1_t val); // VST1.64 {d0}, [r0]
+_NEON2SSE_INLINE void vst1_u64(__transfersize(1) uint64_t * ptr, uint64x1_t val)
+{
+    *(ptr) = *((uint64_t*)&val);
+    //_mm_storel_epi64((__m128i*)ptr, val);
+    return;
+}
+
+_NEON2SSE_GLOBAL void vst1_s8(__transfersize(8) int8_t * ptr, int8x8_t val); // VST1.8 {d0}, [r0]
+#define vst1_s8(ptr,val) vst1_u8((uint8_t*)ptr,val)
+
+_NEON2SSE_GLOBAL void vst1_s16(__transfersize(4) int16_t * ptr, int16x4_t val); // VST1.16 {d0}, [r0]
+#define vst1_s16(ptr,val) vst1_u16((uint16_t*)ptr,val)
+
+_NEON2SSE_GLOBAL
void vst1_s32(__transfersize(2) int32_t * ptr, int32x2_t val); // VST1.32 {d0}, [r0] +#define vst1_s32(ptr,val) vst1_u32((uint32_t*)ptr,val) + +_NEON2SSE_GLOBAL void vst1_s64(__transfersize(1) int64_t * ptr, int64x1_t val); // VST1.64 {d0}, [r0] +#define vst1_s64(ptr,val) vst1_u64((uint64_t*)ptr,val) + +_NEON2SSE_GLOBAL void vst1_f16(__transfersize(4) __fp16 * ptr, float16x4_t val); // VST1.16 {d0}, [r0] +//current IA SIMD doesn't support float16 + +_NEON2SSESTORAGE void vst1_f32(__transfersize(2) float32_t * ptr, float32x2_t val); // VST1.32 {d0}, [r0] +_NEON2SSE_INLINE void vst1_f32(__transfersize(2) float32_t * ptr, float32x2_t val) +{ + *(ptr) = val.m64_f32[0]; + *(ptr + 1) = val.m64_f32[1]; + return; +} + +_NEON2SSE_GLOBAL void vst1_p8(__transfersize(8) poly8_t * ptr, poly8x8_t val); // VST1.8 {d0}, [r0] +#define vst1_p8 vst1_u8 + +_NEON2SSE_GLOBAL void vst1_p16(__transfersize(4) poly16_t * ptr, poly16x4_t val); // VST1.16 {d0}, [r0] +#define vst1_p16 vst1_u16 + +//***********Store a lane of a vector into memory (extract given lane) ********************* +//****************************************************************************************** +_NEON2SSE_GLOBAL void vst1q_lane_u8(__transfersize(1) uint8_t * ptr, uint8x16_t val, __constrange(0,15) int lane); // VST1.8 {d0[0]}, [r0] +#define vst1q_lane_u8(ptr, val, lane) *(ptr) = (uint8_t) _MM_EXTRACT_EPI8 (val, lane) + +_NEON2SSE_GLOBAL void vst1q_lane_u16(__transfersize(1) uint16_t * ptr, uint16x8_t val, __constrange(0,7) int lane); // VST1.16 {d0[0]}, [r0] +#define vst1q_lane_u16(ptr, val, lane) *(ptr) = (uint16_t) _MM_EXTRACT_EPI16 (val, lane) + +_NEON2SSE_GLOBAL void vst1q_lane_u32(__transfersize(1) uint32_t * ptr, uint32x4_t val, __constrange(0,3) int lane); // VST1.32 {d0[0]}, [r0] +#define vst1q_lane_u32(ptr, val, lane) *(ptr) = (uint32_t) _MM_EXTRACT_EPI32 (val, lane) + +_NEON2SSE_GLOBAL void vst1q_lane_u64(__transfersize(1) uint64_t * ptr, uint64x2_t val, __constrange(0,1) int lane); // VST1.64 {d0}, [r0] +#define vst1q_lane_u64(ptr, val, lane) *(ptr) = (uint64_t) _MM_EXTRACT_EPI64 (val, lane) + +_NEON2SSE_GLOBAL void vst1q_lane_s8(__transfersize(1) int8_t * ptr, int8x16_t val, __constrange(0,15) int lane); // VST1.8 {d0[0]}, [r0] +#define vst1q_lane_s8(ptr, val, lane) *(ptr) = (int8_t) _MM_EXTRACT_EPI8 (val, lane) + +_NEON2SSE_GLOBAL void vst1q_lane_s16(__transfersize(1) int16_t * ptr, int16x8_t val, __constrange(0,7) int lane); // VST1.16 {d0[0]}, [r0] +#define vst1q_lane_s16(ptr, val, lane) *(ptr) = (int16_t) _MM_EXTRACT_EPI16 (val, lane) + +_NEON2SSE_GLOBAL void vst1q_lane_s32(__transfersize(1) int32_t * ptr, int32x4_t val, __constrange(0,3) int lane); // VST1.32 {d0[0]}, [r0] +#define vst1q_lane_s32(ptr, val, lane) *(ptr) = _MM_EXTRACT_EPI32 (val, lane) + +_NEON2SSE_GLOBAL void vst1q_lane_s64(__transfersize(1) int64_t * ptr, int64x2_t val, __constrange(0,1) int lane); // VST1.64 {d0}, [r0] +#define vst1q_lane_s64(ptr, val, lane) *(ptr) = _MM_EXTRACT_EPI64 (val, lane) + +_NEON2SSE_GLOBAL void vst1q_lane_f16(__transfersize(1) __fp16 * ptr, float16x8_t val, __constrange(0,7) int lane); // VST1.16 {d0[0]}, [r0] +//current IA SIMD doesn't support float16 + +_NEON2SSESTORAGE void vst1q_lane_f32(__transfersize(1) float32_t * ptr, float32x4_t val, __constrange(0,3) int lane); // VST1.32 {d0[0]}, [r0] +_NEON2SSE_INLINE void vst1q_lane_f32(__transfersize(1) float32_t * ptr, float32x4_t val, __constrange(0,3) int lane) +{ + *((int32_t*)ptr) = _MM_EXTRACT_PS(val,lane); +} + +_NEON2SSE_GLOBAL void vst1q_lane_p8(__transfersize(1) 
poly8_t * ptr, poly8x16_t val, __constrange(0,15) int lane); // VST1.8 {d0[0]}, [r0] +#define vst1q_lane_p8 vst1q_lane_u8 + +_NEON2SSE_GLOBAL void vst1q_lane_p16(__transfersize(1) poly16_t * ptr, poly16x8_t val, __constrange(0,7) int lane); // VST1.16 {d0[0]}, [r0] +#define vst1q_lane_p16 vst1q_lane_s16 + +_NEON2SSESTORAGE void vst1_lane_u8(__transfersize(1) uint8_t * ptr, uint8x8_t val, __constrange(0,7) int lane); // VST1.8 {d0[0]}, [r0] +_NEON2SSE_INLINE void vst1_lane_u8(__transfersize(1) uint8_t * ptr, uint8x8_t val, __constrange(0,7) int lane) +{ + *(ptr) = val.m64_u8[lane]; +} + +_NEON2SSESTORAGE void vst1_lane_u16(__transfersize(1) uint16_t * ptr, uint16x4_t val, __constrange(0,3) int lane); // VST1.16 {d0[0]}, [r0] +_NEON2SSE_INLINE void vst1_lane_u16(__transfersize(1) uint16_t * ptr, uint16x4_t val, __constrange(0,3) int lane) +{ + *(ptr) = val.m64_u16[lane]; +} + +_NEON2SSESTORAGE void vst1_lane_u32(__transfersize(1) uint32_t * ptr, uint32x2_t val, __constrange(0,1) int lane); // VST1.32 {d0[0]}, [r0] +_NEON2SSE_INLINE void vst1_lane_u32(__transfersize(1) uint32_t * ptr, uint32x2_t val, __constrange(0,1) int lane) +{ + *(ptr) = val.m64_u32[lane]; +} + +_NEON2SSESTORAGE void vst1_lane_u64(__transfersize(1) uint64_t * ptr, uint64x1_t val, __constrange(0,0) int lane); // VST1.64 {d0}, [r0] +_NEON2SSE_INLINE void vst1_lane_u64(__transfersize(1) uint64_t * ptr, uint64x1_t val, __constrange(0,0) int lane) +{ + UNREFERENCED_PARAM(lane); + *(ptr) = val.m64_u64[0]; +} + +_NEON2SSE_GLOBAL void vst1_lane_s8(__transfersize(1) int8_t * ptr, int8x8_t val, __constrange(0,7) int lane); // VST1.8 {d0[0]}, [r0] +#define vst1_lane_s8(ptr, val, lane) vst1_lane_u8((uint8_t*)ptr, val, lane) + +_NEON2SSE_GLOBAL void vst1_lane_s16(__transfersize(1) int16_t * ptr, int16x4_t val, __constrange(0,3) int lane); // VST1.16 {d0[0]}, [r0] +#define vst1_lane_s16(ptr, val, lane) vst1_lane_u16((uint16_t*)ptr, val, lane) + +_NEON2SSE_GLOBAL void vst1_lane_s32(__transfersize(1) int32_t * ptr, int32x2_t val, __constrange(0,1) int lane); // VST1.32 {d0[0]}, [r0] +#define vst1_lane_s32(ptr, val, lane) vst1_lane_u32((uint32_t*)ptr, val, lane) + + +_NEON2SSE_GLOBAL void vst1_lane_s64(__transfersize(1) int64_t * ptr, int64x1_t val, __constrange(0,0) int lane); // VST1.64 {d0}, [r0] +#define vst1_lane_s64(ptr, val, lane) vst1_lane_u64((uint64_t*)ptr, val, lane) + + +_NEON2SSE_GLOBAL void vst1_lane_f16(__transfersize(1) __fp16 * ptr, float16x4_t val, __constrange(0,3) int lane); // VST1.16 {d0[0]}, [r0] +//current IA SIMD doesn't support float16 + +_NEON2SSESTORAGE void vst1_lane_f32(__transfersize(1) float32_t * ptr, float32x2_t val, __constrange(0,1) int lane); // VST1.32 {d0[0]}, [r0] +_NEON2SSE_INLINE void vst1_lane_f32(__transfersize(1) float32_t * ptr, float32x2_t val, __constrange(0,1) int lane) +{ + *(ptr) = val.m64_f32[lane]; +} + +_NEON2SSE_GLOBAL void vst1_lane_p8(__transfersize(1) poly8_t * ptr, poly8x8_t val, __constrange(0,7) int lane); // VST1.8 {d0[0]}, [r0] +#define vst1_lane_p8 vst1_lane_u8 + +_NEON2SSE_GLOBAL void vst1_lane_p16(__transfersize(1) poly16_t * ptr, poly16x4_t val, __constrange(0,3) int lane); // VST1.16 {d0[0]}, [r0] +#define vst1_lane_p16 vst1_lane_s16 + +//*********************************************************************************************** +//**************** Loads and stores of an N-element structure ********************************** +//*********************************************************************************************** +//These intrinsics load or store an n-element 
structure. The array structures are defined at the beginning of this file
+//We assume ptr is NOT aligned in the general case; for more details see the "Loads and stores of a single vector" section above
+//****************** 2 elements load *********************************************
+_NEON2SSESTORAGE uint8x16x2_t vld2q_u8(__transfersize(32) uint8_t const * ptr); // VLD2.8 {d0, d2}, [r0]
+_NEON2SSE_INLINE uint8x16x2_t vld2q_u8(__transfersize(32) uint8_t const * ptr) // VLD2.8 {d0, d2}, [r0]
+{
+    uint8x16x2_t v;
+    v.val[0] = vld1q_u8(ptr);
+    v.val[1] = vld1q_u8((ptr + 16));
+    v = vuzpq_s8(v.val[0], v.val[1]);
+    return v;
+}
+
+_NEON2SSESTORAGE uint16x8x2_t vld2q_u16(__transfersize(16) uint16_t const * ptr); // VLD2.16 {d0, d2}, [r0]
+_NEON2SSE_INLINE uint16x8x2_t vld2q_u16(__transfersize(16) uint16_t const * ptr) // VLD2.16 {d0, d2}, [r0]
+{
+    uint16x8x2_t v;
+    v.val[0] = vld1q_u16( ptr);
+    v.val[1] = vld1q_u16( (ptr + 8));
+    v = vuzpq_s16(v.val[0], v.val[1]);
+    return v;
+}
+
+_NEON2SSESTORAGE uint32x4x2_t vld2q_u32(__transfersize(8) uint32_t const * ptr); // VLD2.32 {d0, d2}, [r0]
+_NEON2SSE_INLINE uint32x4x2_t vld2q_u32(__transfersize(8) uint32_t const * ptr) // VLD2.32 {d0, d2}, [r0]
+{
+    uint32x4x2_t v;
+    v.val[0] = vld1q_u32 ( ptr);
+    v.val[1] = vld1q_u32 ( (ptr + 4));
+    v = vuzpq_s32(v.val[0], v.val[1]);
+    return v;
+}
+
+_NEON2SSE_GLOBAL int8x16x2_t vld2q_s8(__transfersize(32) int8_t const * ptr);
+#define vld2q_s8(ptr) vld2q_u8((uint8_t*) ptr)
+
+_NEON2SSE_GLOBAL int16x8x2_t vld2q_s16(__transfersize(16) int16_t const * ptr); // VLD2.16 {d0, d2}, [r0]
+#define vld2q_s16(ptr) vld2q_u16((uint16_t*) ptr)
+
+_NEON2SSE_GLOBAL int32x4x2_t vld2q_s32(__transfersize(8) int32_t const * ptr); // VLD2.32 {d0, d2}, [r0]
+#define vld2q_s32(ptr) vld2q_u32((uint32_t*) ptr)
+
+
+_NEON2SSE_GLOBAL float16x8x2_t vld2q_f16(__transfersize(16) __fp16 const * ptr); // VLD2.16 {d0, d2}, [r0]
+// IA32 SIMD doesn't work with 16bit floats currently, so need to go to 32 bit and then work with two 128bit registers.
See vld1q_f16 for example
+
+_NEON2SSESTORAGE float32x4x2_t vld2q_f32(__transfersize(8) float32_t const * ptr); // VLD2.32 {d0, d2}, [r0]
+_NEON2SSE_INLINE float32x4x2_t vld2q_f32(__transfersize(8) float32_t const * ptr) // VLD2.32 {d0, d2}, [r0]
+{
+    float32x4x2_t v;
+    v.val[0] = vld1q_f32 (ptr);
+    v.val[1] = vld1q_f32 ((ptr + 4));
+    v = vuzpq_f32(v.val[0], v.val[1]);
+    return v;
+}
+
+_NEON2SSE_GLOBAL poly8x16x2_t vld2q_p8(__transfersize(32) poly8_t const * ptr); // VLD2.8 {d0, d2}, [r0]
+#define vld2q_p8 vld2q_u8
+
+_NEON2SSE_GLOBAL poly16x8x2_t vld2q_p16(__transfersize(16) poly16_t const * ptr); // VLD2.16 {d0, d2}, [r0]
+#define vld2q_p16 vld2q_u16
+
+_NEON2SSESTORAGE uint8x8x2_t vld2_u8(__transfersize(16) uint8_t const * ptr); // VLD2.8 {d0, d1}, [r0]
+_NEON2SSE_INLINE uint8x8x2_t vld2_u8(__transfersize(16) uint8_t const * ptr)
+{
+    uint8x8x2_t v;
+    __m128i ld128;
+    ld128 = vld1q_u8(ptr); //load the two 64-bit halves as one 128-bit vector
+    ld128 = _mm_shuffle_epi8(ld128, *(__m128i*)mask8_16_even_odd);
+    vst1q_u8((v.val), ld128); // v.val[1] = _mm_shuffle_epi32(v.val[0], _SWAP_HI_LOW32);
+    return v;
+}
+
+_NEON2SSESTORAGE uint16x4x2_t vld2_u16(__transfersize(8) uint16_t const * ptr); // VLD2.16 {d0, d1}, [r0]
+_NEON2SSE_INLINE uint16x4x2_t vld2_u16(__transfersize(8) uint16_t const * ptr)
+{
+    _NEON2SSE_ALIGN_16 uint16x4x2_t v;
+    __m128i ld128;
+    ld128 = vld1q_u16(ptr); //load the two 64-bit halves as one 128-bit vector
+    ld128 = _mm_shuffle_epi8(ld128, *(__m128i*) mask8_32_even_odd);
+    vst1q_u16((v.val), ld128);
+    return v;
+}
+
+_NEON2SSESTORAGE uint32x2x2_t vld2_u32(__transfersize(4) uint32_t const * ptr); // VLD2.32 {d0, d1}, [r0]
+_NEON2SSE_INLINE uint32x2x2_t vld2_u32(__transfersize(4) uint32_t const * ptr)
+{
+    _NEON2SSE_ALIGN_16 uint32x2x2_t v;
+    __m128i ld128;
+    ld128 = vld1q_u32(ptr); //load the two 64-bit halves as one 128-bit vector
+    ld128 = _mm_shuffle_epi32(ld128, 0 | (2 << 2) | (1 << 4) | (3 << 6));
+    vst1q_u32((v.val), ld128);
+    return v;
+}
+
+_NEON2SSESTORAGE uint64x1x2_t vld2_u64(__transfersize(2) uint64_t const * ptr); // VLD1.64 {d0, d1}, [r0]
+_NEON2SSE_INLINE uint64x1x2_t vld2_u64(__transfersize(2) uint64_t const * ptr)
+{
+    uint64x1x2_t v;
+    v.val[0].m64_u64[0] = *(ptr);
+    v.val[1].m64_u64[0] = *(ptr + 1);
+    return v;
+}
+
+_NEON2SSE_GLOBAL int8x8x2_t vld2_s8(__transfersize(16) int8_t const * ptr); // VLD2.8 {d0, d1}, [r0]
+#define vld2_s8(ptr) vld2_u8((uint8_t*)ptr)
+
+_NEON2SSE_GLOBAL int16x4x2_t vld2_s16(__transfersize(8) int16_t const * ptr); // VLD2.16 {d0, d1}, [r0]
+#define vld2_s16(ptr) vld2_u16((uint16_t*)ptr)
+
+_NEON2SSE_GLOBAL int32x2x2_t vld2_s32(__transfersize(4) int32_t const * ptr); // VLD2.32 {d0, d1}, [r0]
+#define vld2_s32(ptr) vld2_u32((uint32_t*)ptr)
+
+_NEON2SSE_GLOBAL int64x1x2_t vld2_s64(__transfersize(2) int64_t const * ptr); // VLD1.64 {d0, d1}, [r0]
+#define vld2_s64(ptr) vld2_u64((uint64_t*)ptr)
+
+_NEON2SSE_GLOBAL float16x4x2_t vld2_f16(__transfersize(8) __fp16 const * ptr); // VLD2.16 {d0, d1}, [r0]
+// IA32 SIMD doesn't work with 16bit floats currently, so need to go to 32 bit and then work with two 128bit registers.
See vld1_f16 for example + +_NEON2SSESTORAGE float32x2x2_t vld2_f32(__transfersize(4) float32_t const * ptr); // VLD2.32 {d0, d1}, [r0] +_NEON2SSE_INLINE float32x2x2_t vld2_f32(__transfersize(4) float32_t const * ptr) +{ + float32x2x2_t v; + v.val[0].m64_f32[0] = *(ptr); + v.val[0].m64_f32[1] = *(ptr + 2); + v.val[1].m64_f32[0] = *(ptr + 1); + v.val[1].m64_f32[1] = *(ptr + 3); + return v; +} + +_NEON2SSE_GLOBAL poly8x8x2_t vld2_p8(__transfersize(16) poly8_t const * ptr); // VLD2.8 {d0, d1}, [r0] +#define vld2_p8 vld2_u8 + +_NEON2SSE_GLOBAL poly16x4x2_t vld2_p16(__transfersize(8) poly16_t const * ptr); // VLD2.16 {d0, d1}, [r0] +#define vld2_p16 vld2_u16 + +//******************** Triplets *************************************** +//********************************************************************* +_NEON2SSESTORAGE uint8x16x3_t vld3q_u8(__transfersize(48) uint8_t const * ptr); // VLD3.8 {d0, d2, d4}, [r0] +_NEON2SSE_INLINE uint8x16x3_t vld3q_u8(__transfersize(48) uint8_t const * ptr) // VLD3.8 {d0, d2, d4}, [r0] +{ + //a0,a1,a2,a3,...a7,a8,...a15, b0,b1,b2,...b7,b8,...b15, c0,c1,c2,...c7,c8,...c15 -> + //a:0,3,6,9,12,15,b:2,5,8,11,14, c:1,4,7,10,13 + //a:1,4,7,10,13, b:0,3,6,9,12,15,c:2,5,8,11,14, + //a:2,5,8,11,14, b:1,4,7,10,13, c:0,3,6,9,12,15 + uint8x16x3_t v; + __m128i tmp0, tmp1,tmp2, tmp3; + _NEON2SSE_ALIGN_16 static const int8_t mask8_0[16] = {0,3,6,9,12,15,1,4,7,10,13,2,5,8,11,14}; + _NEON2SSE_ALIGN_16 static const int8_t mask8_1[16] = {2,5,8,11,14,0,3,6,9,12,15,1,4,7,10,13}; + _NEON2SSE_ALIGN_16 static const int8_t mask8_2[16] = {1,4,7,10,13,2,5,8,11,14,0,3,6,9,12,15}; + + v.val[0] = vld1q_u8 (ptr); //a0,a1,a2,a3,...a7, ...a15 + v.val[1] = vld1q_u8 ((ptr + 16)); //b0,b1,b2,b3...b7, ...b15 + v.val[2] = vld1q_u8 ((ptr + 32)); //c0,c1,c2,c3,...c7,...c15 + + tmp0 = _mm_shuffle_epi8(v.val[0], *(__m128i*)mask8_0); //a:0,3,6,9,12,15,1,4,7,10,13,2,5,8,11 + tmp1 = _mm_shuffle_epi8(v.val[1], *(__m128i*)mask8_1); //b:2,5,8,11,14,0,3,6,9,12,15,1,4,7,10,13 + tmp2 = _mm_shuffle_epi8(v.val[2], *(__m128i*)mask8_2); //c:1,4,7,10,13,2,5,8,11,14,3,6,9,12,15 + + tmp3 = _mm_slli_si128(tmp0,10); //0,0,0,0,0,0,0,0,0,0,a0,a3,a6,a9,a12,a15 + tmp3 = _mm_alignr_epi8(tmp1,tmp3, 10); //a:0,3,6,9,12,15,b:2,5,8,11,14,x,x,x,x,x + tmp3 = _mm_slli_si128(tmp3, 5); //0,0,0,0,0,a:0,3,6,9,12,15,b:2,5,8,11,14, + tmp3 = _mm_srli_si128(tmp3, 5); //a:0,3,6,9,12,15,b:2,5,8,11,14,:0,0,0,0,0 + v.val[0] = _mm_slli_si128(tmp2, 11); //0,0,0,0,0,0,0,0,0,0,0,0, 1,4,7,10,13, + v.val[0] = _mm_or_si128(v.val[0],tmp3); //a:0,3,6,9,12,15,b:2,5,8,11,14,c:1,4,7,10,13, + + tmp3 = _mm_slli_si128(tmp0, 5); //0,0,0,0,0,a:0,3,6,9,12,15,1,4,7,10,13, + tmp3 = _mm_srli_si128(tmp3, 11); //a:1,4,7,10,13, 0,0,0,0,0,0,0,0,0,0,0 + v.val[1] = _mm_srli_si128(tmp1,5); //b:0,3,6,9,12,15,C:1,4,7,10,13, 0,0,0,0,0 + v.val[1] = _mm_slli_si128(v.val[1], 5); //0,0,0,0,0,b:0,3,6,9,12,15,C:1,4,7,10,13, + v.val[1] = _mm_or_si128(v.val[1],tmp3); //a:1,4,7,10,13,b:0,3,6,9,12,15,C:1,4,7,10,13, + v.val[1] = _mm_slli_si128(v.val[1],5); //0,0,0,0,0,a:1,4,7,10,13,b:0,3,6,9,12,15, + v.val[1] = _mm_srli_si128(v.val[1], 5); //a:1,4,7,10,13,b:0,3,6,9,12,15,0,0,0,0,0 + tmp3 = _mm_srli_si128(tmp2,5); //c:2,5,8,11,14,0,3,6,9,12,15,0,0,0,0,0 + tmp3 = _mm_slli_si128(tmp3,11); //0,0,0,0,0,0,0,0,0,0,0,c:2,5,8,11,14, + v.val[1] = _mm_or_si128(v.val[1],tmp3); //a:1,4,7,10,13,b:0,3,6,9,12,15,c:2,5,8,11,14, + + tmp3 = _mm_srli_si128(tmp2,10); //c:0,3,6,9,12,15, 0,0,0,0,0,0,0,0,0,0, + tmp3 = _mm_slli_si128(tmp3,10); //0,0,0,0,0,0,0,0,0,0, c:0,3,6,9,12,15, + v.val[2] = 
_mm_srli_si128(tmp1,11); //b:1,4,7,10,13,0,0,0,0,0,0,0,0,0,0,0 + v.val[2] = _mm_slli_si128(v.val[2],5); //0,0,0,0,0,b:1,4,7,10,13, 0,0,0,0,0,0 + v.val[2] = _mm_or_si128(v.val[2],tmp3); //0,0,0,0,0,b:1,4,7,10,13,c:0,3,6,9,12,15, + tmp0 = _mm_srli_si128(tmp0, 11); //a:2,5,8,11,14, 0,0,0,0,0,0,0,0,0,0,0, + v.val[2] = _mm_or_si128(v.val[2],tmp0); //a:2,5,8,11,14,b:1,4,7,10,13,c:0,3,6,9,12,15, + return v; +} + +_NEON2SSESTORAGE uint16x8x3_t vld3q_u16(__transfersize(24) uint16_t const * ptr); // VLD3.16 {d0, d2, d4}, [r0] +_NEON2SSE_INLINE uint16x8x3_t vld3q_u16(__transfersize(24) uint16_t const * ptr) // VLD3.16 {d0, d2, d4}, [r0] +{ + //a0, a1,a2,a3,...a7, b0,b1,b2,b3,...b7, c0,c1,c2,c3...c7 -> a0,a3,a6,b1,b4,b7,c2,c5, a1,a4,a7,b2,b5,c0,c3,c6, a2,a5,b0,b3,b6,c1,c4,c7 + uint16x8x3_t v; + __m128i tmp0, tmp1,tmp2, tmp3; + _NEON2SSE_ALIGN_16 static const int8_t mask16_0[16] = {0,1, 6,7, 12,13, 2,3, 8,9, 14,15, 4,5, 10,11}; + _NEON2SSE_ALIGN_16 static const int8_t mask16_1[16] = {2,3, 8,9, 14,15, 4,5, 10,11, 0,1, 6,7, 12,13}; + _NEON2SSE_ALIGN_16 static const int8_t mask16_2[16] = {4,5, 10,11, 0,1, 6,7, 12,13, 2,3, 8,9, 14,15}; + + v.val[0] = vld1q_u16 (ptr); //a0,a1,a2,a3,...a7, + v.val[1] = vld1q_u16 ((ptr + 8)); //b0,b1,b2,b3...b7 + v.val[2] = vld1q_u16 ((ptr + 16)); //c0,c1,c2,c3,...c7 + + tmp0 = _mm_shuffle_epi8(v.val[0], *(__m128i*)mask16_0); //a0,a3,a6,a1,a4,a7,a2,a5, + tmp1 = _mm_shuffle_epi8(v.val[1], *(__m128i*)mask16_1); //b1,b4,b7,b2,b5,b0,b3,b6 + tmp2 = _mm_shuffle_epi8(v.val[2], *(__m128i*)mask16_2); //c2,c5, c0,c3,c6, c1,c4,c7 + + tmp3 = _mm_slli_si128(tmp0,10); //0,0,0,0,0,a0,a3,a6, + tmp3 = _mm_alignr_epi8(tmp1,tmp3, 10); //a0,a3,a6,b1,b4,b7,x,x + tmp3 = _mm_slli_si128(tmp3, 4); //0,0, a0,a3,a6,b1,b4,b7 + tmp3 = _mm_srli_si128(tmp3, 4); //a0,a3,a6,b1,b4,b7,0,0 + v.val[0] = _mm_slli_si128(tmp2, 12); //0,0,0,0,0,0, c2,c5, + v.val[0] = _mm_or_si128(v.val[0],tmp3); //a0,a3,a6,b1,b4,b7,c2,c5 + + tmp3 = _mm_slli_si128(tmp0, 4); //0,0,a0,a3,a6,a1,a4,a7 + tmp3 = _mm_srli_si128(tmp3,10); //a1,a4,a7, 0,0,0,0,0 + v.val[1] = _mm_srli_si128(tmp1,6); //b2,b5,b0,b3,b6,0,0 + v.val[1] = _mm_slli_si128(v.val[1], 6); //0,0,0,b2,b5,b0,b3,b6, + v.val[1] = _mm_or_si128(v.val[1],tmp3); //a1,a4,a7,b2,b5,b0,b3,b6, + v.val[1] = _mm_slli_si128(v.val[1],6); //0,0,0,a1,a4,a7,b2,b5, + v.val[1] = _mm_srli_si128(v.val[1], 6); //a1,a4,a7,b2,b5,0,0,0, + tmp3 = _mm_srli_si128(tmp2,4); //c0,c3,c6, c1,c4,c7,0,0 + tmp3 = _mm_slli_si128(tmp3,10); //0,0,0,0,0,c0,c3,c6, + v.val[1] = _mm_or_si128(v.val[1],tmp3); //a1,a4,a7,b2,b5,c0,c3,c6, + + tmp3 = _mm_srli_si128(tmp2,10); //c1,c4,c7, 0,0,0,0,0 + tmp3 = _mm_slli_si128(tmp3,10); //0,0,0,0,0, c1,c4,c7, + v.val[2] = _mm_srli_si128(tmp1,10); //b0,b3,b6,0,0, 0,0,0 + v.val[2] = _mm_slli_si128(v.val[2],4); //0,0, b0,b3,b6,0,0,0 + v.val[2] = _mm_or_si128(v.val[2],tmp3); //0,0, b0,b3,b6,c1,c4,c7, + tmp0 = _mm_srli_si128(tmp0, 12); //a2,a5,0,0,0,0,0,0 + v.val[2] = _mm_or_si128(v.val[2],tmp0); //a2,a5,b0,b3,b6,c1,c4,c7, + return v; +} + +_NEON2SSESTORAGE uint32x4x3_t vld3q_u32(__transfersize(12) uint32_t const * ptr); // VLD3.32 {d0, d2, d4}, [r0] +_NEON2SSE_INLINE uint32x4x3_t vld3q_u32(__transfersize(12) uint32_t const * ptr) // VLD3.32 {d0, d2, d4}, [r0] +{ + //a0,a1,a2,a3, b0,b1,b2,b3, c0,c1,c2,c3 -> a0,a3,b2,c1, a1,b0,b3,c2, a2,b1,c0,c3, + uint32x4x3_t v; + __m128i tmp0, tmp1,tmp2, tmp3; + v.val[0] = vld1q_u32 (ptr); //a0,a1,a2,a3, + v.val[1] = vld1q_u32 ((ptr + 4)); //b0,b1,b2,b3 + v.val[2] = vld1q_u32 ((ptr + 8)); //c0,c1,c2,c3, + + tmp0 = _mm_shuffle_epi32(v.val[0], 0 | (3 
<< 2) | (1 << 4) | (2 << 6)); //a0,a3,a1,a2 + tmp1 = _mm_shuffle_epi32(v.val[1], _SWAP_HI_LOW32); //b2,b3,b0,b1 + tmp2 = _mm_shuffle_epi32(v.val[2], 1 | (2 << 2) | (0 << 4) | (3 << 6)); //c1,c2, c0,c3 + + tmp3 = _mm_unpacklo_epi32(tmp1, tmp2); //b2,c1, b3,c2 + v.val[0] = _mm_unpacklo_epi64(tmp0,tmp3); //a0,a3,b2,c1 + tmp0 = _mm_unpackhi_epi32(tmp0, tmp1); //a1,b0, a2,b1 + v.val[1] = _mm_shuffle_epi32(tmp0, _SWAP_HI_LOW32 ); //a2,b1, a1,b0, + v.val[1] = _mm_unpackhi_epi64(v.val[1], tmp3); //a1,b0, b3,c2 + v.val[2] = _mm_unpackhi_epi64(tmp0, tmp2); //a2,b1, c0,c3 + return v; +} + +_NEON2SSE_GLOBAL int8x16x3_t vld3q_s8(__transfersize(48) int8_t const * ptr); // VLD3.8 {d0, d2, d4}, [r0] +#define vld3q_s8(ptr) vld3q_u8((uint8_t*) (ptr)) + +_NEON2SSE_GLOBAL int16x8x3_t vld3q_s16(__transfersize(24) int16_t const * ptr); // VLD3.16 {d0, d2, d4}, [r0] +#define vld3q_s16(ptr) vld3q_u16((uint16_t*) (ptr)) + +_NEON2SSE_GLOBAL int32x4x3_t vld3q_s32(__transfersize(12) int32_t const * ptr); // VLD3.32 {d0, d2, d4}, [r0] +#define vld3q_s32(ptr) vld3q_u32((uint32_t*) (ptr)) + +_NEON2SSE_GLOBAL float16x8x3_t vld3q_f16(__transfersize(24) __fp16 const * ptr); // VLD3.16 {d0, d2, d4}, [r0] +// IA32 SIMD doesn't work with 16bit floats currently, so need to go to 32 bit and then work with two 128bit registers. See vld1q_f16 for example + +_NEON2SSESTORAGE float32x4x3_t vld3q_f32(__transfersize(12) float32_t const * ptr); // VLD3.32 {d0, d2, d4}, [r0] +_NEON2SSE_INLINE float32x4x3_t vld3q_f32(__transfersize(12) float32_t const * ptr) // VLD3.32 {d0, d2, d4}, [r0] +{ + //a0,a1,a2,a3, b0,b1,b2,b3, c0,c1,c2,c3 -> a0,a3,b2,c1, a1,b0,b3,c2, a2,b1,c0,c3, + float32x4x3_t v; + __m128 tmp0, tmp1,tmp2, tmp3; + v.val[0] = vld1q_f32 (ptr); //a0,a1,a2,a3, + v.val[1] = vld1q_f32 ((ptr + 4)); //b0,b1,b2,b3 + v.val[2] = vld1q_f32 ((ptr + 8)); //c0,c1,c2,c3, + + tmp0 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(v.val[0]), 0 | (3 << 2) | (1 << 4) | (2 << 6))); //a0,a3,a1,a2 + tmp1 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(v.val[1]), _SWAP_HI_LOW32)); //b2,b3,b0,b1 + tmp2 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(v.val[2]), 1 | (2 << 2) | (0 << 4) | (3 << 6))); //c1,c2, c0,c3 + tmp3 = _mm_unpacklo_ps(tmp1, tmp2); //b2,c1, b3,c2 + + v.val[0] = _mm_movelh_ps(tmp0,tmp3); //a0,a3,b2,c1 + tmp0 = _mm_unpackhi_ps(tmp0, tmp1); //a1,b0, a2,b1 + v.val[1] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(tmp0), _SWAP_HI_LOW32 )); //a2,b1, a1,b0, + v.val[1] = _mm_movehl_ps(tmp3,v.val[1]); //a1,b0, b3,c2 + v.val[2] = _mm_movehl_ps(tmp2,tmp0); //a2,b1, c0,c3 + return v; +} + +//poly8x16x3_t vld3q_p8(__transfersize(48) poly8_t const * ptr); // VLD3.8 {d0, d2, d4}, [r0] +#define vld3q_p8 vld3q_u8 + +_NEON2SSE_GLOBAL poly16x8x3_t vld3q_p16(__transfersize(24) poly16_t const * ptr); // VLD3.16 {d0, d2, d4}, [r0] +#define vld3q_p16 vld3q_u16 + +_NEON2SSESTORAGE uint8x8x3_t vld3_u8(__transfersize(24) uint8_t const * ptr); // VLD3.8 {d0, d1, d2}, [r0] +_NEON2SSE_INLINE uint8x8x3_t vld3_u8(__transfersize(24) uint8_t const * ptr) // VLD3.8 {d0, d1, d2}, [r0] +{ + //a0, a1,a2,a3,...a7, b0,b1,b2,b3,...b7, c0,c1,c2,c3...c7 -> a0,a3,a6,b1,b4,b7,c2,c5, a1,a4,a7,b2,b5,c0,c3,c6, a2,a5,b0,b3,b6,c1,c4,c7 + uint8x8x3_t v; + __m128i val0, val1, val2, tmp0, tmp1; + _NEON2SSE_ALIGN_16 static const int8_t mask8_0[16] = {0,3,6,9,12,15, 1,4,7,10,13, 2,5,8,11,14}; + _NEON2SSE_ALIGN_16 static const int8_t mask8_1[16] = {2,5, 0,3,6, 1,4,7, 0,0,0,0,0,0,0,0}; + val0 = vld1q_u8 (ptr); //a0,a1,a2,a3,...a7, b0,b1,b2,b3...b7 + val2 = 
_mm_loadl_epi64((__m128i*)(ptr + 16)); //c0,c1,c2,c3,...c7 + + tmp0 = _mm_shuffle_epi8(val0, *(__m128i*)mask8_0); //a0,a3,a6,b1,b4,b7, a1,a4,a7,b2,b5, a2,a5,b0,b3,b6, + tmp1 = _mm_shuffle_epi8(val2, *(__m128i*)mask8_1); //c2,c5, c0,c3,c6, c1,c4,c7,x,x,x,x,x,x,x,x + val0 = _mm_slli_si128(tmp0,10); + val0 = _mm_srli_si128(val0,10); //a0,a3,a6,b1,b4,b7, 0,0,0,0,0,0,0,0,0,0 + val2 = _mm_slli_si128(tmp1,6); //0,0,0,0,0,0,c2,c5,x,x,x,x,x,x,x,x + val0 = _mm_or_si128(val0,val2); //a0,a3,a6,b1,b4,b7,c2,c5 x,x,x,x,x,x,x,x + _M64(v.val[0], val0); + val1 = _mm_slli_si128(tmp0,5); //0,0,0,0,0,0,0,0,0,0,0, a1,a4,a7,b2,b5, + val1 = _mm_srli_si128(val1,11); //a1,a4,a7,b2,b5,0,0,0,0,0,0,0,0,0,0,0, + val2 = _mm_srli_si128(tmp1,2); //c0,c3,c6,c1,c4,c7,x,x,x,x,x,x,x,x,0,0 + val2 = _mm_slli_si128(val2,5); //0,0,0,0,0,c0,c3,c6,0,0,0,0,0,0,0,0 + val1 = _mm_or_si128(val1,val2); //a1,a4,a7,b2,b5,c0,c3,c6,x,x,x,x,x,x,x,x + _M64(v.val[1], val1); + + tmp0 = _mm_srli_si128(tmp0,11); //a2,a5,b0,b3,b6,0,0,0,0,0,0,0,0,0,0,0, + val2 = _mm_srli_si128(tmp1,5); //c1,c4,c7,0,0,0,0,0,0,0,0,0,0,0,0,0 + val2 = _mm_slli_si128(val2,5); //0,0,0,0,0,c1,c4,c7, + val2 = _mm_or_si128(tmp0, val2); //a2,a5,b0,b3,b6,c1,c4,c7,x,x,x,x,x,x,x,x + _M64(v.val[2], val2); + return v; +} + +_NEON2SSESTORAGE uint16x4x3_t vld3_u16(__transfersize(12) uint16_t const * ptr); // VLD3.16 {d0, d1, d2}, [r0] +_NEON2SSE_INLINE uint16x4x3_t vld3_u16(__transfersize(12) uint16_t const * ptr) // VLD3.16 {d0, d1, d2}, [r0] +{ + //a0,a1,a2,a3, b0,b1,b2,b3, c0,c1,c2,c3 -> a0,a3,b2,c1, a1,b0,b3,c2, a2,b1,c0,c3, + uint16x4x3_t v; + __m128i val0, val1, val2, tmp0, tmp1; + _NEON2SSE_ALIGN_16 static const int8_t mask16[16] = {0,1, 6,7, 12,13, 2,3, 8,9, 14,15, 4,5, 10,11}; + val0 = vld1q_u16 (ptr); //a0,a1,a2,a3, b0,b1,b2,b3 + val2 = _mm_loadl_epi64((__m128i*)(ptr + 8)); //c0,c1,c2,c3, x,x,x,x + + tmp0 = _mm_shuffle_epi8(val0, *(__m128i*)mask16); //a0, a3, b2,a1, b0, b3, a2, b1 + tmp1 = _mm_shufflelo_epi16(val2, 201); //11 00 10 01 : c1, c2, c0, c3, + val0 = _mm_slli_si128(tmp0,10); + val0 = _mm_srli_si128(val0,10); //a0, a3, b2, 0,0, 0,0, + val2 = _mm_slli_si128(tmp1,14); //0,0,0,0,0,0,0,c1 + val2 = _mm_srli_si128(val2,8); //0,0,0,c1,0,0,0,0 + val0 = _mm_or_si128(val0,val2); //a0, a3, b2, c1, x,x,x,x + _M64(v.val[0], val0); + + val1 = _mm_slli_si128(tmp0,4); //0,0,0,0,0,a1, b0, b3 + val1 = _mm_srli_si128(val1,10); //a1, b0, b3, 0,0, 0,0, + val2 = _mm_srli_si128(tmp1,2); //c2, 0,0,0,0,0,0,0, + val2 = _mm_slli_si128(val2,6); //0,0,0,c2,0,0,0,0 + val1 = _mm_or_si128(val1,val2); //a1, b0, b3, c2, x,x,x,x + _M64(v.val[1], val1); + + tmp0 = _mm_srli_si128(tmp0,12); //a2, b1,0,0,0,0,0,0 + tmp1 = _mm_srli_si128(tmp1,4); + tmp1 = _mm_slli_si128(tmp1,4); //0,0,c0, c3, + val2 = _mm_or_si128(tmp0, tmp1); //a2, b1, c0, c3, + _M64(v.val[2], val2); + return v; +} + +_NEON2SSESTORAGE uint32x2x3_t vld3_u32(__transfersize(6) uint32_t const * ptr); // VLD3.32 {d0, d1, d2}, [r0] +_NEON2SSE_INLINE uint32x2x3_t vld3_u32(__transfersize(6) uint32_t const * ptr) // VLD3.32 {d0, d1, d2}, [r0] +{ + //a0,a1, b0,b1, c0,c1, -> a0,b1, a1,c0, b0,c1 + uint32x2x3_t v; + __m128i val0, val1, val2; + val0 = vld1q_u32 (ptr); //a0,a1, b0,b1, + val2 = _mm_loadl_epi64((__m128i*) (ptr + 4)); //c0,c1, x,x + + val0 = _mm_shuffle_epi32(val0, 0 | (3 << 2) | (1 << 4) | (2 << 6)); //a0,b1, a1, b0 + _M64(v.val[0], val0); + val2 = _mm_slli_si128(val2, 8); //x, x,c0,c1, + val1 = _mm_unpackhi_epi32(val0,val2); //a1,c0, b0, c1 + _M64(v.val[1], val1); + val2 = _mm_srli_si128(val1, 8); //b0, c1, x, x, + _M64(v.val[2], 
val2); + return v; +} +_NEON2SSESTORAGE uint64x1x3_t vld3_u64(__transfersize(3) uint64_t const * ptr); // VLD1.64 {d0, d1, d2}, [r0] +_NEON2SSE_INLINE uint64x1x3_t vld3_u64(__transfersize(3) uint64_t const * ptr) // VLD1.64 {d0, d1, d2}, [r0] +{ + uint64x1x3_t v; + v.val[0].m64_u64[0] = *(ptr); + v.val[1].m64_u64[0] = *(ptr + 1); + v.val[2].m64_u64[0] = *(ptr + 2); + return v; +} + +_NEON2SSE_GLOBAL int8x8x3_t vld3_s8(__transfersize(24) int8_t const * ptr); // VLD3.8 {d0, d1, d2}, [r0] +#define vld3_s8(ptr) vld3_u8((uint8_t*)ptr) + +_NEON2SSE_GLOBAL int16x4x3_t vld3_s16(__transfersize(12) int16_t const * ptr); // VLD3.16 {d0, d1, d2}, [r0] +#define vld3_s16(ptr) vld3_u16((uint16_t*)ptr) + +_NEON2SSE_GLOBAL int32x2x3_t vld3_s32(__transfersize(6) int32_t const * ptr); // VLD3.32 {d0, d1, d2}, [r0] +#define vld3_s32(ptr) vld3_u32((uint32_t*)ptr) + +//int64x1x3_t vld3_s64(__transfersize(3) int64_t const * ptr); // VLD1.64 {d0, d1, d2}, [r0] +#define vld3_s64(ptr) vld3_u64((uint64_t*)ptr) + +_NEON2SSE_GLOBAL float16x4x3_t vld3_f16(__transfersize(12) __fp16 const * ptr); // VLD3.16 {d0, d1, d2}, [r0] +// IA32 SIMD doesn't work with 16bit floats currently, so need to go to 32 bit and then work with two 128bit registers. See vld1q_f16 for example + +_NEON2SSESTORAGE float32x2x3_t vld3_f32(__transfersize(6) float32_t const * ptr); // VLD3.32 {d0, d1, d2}, [r0] +_NEON2SSE_INLINE float32x2x3_t vld3_f32(__transfersize(6) float32_t const * ptr) +{ + //a0,a1, b0,b1, c0,c1, -> a0,b1, a1,c0, b0,c1 + float32x2x3_t v; + v.val[0].m64_f32[0] = *(ptr); + v.val[0].m64_f32[1] = *(ptr + 3); + + v.val[1].m64_f32[0] = *(ptr + 1); + v.val[1].m64_f32[1] = *(ptr + 4); + + v.val[2].m64_f32[0] = *(ptr + 2); + v.val[2].m64_f32[1] = *(ptr + 5); + return v; +} + +_NEON2SSE_GLOBAL poly8x8x3_t vld3_p8(__transfersize(24) poly8_t const * ptr); // VLD3.8 {d0, d1, d2}, [r0] +#define vld3_p8 vld3_u8 + +_NEON2SSE_GLOBAL poly16x4x3_t vld3_p16(__transfersize(12) poly16_t const * ptr); // VLD3.16 {d0, d1, d2}, [r0] +#define vld3_p16 vld3_u16 + +//*************** Quadruples load ******************************** +//***************************************************************** +_NEON2SSESTORAGE uint8x16x4_t vld4q_u8(__transfersize(64) uint8_t const * ptr); // VLD4.8 {d0, d2, d4, d6}, [r0] +_NEON2SSE_INLINE uint8x16x4_t vld4q_u8(__transfersize(64) uint8_t const * ptr) // VLD4.8 {d0, d2, d4, d6}, [r0] +{ + uint8x16x4_t v; + __m128i tmp3, tmp2, tmp1, tmp0; + + v.val[0] = vld1q_u8 ( ptr); //a0,a1,a2,...a7, ...a15 + v.val[1] = vld1q_u8 ( (ptr + 16)); //b0, b1,b2,...b7.... b15 + v.val[2] = vld1q_u8 ( (ptr + 32)); //c0, c1,c2,...c7....c15 + v.val[3] = vld1q_u8 ( (ptr + 48)); //d0,d1,d2,...d7....d15 + + tmp0 = _mm_unpacklo_epi8(v.val[0],v.val[1]); //a0,b0, a1,b1, a2,b2, a3,b3,....a7,b7 + tmp1 = _mm_unpacklo_epi8(v.val[2],v.val[3]); //c0,d0, c1,d1, c2,d2, c3,d3,... 
c7,d7 + tmp2 = _mm_unpackhi_epi8(v.val[0],v.val[1]); //a8,b8, a9,b9, a10,b10, a11,b11,...a15,b15 + tmp3 = _mm_unpackhi_epi8(v.val[2],v.val[3]); //c8,d8, c9,d9, c10,d10, c11,d11,...c15,d15 + + v.val[0] = _mm_unpacklo_epi8(tmp0, tmp2); //a0,a8, b0,b8, a1,a9, b1,b9, ....a3,a11, b3,b11 + v.val[1] = _mm_unpackhi_epi8(tmp0, tmp2); //a4,a12, b4,b12, a5,a13, b5,b13,....a7,a15,b7,b15 + v.val[2] = _mm_unpacklo_epi8(tmp1, tmp3); //c0,c8, d0,d8, c1,c9, d1,d9.....d3,d11 + v.val[3] = _mm_unpackhi_epi8(tmp1, tmp3); //c4,c12,d4,d12, c5,c13, d5,d13,....d7,d15 + + tmp0 = _mm_unpacklo_epi32(v.val[0], v.val[2] ); ///a0,a8, b0,b8, c0,c8, d0,d8, a1,a9, b1,b9, c1,c9, d1,d9 + tmp1 = _mm_unpackhi_epi32(v.val[0], v.val[2] ); //a2,a10, b2,b10, c2,c10, d2,d10, a3,a11, b3,b11, c3,c11, d3,d11 + tmp2 = _mm_unpacklo_epi32(v.val[1], v.val[3] ); //a4,a12, b4,b12, c4,c12, d4,d12, a5,a13, b5,b13, c5,c13, d5,d13, + tmp3 = _mm_unpackhi_epi32(v.val[1], v.val[3] ); //a6,a14, b6,b14, c6,c14, d6,d14, a7,a15,b7,b15,c7,c15,d7,d15 + + v.val[0] = _mm_unpacklo_epi8(tmp0, tmp2); //a0,a4,a8,a12,b0,b4,b8,b12,c0,c4,c8,c12,d0,d4,d8,d12 + v.val[1] = _mm_unpackhi_epi8(tmp0, tmp2); //a1,a5, a9, a13, b1,b5, b9,b13, c1,c5, c9, c13, d1,d5, d9,d13 + v.val[2] = _mm_unpacklo_epi8(tmp1, tmp3); //a2,a6, a10,a14, b2,b6, b10,b14,c2,c6, c10,c14, d2,d6, d10,d14 + v.val[3] = _mm_unpackhi_epi8(tmp1, tmp3); //a3,a7, a11,a15, b3,b7, b11,b15,c3,c7, c11, c15,d3,d7, d11,d15 + return v; +} + +_NEON2SSESTORAGE uint16x8x4_t vld4q_u16(__transfersize(32) uint16_t const * ptr); // VLD4.16 {d0, d2, d4, d6}, [r0] +_NEON2SSE_INLINE uint16x8x4_t vld4q_u16(__transfersize(32) uint16_t const * ptr) // VLD4.16 {d0, d2, d4, d6}, [r0] +{ + uint16x8x4_t v; + __m128i tmp3, tmp2, tmp1, tmp0; + tmp0 = vld1q_u16 (ptr); //a0,a1,a2,...a7 + tmp1 = vld1q_u16 ((ptr + 8)); //b0, b1,b2,...b7 + tmp2 = vld1q_u16 ((ptr + 16)); //c0, c1,c2,...c7 + tmp3 = vld1q_u16 ((ptr + 24)); //d0,d1,d2,...d7 + v.val[0] = _mm_unpacklo_epi16(tmp0,tmp1); //a0,b0, a1,b1, a2,b2, a3,b3, + v.val[1] = _mm_unpacklo_epi16(tmp2,tmp3); //c0,d0, c1,d1, c2,d2, c3,d3, + v.val[2] = _mm_unpackhi_epi16(tmp0,tmp1); //a4,b4, a5,b5, a6,b6, a7,b7 + v.val[3] = _mm_unpackhi_epi16(tmp2,tmp3); //c4,d4, c5,d5, c6,d6, c7,d7 + tmp0 = _mm_unpacklo_epi16(v.val[0], v.val[2]); //a0,a4, b0,b4, a1,a5, b1,b5 + tmp1 = _mm_unpackhi_epi16(v.val[0], v.val[2]); //a2,a6, b2,b6, a3,a7, b3,b7 + tmp2 = _mm_unpacklo_epi16(v.val[1], v.val[3]); //c0,c4, d0,d4, c1,c5, d1,d5 + tmp3 = _mm_unpackhi_epi16(v.val[1], v.val[3]); //c2,c6, d2,d6, c3,c7, d3,d7 + v.val[0] = _mm_unpacklo_epi64(tmp0, tmp2); //a0,a4, b0,b4, c0,c4, d0,d4, + v.val[1] = _mm_unpackhi_epi64(tmp0, tmp2); //a1,a5, b1,b5, c1,c5, d1,d5 + v.val[2] = _mm_unpacklo_epi64(tmp1, tmp3); //a2,a6, b2,b6, c2,c6, d2,d6, + v.val[3] = _mm_unpackhi_epi64(tmp1, tmp3); //a3,a7, b3,b7, c3,c7, d3,d7 + return v; +} + +_NEON2SSESTORAGE uint32x4x4_t vld4q_u32(__transfersize(16) uint32_t const * ptr); // VLD4.32 {d0, d2, d4, d6}, [r0] +_NEON2SSE_INLINE uint32x4x4_t vld4q_u32(__transfersize(16) uint32_t const * ptr) // VLD4.32 {d0, d2, d4, d6}, [r0] +{ + uint32x4x4_t v; + __m128i tmp3, tmp2, tmp1, tmp0; + v.val[0] = vld1q_u32 (ptr); + v.val[1] = vld1q_u32 ((ptr + 4)); + v.val[2] = vld1q_u32 ((ptr + 8)); + v.val[3] = vld1q_u32 ((ptr + 12)); + tmp0 = _mm_unpacklo_epi32(v.val[0],v.val[1]); + tmp1 = _mm_unpacklo_epi32(v.val[2],v.val[3]); + tmp2 = _mm_unpackhi_epi32(v.val[0],v.val[1]); + tmp3 = _mm_unpackhi_epi32(v.val[2],v.val[3]); + v.val[0] = _mm_unpacklo_epi64(tmp0, tmp1); + v.val[1] = _mm_unpackhi_epi64(tmp0, tmp1); 
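+    //note: the 32-bit unpacks above and the 64-bit unpacks below together perform a 4x4
+    //matrix transpose, de-interleaving the a,b,c,d quadruples into four separate vectors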
+    v.val[2] = _mm_unpacklo_epi64(tmp2, tmp3);
+    v.val[3] = _mm_unpackhi_epi64(tmp2, tmp3);
+    return v;
+}
+
+_NEON2SSE_GLOBAL int8x16x4_t vld4q_s8(__transfersize(64) int8_t const * ptr); // VLD4.8 {d0, d2, d4, d6}, [r0]
+#define vld4q_s8(ptr) vld4q_u8((uint8_t*)ptr)
+
+_NEON2SSE_GLOBAL int16x8x4_t vld4q_s16(__transfersize(32) int16_t const * ptr); // VLD4.16 {d0, d2, d4, d6}, [r0]
+#define vld4q_s16(ptr) vld4q_u16((uint16_t*)ptr)
+
+_NEON2SSE_GLOBAL int32x4x4_t vld4q_s32(__transfersize(16) int32_t const * ptr); // VLD4.32 {d0, d2, d4, d6}, [r0]
+#define vld4q_s32(ptr) vld4q_u32((uint32_t*)ptr)
+
+_NEON2SSE_GLOBAL float16x8x4_t vld4q_f16(__transfersize(32) __fp16 const * ptr); // VLD4.16 {d0, d2, d4, d6}, [r0]
+// IA32 SIMD doesn't work with 16bit floats currently, so need to go to 32 bit and then work with two 128bit registers. See vld1q_f16 for example
+
+_NEON2SSESTORAGE float32x4x4_t vld4q_f32(__transfersize(16) float32_t const * ptr); // VLD4.32 {d0, d2, d4, d6}, [r0]
+_NEON2SSE_INLINE float32x4x4_t vld4q_f32(__transfersize(16) float32_t const * ptr) // VLD4.32 {d0, d2, d4, d6}, [r0]
+{
+    float32x4x4_t v;
+    __m128 tmp3, tmp2, tmp1, tmp0;
+
+    v.val[0] = vld1q_f32 ((float*) ptr);
+    v.val[1] = vld1q_f32 ((float*) (ptr + 4));
+    v.val[2] = vld1q_f32 ((float*) (ptr + 8));
+    v.val[3] = vld1q_f32 ((float*) (ptr + 12));
+    tmp0 = _mm_unpacklo_ps(v.val[0], v.val[1]);
+    tmp2 = _mm_unpacklo_ps(v.val[2], v.val[3]);
+    tmp1 = _mm_unpackhi_ps(v.val[0], v.val[1]);
+    tmp3 = _mm_unpackhi_ps(v.val[2], v.val[3]);
+    v.val[0] = _mm_movelh_ps(tmp0, tmp2);
+    v.val[1] = _mm_movehl_ps(tmp2, tmp0);
+    v.val[2] = _mm_movelh_ps(tmp1, tmp3);
+    v.val[3] = _mm_movehl_ps(tmp3, tmp1);
+    return v;
+}
+
+_NEON2SSE_GLOBAL poly8x16x4_t vld4q_p8(__transfersize(64) poly8_t const * ptr); // VLD4.8 {d0, d2, d4, d6}, [r0]
+#define vld4q_p8 vld4q_u8
+
+_NEON2SSE_GLOBAL poly16x8x4_t vld4q_p16(__transfersize(32) poly16_t const * ptr); // VLD4.16 {d0, d2, d4, d6}, [r0]
+#define vld4q_p16 vld4q_s16
+
+_NEON2SSESTORAGE uint8x8x4_t vld4_u8(__transfersize(32) uint8_t const * ptr); // VLD4.8 {d0, d1, d2, d3}, [r0]
+_NEON2SSE_INLINE uint8x8x4_t vld4_u8(__transfersize(32) uint8_t const * ptr) // VLD4.8 {d0, d1, d2, d3}, [r0]
+{
+    uint8x8x4_t v;
+    __m128i sh0, sh1;
+    __m128i val0, val2;
+    _NEON2SSE_ALIGN_16 static const int8_t mask4_8[16] = {0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15};
+
+    val0 = vld1q_u8(( ptr)); //load the first two 64-bit halves (future val[0] and val[1])
+    val2 = vld1q_u8(( ptr + 16)); //load the third and fourth 64-bit halves (future val[2] and val[3])
+
+    sh0 = _mm_shuffle_epi8(val0, *(__m128i*)mask4_8);
+    sh1 = _mm_shuffle_epi8(val2, *(__m128i*)mask4_8);
+    val0 = _mm_unpacklo_epi32(sh0,sh1); //0,4,8,12,16,20,24,28, 1,5,9,13,17,21,25,29
+    vst1q_u8(&v.val[0], val0 );
+    val2 = _mm_unpackhi_epi32(sh0,sh1); //2,6,10,14,18,22,26,30, 3,7,11,15,19,23,27,31
+    vst1q_u8(&v.val[2], val2 );
+    return v;
+}
+
+_NEON2SSESTORAGE uint16x4x4_t vld4_u16(__transfersize(16) uint16_t const * ptr); // VLD4.16 {d0, d1, d2, d3}, [r0]
+_NEON2SSE_INLINE uint16x4x4_t vld4_u16(__transfersize(16) uint16_t const * ptr) // VLD4.16 {d0, d1, d2, d3}, [r0]
+{
+    uint16x4x4_t v;
+    __m128i sh0, sh1;
+    __m128i val0, val2;
+    _NEON2SSE_ALIGN_16 static const int8_t mask4_16[16] = {0,1, 8,9, 2,3, 10,11, 4,5, 12,13, 6,7, 14,15}; //0, 4, 1, 5, 2, 6, 3, 7
+    val0 = vld1q_u16 ( (ptr)); //load the first two 64-bit halves (future val[0] and val[1])
+    val2 = vld1q_u16 ( (ptr + 8)); //load the third and fourth 64-bit halves (future val[2] and val[3])
+    sh0 = _mm_shuffle_epi8(val0, *(__m128i*)mask4_16);
+    sh1 = _mm_shuffle_epi8(val2,
*(__m128i*)mask4_16);
+    val0 = _mm_unpacklo_epi32(sh0,sh1); //0,4,8,12, 1,5,9,13
+    vst1q_u16(&v.val[0], val0 );
+    val2 = _mm_unpackhi_epi32(sh0,sh1); //2,6,10,14, 3,7,11,15
+    vst1q_u16(&v.val[2], val2 );
+    return v;
+}
+
+_NEON2SSESTORAGE uint32x2x4_t vld4_u32(__transfersize(8) uint32_t const * ptr); // VLD4.32 {d0, d1, d2, d3}, [r0]
+_NEON2SSE_INLINE uint32x2x4_t vld4_u32(__transfersize(8) uint32_t const * ptr)
+{
+    //a0,a1, b0,b1, c0,c1, d0,d1 -> a0,c0, a1,c1, b0,d0, b1,d1
+    uint32x2x4_t v;
+    __m128i val0, val01, val2;
+    val0 = vld1q_u32 (ptr); //a0,a1, b0,b1,
+    val2 = vld1q_u32 ((ptr + 4)); //c0,c1, d0,d1
+    val01 = _mm_unpacklo_epi32(val0,val2); //a0, c0, a1,c1,
+    val2 = _mm_unpackhi_epi32(val0,val2); //b0,d0, b1, d1
+    vst1q_u32(&v.val[0], val01);
+    vst1q_u32(&v.val[2], val2 );
+    return v;
+}
+
+_NEON2SSESTORAGE uint64x1x4_t vld4_u64(__transfersize(4) uint64_t const * ptr); // VLD1.64 {d0, d1, d2, d3}, [r0]
+_NEON2SSE_INLINE uint64x1x4_t vld4_u64(__transfersize(4) uint64_t const * ptr) // VLD1.64 {d0, d1, d2, d3}, [r0]
+{
+    uint64x1x4_t v;
+    v.val[0].m64_u64[0] = *(ptr); //each val[i] receives one consecutive 64-bit element
+    v.val[1].m64_u64[0] = *(ptr + 1);
+    v.val[2].m64_u64[0] = *(ptr + 2);
+    v.val[3].m64_u64[0] = *(ptr + 3);
+    return v;
+}
+
+_NEON2SSE_GLOBAL int8x8x4_t vld4_s8(__transfersize(32) int8_t const * ptr); // VLD4.8 {d0, d1, d2, d3}, [r0]
+#define vld4_s8(ptr) vld4_u8((uint8_t*)ptr)
+
+_NEON2SSE_GLOBAL int16x4x4_t vld4_s16(__transfersize(16) int16_t const * ptr); // VLD4.16 {d0, d1, d2, d3}, [r0]
+#define vld4_s16(ptr) vld4_u16((uint16_t*)ptr)
+
+_NEON2SSE_GLOBAL int32x2x4_t vld4_s32(__transfersize(8) int32_t const * ptr); // VLD4.32 {d0, d1, d2, d3}, [r0]
+#define vld4_s32(ptr) vld4_u32((uint32_t*)ptr)
+
+//int64x1x4_t vld4_s64(__transfersize(4) int64_t const * ptr); // VLD1.64 {d0, d1, d2, d3}, [r0]
+#define vld4_s64(ptr) vld4_u64((uint64_t*)ptr)
+
+_NEON2SSE_GLOBAL float16x4x4_t vld4_f16(__transfersize(16) __fp16 const * ptr); // VLD4.16 {d0, d1, d2, d3}, [r0]
+// IA32 SIMD doesn't work with 16bit floats currently, so need to go to 32 bit and then work with two 128bit registers.
See vld1q_f16 for example + +_NEON2SSESTORAGE float32x2x4_t vld4_f32(__transfersize(8) float32_t const * ptr); // VLD4.32 {d0, d1, d2, d3}, [r0] +_NEON2SSE_INLINE float32x2x4_t vld4_f32(__transfersize(8) float32_t const * ptr) // VLD4.32 {d0, d1, d2, d3}, [r0] +{ + //a0,a1, b0,b1, c0,c1, d0,d1 -> a0,c0, a1,c1, b0,d0, b1,d1 + float32x2x4_t res; + res.val[0].m64_f32[0] = *(ptr); + res.val[0].m64_f32[1] = *(ptr + 4); + res.val[1].m64_f32[0] = *(ptr + 1); + res.val[1].m64_f32[1] = *(ptr + 5); + res.val[2].m64_f32[0] = *(ptr + 2); + res.val[2].m64_f32[1] = *(ptr + 6); + res.val[3].m64_f32[0] = *(ptr + 3); + res.val[3].m64_f32[1] = *(ptr + 7); + return res; +} + +_NEON2SSE_GLOBAL poly8x8x4_t vld4_p8(__transfersize(32) poly8_t const * ptr); // VLD4.8 {d0, d1, d2, d3}, [r0] +#define vld4_p8 vld4_u8 + +_NEON2SSE_GLOBAL poly16x4x4_t vld4_p16(__transfersize(16) poly16_t const * ptr); // VLD4.16 {d0, d1, d2, d3}, [r0] +#define vld4_p16 vld4_u16 + +//************* Duplicate (or propagate) ptr[0] to all val[0] lanes and ptr[1] to all val[1] lanes ******************* +//******************************************************************************************************************* +_NEON2SSESTORAGE uint8x8x2_t vld2_dup_u8(__transfersize(2) uint8_t const * ptr); // VLD2.8 {d0[], d1[]}, [r0] +_NEON2SSE_INLINE uint8x8x2_t vld2_dup_u8(__transfersize(2) uint8_t const * ptr) // VLD2.8 {d0[], d1[]}, [r0] +{ + uint8x8x2_t v; + __m128i val0, val1; + val0 = _mm_cvtsi32_si128(*(uint16_t*)ptr); //0,1,x,x, x,x,x,x,x,x,x,x, x,x,x,x + val1 = _mm_unpacklo_epi8(val0,val0); //0,0,1,1,x,x,x,x, x,x,x,x,x,x,x,x, + val1 = _mm_unpacklo_epi16(val1,val1); //0,0,0,0, 1,1,1,1,x,x,x,x, x,x,x,x + val0 = _mm_unpacklo_epi32(val1,val1); //0,0,0,0, 0,0,0,0,1,1,1,1,1,1,1,1, + vst1q_u8(v.val, val0); + return v; +} + +_NEON2SSESTORAGE uint16x4x2_t vld2_dup_u16(__transfersize(2) uint16_t const * ptr); // VLD2.16 {d0[], d1[]}, [r0] +_NEON2SSE_INLINE uint16x4x2_t vld2_dup_u16(__transfersize(2) uint16_t const * ptr) // VLD2.16 {d0[], d1[]}, [r0] +{ + uint16x4x2_t v; + __m128i val0, val1; + val1 = _mm_cvtsi32_si128(*(uint32_t*)ptr); //0,1,x,x, x,x,x,x + val0 = _mm_shufflelo_epi16(val1, 0); //00 00 00 00 (all 0) + _M64(v.val[0], val0); + val1 = _mm_shufflelo_epi16(val1, 85); //01 01 01 01 (all 1) + _M64(v.val[1], val1); + return v; +} + +_NEON2SSESTORAGE uint32x2x2_t vld2_dup_u32(__transfersize(2) uint32_t const * ptr); // VLD2.32 {d0[], d1[]}, [r0] +_NEON2SSE_INLINE uint32x2x2_t vld2_dup_u32(__transfersize(2) uint32_t const * ptr) // VLD2.32 {d0[], d1[]}, [r0] +{ + uint32x2x2_t v; + __m128i val0; + val0 = _mm_loadl_epi64((__m128i*)ptr); //0,1,x,x + val0 = _mm_shuffle_epi32(val0, 0 | (0 << 2) | (1 << 4) | (1 << 6)); //0,0,1,1 + vst1q_u32(v.val, val0); + return v; +} + +_NEON2SSE_GLOBAL uint64x1x2_t vld2_dup_u64(__transfersize(2) uint64_t const * ptr); // VLD1.64 {d0, d1}, [r0] +#define vld2_dup_u64 vld2_u64 + +_NEON2SSE_GLOBAL int8x8x2_t vld2_dup_s8(__transfersize(2) int8_t const * ptr); // VLD2.8 {d0[], d1[]}, [r0] +#define vld2_dup_s8(ptr) vld2_dup_u8((uint8_t*)ptr) + +_NEON2SSE_GLOBAL int16x4x2_t vld2_dup_s16(__transfersize(2) int16_t const * ptr); // VLD2.16 {d0[], d1[]}, [r0] +#define vld2_dup_s16(ptr) vld2_dup_u16((uint16_t*)ptr) + +_NEON2SSE_GLOBAL int32x2x2_t vld2_dup_s32(__transfersize(2) int32_t const * ptr); // VLD2.32 {d0[], d1[]}, [r0] +#define vld2_dup_s32(ptr) vld2_dup_u32((uint32_t*)ptr) + +_NEON2SSE_GLOBAL int64x1x2_t vld2_dup_s64(__transfersize(2) int64_t const * ptr); // VLD1.64 {d0, d1}, [r0] +#define vld2_dup_s64(ptr) 
vld2_dup_u64((uint64_t*)ptr) + +_NEON2SSE_GLOBAL float16x4x2_t vld2_dup_f16(__transfersize(2) __fp16 const * ptr); // VLD2.16 {d0[], d1[]}, [r0] +// IA32 SIMD doesn't work with 16bit floats currently, so need to go to 32 bit and then work with two 128bit registers. See vld1q_f16 for example + +_NEON2SSESTORAGE float32x2x2_t vld2_dup_f32(__transfersize(2) float32_t const * ptr); // VLD2.32 {d0[], d1[]}, [r0] +_NEON2SSE_INLINE float32x2x2_t vld2_dup_f32(__transfersize(2) float32_t const * ptr) // VLD2.32 {d0[], d1[]}, [r0] +{ + float32x2x2_t v; + v.val[0].m64_f32[0] = *(ptr); //0,0 + v.val[0].m64_f32[1] = *(ptr); //0,0 + v.val[1].m64_f32[0] = *(ptr + 1); //1,1 + v.val[1].m64_f32[1] = *(ptr + 1); //1,1 + return v; +} + +_NEON2SSE_GLOBAL poly8x8x2_t vld2_dup_p8(__transfersize(2) poly8_t const * ptr); // VLD2.8 {d0[], d1[]}, [r0] +#define vld2_dup_p8 vld2_dup_u8 + +_NEON2SSE_GLOBAL poly16x4x2_t vld2_dup_p16(__transfersize(2) poly16_t const * ptr); // VLD2.16 {d0[], d1[]}, [r0] +#define vld2_dup_p16 vld2_dup_s16 + +//************* Duplicate (or propagate) triplets: ******************* +//******************************************************************** +//ptr[0] to all val[0] lanes, ptr[1] to all val[1] lanes and ptr[2] to all val[2] lanes +_NEON2SSESTORAGE uint8x8x3_t vld3_dup_u8(__transfersize(3) uint8_t const * ptr); // VLD3.8 {d0[], d1[], d2[]}, [r0] +_NEON2SSE_INLINE uint8x8x3_t vld3_dup_u8(__transfersize(3) uint8_t const * ptr) // VLD3.8 {d0[], d1[], d2[]}, [r0] +{ + uint8x8x3_t v; + __m128i val0, val1, val2; + val0 = _mm_cvtsi32_si128(*(uint32_t*)ptr); //0,1,2,x, x,x,x,x,x,x,x,x, x,x,x,x + val1 = _mm_unpacklo_epi8(val0,val0); //0,0,1,1,2,2,x,x, x,x,x,x,x,x,x,x, + val1 = _mm_unpacklo_epi16(val1,val1); //0,0,0,0, 1,1,1,1,2,2,2,2,x,x,x,x, + val0 = _mm_unpacklo_epi32(val1,val1); //0,0,0,0, 0,0,0,0,1,1,1,1,1,1,1,1, + val2 = _mm_unpackhi_epi32(val1,val1); // 2,2,2,2,2,2,2,2, x,x,x,x,x,x,x,x, + vst1q_u8(v.val, val0); + _M64(v.val[2], val2); + return v; +} + +_NEON2SSESTORAGE uint16x4x3_t vld3_dup_u16(__transfersize(3) uint16_t const * ptr); // VLD3.16 {d0[], d1[], d2[]}, [r0] +_NEON2SSE_INLINE uint16x4x3_t vld3_dup_u16(__transfersize(3) uint16_t const * ptr) // VLD3.16 {d0[], d1[], d2[]}, [r0] +{ + uint16x4x3_t v; + __m128i val0, val1, val2; + val2 = _mm_loadl_epi64((__m128i*) ptr); //0,1,2,x, x,x,x,x + val0 = _mm_shufflelo_epi16(val2, 0); //00 00 00 00 (all 0) + val1 = _mm_shufflelo_epi16(val2, 85); //01 01 01 01 (all 1) + val2 = _mm_shufflelo_epi16(val2, 170); //10 10 10 10 (all 2) + _M64(v.val[0], val0); + _M64(v.val[1], val1); + _M64(v.val[2], val2); + return v; +} + +_NEON2SSESTORAGE uint32x2x3_t vld3_dup_u32(__transfersize(3) uint32_t const * ptr); // VLD3.32 {d0[], d1[], d2[]}, [r0] +_NEON2SSE_INLINE uint32x2x3_t vld3_dup_u32(__transfersize(3) uint32_t const * ptr) // VLD3.32 {d0[], d1[], d2[]}, [r0] +{ + uint32x2x3_t v; + __m128i val0, val1, val2; + val2 = LOAD_SI128(ptr); //0,1,2,x + val0 = _mm_shuffle_epi32(val2, 0 | (0 << 2) | (2 << 4) | (2 << 6)); //0,0,2,2 + val1 = _mm_shuffle_epi32(val2, 1 | (1 << 2) | (2 << 4) | (2 << 6)); //1,1,2,2 + val2 = _mm_srli_si128(val0, 8); //2,2,0x0,0x0 + _M64(v.val[0], val0); + _M64(v.val[1], val1); + _M64(v.val[2], val2); + return v; +} + +_NEON2SSESTORAGE uint64x1x3_t vld3_dup_u64(__transfersize(3) uint64_t const * ptr); // VLD1.64 {d0, d1, d2}, [r0] +_NEON2SSE_INLINE uint64x1x3_t vld3_dup_u64(__transfersize(3) uint64_t const * ptr) // VLD1.64 {d0, d1, d2}, [r0] +{ + uint64x1x3_t v; + v.val[0].m64_u64[0] = *(ptr); + v.val[1].m64_u64[0] = *(ptr + 1);
+ v.val[2].m64_u64[0] = *(ptr + 2); + return v; +} + +_NEON2SSE_GLOBAL int8x8x3_t vld3_dup_s8(__transfersize(3) int8_t const * ptr); // VLD3.8 {d0[], d1[], d2[]}, [r0] +#define vld3_dup_s8(ptr) vld3_dup_u8((uint8_t*)ptr) + +_NEON2SSE_GLOBAL int16x4x3_t vld3_dup_s16(__transfersize(3) int16_t const * ptr); // VLD3.16 {d0[], d1[], d2[]}, [r0] +#define vld3_dup_s16(ptr) vld3_dup_u16((uint16_t*)ptr) + +_NEON2SSE_GLOBAL int32x2x3_t vld3_dup_s32(__transfersize(3) int32_t const * ptr); // VLD3.32 {d0[], d1[], d2[]}, [r0] +#define vld3_dup_s32(ptr) vld3_dup_u32((uint32_t*)ptr) + +//int64x1x3_t vld3_dup_s64(__transfersize(3) int64_t const * ptr); // VLD1.64 {d0, d1, d2}, [r0] +#define vld3_dup_s64(ptr) vld3_dup_u64((uint64_t*)ptr) + + +_NEON2SSE_GLOBAL float16x4x3_t vld3_dup_f16(__transfersize(3) __fp16 const * ptr); // VLD3.16 {d0[], d1[], d2[]}, [r0] +// IA32 SIMD doesn't work with 16bit floats currently, so need to go to 32 bit and then work with two 128bit registers. See vld1q_f16 for example + +_NEON2SSESTORAGE float32x2x3_t vld3_dup_f32(__transfersize(3) float32_t const * ptr); // VLD3.32 {d0[], d1[], d2[]}, [r0] +_NEON2SSE_INLINE float32x2x3_t vld3_dup_f32(__transfersize(3) float32_t const * ptr) // VLD3.32 {d0[], d1[], d2[]}, [r0] +{ + float32x2x3_t v; + int i; + for (i = 0; i<3; i++) { + v.val[i].m64_f32[0] = *(ptr + i); + v.val[i].m64_f32[1] = *(ptr + i); + } + return v; +} + +_NEON2SSE_GLOBAL poly8x8x3_t vld3_dup_p8(__transfersize(3) poly8_t const * ptr); // VLD3.8 {d0[], d1[], d2[]}, [r0] +#define vld3_dup_p8 vld3_dup_u8 + +_NEON2SSE_GLOBAL poly16x4x3_t vld3_dup_p16(__transfersize(3) poly16_t const * ptr); // VLD3.16 {d0[], d1[], d2[]}, [r0] +#define vld3_dup_p16 vld3_dup_s16 + + +//************* Duplicate (or propagate) quadruples: ******************* +//*********************************************************************** +//ptr[0] to all val[0] lanes, ptr[1] to all val[1] lanes, ptr[2] to all val[2] lanes and ptr[3] to all val[3] lanes +_NEON2SSESTORAGE uint8x8x4_t vld4_dup_u8(__transfersize(4) uint8_t const * ptr); // VLD4.8 {d0[], d1[], d2[], d3[]}, [r0] +_NEON2SSE_INLINE uint8x8x4_t vld4_dup_u8(__transfersize(4) uint8_t const * ptr) // VLD4.8 {d0[], d1[], d2[], d3[]}, [r0] +{ + uint8x8x4_t v; + __m128i val0, val1, val2; + val0 = _mm_cvtsi32_si128(*(uint32_t*)ptr); //0,1,2,3, x,x,x,x,x,x,x,x, x,x,x,x + val1 = _mm_unpacklo_epi8(val0,val0); //0,0,1,1,2,2,3,3, x,x,x,x,x,x,x,x, + val1 = _mm_unpacklo_epi16(val1,val1); //0,0,0,0, 1,1,1,1,2,2,2,2,3,3,3,3 + val0 = _mm_unpacklo_epi32(val1,val1); //0,0,0,0, 0,0,0,0,1,1,1,1,1,1,1,1, + val2 = _mm_unpackhi_epi32(val1,val1); // 2,2,2,2,2,2,2,2, 3,3,3,3, 3,3,3,3 + vst1q_u8(&v.val[0], val0); + vst1q_u8(&v.val[2], val2); + return v; +} + +_NEON2SSESTORAGE uint16x4x4_t vld4_dup_u16(__transfersize(4) uint16_t const * ptr); // VLD4.16 {d0[], d1[], d2[], d3[]}, [r0] +_NEON2SSE_INLINE uint16x4x4_t vld4_dup_u16(__transfersize(4) uint16_t const * ptr) // VLD4.16 {d0[], d1[], d2[], d3[]}, [r0] +{ + uint16x4x4_t v; + __m128i val0, val1, val2, val3; + val3 = _mm_loadl_epi64((__m128i*)ptr); //0,1,2,3, x,x,x,x + val0 = _mm_shufflelo_epi16(val3, 0); //00 00 00 00 (all 0) + val1 = _mm_shufflelo_epi16(val3, 85); //01 01 01 01 (all 1) + val2 = _mm_shufflelo_epi16(val3, 170); //10 10 10 10 (all 2) + val3 = _mm_shufflelo_epi16(val3, 255); //11 11 11 11 (all 3) + _M64(v.val[0], val0); + _M64(v.val[1], val1); + _M64(v.val[2], val2); + _M64(v.val[3], val3); + return v; +} + +_NEON2SSESTORAGE uint32x2x4_t vld4_dup_u32(__transfersize(4) uint32_t const * ptr); // VLD4.32 
{d0[], d1[], d2[], d3[]}, [r0] +_NEON2SSE_INLINE uint32x2x4_t vld4_dup_u32(__transfersize(4) uint32_t const * ptr) // VLD4.32 {d0[], d1[], d2[], d3[]}, [r0] +{ + uint32x2x4_t v; + __m128i val0, val1, val2, val3; + val3 = LOAD_SI128(ptr); //0,1,2,3 + val0 = _mm_shuffle_epi32(val3, 0 | (0 << 2) | (2 << 4) | (3 << 6)); //0,0,2,3 + val1 = _mm_shuffle_epi32(val3, 1 | (1 << 2) | (2 << 4) | (3 << 6)); //1,1,2,3 + val2 = _mm_shuffle_epi32(val3, 2 | (2 << 2) | (3 << 4) | (3 << 6)); //2,2,3,3 + val3 = _mm_shuffle_epi32(val3, 3 | (3 << 2) | (3 << 4) | (3 << 6)); //3,3,3,3 + _M64(v.val[0], val0); + _M64(v.val[1], val1); + _M64(v.val[2], val2); + _M64(v.val[3], val3); + return v; +} + +_NEON2SSESTORAGE uint64x1x4_t vld4_dup_u64(__transfersize(4) uint64_t const * ptr); // VLD1.64 {d0, d1, d2, d3}, [r0] +_NEON2SSE_INLINE uint64x1x4_t vld4_dup_u64(__transfersize(4) uint64_t const * ptr) // VLD1.64 {d0, d1, d2, d3}, [r0] +{ + uint64x1x4_t v; + v.val[0].m64_u64[0] = *(ptr); + v.val[1].m64_u64[0] = *(ptr + 1); + v.val[2].m64_u64[0] = *(ptr + 2); + v.val[3].m64_u64[0] = *(ptr + 3); + return v; +} + +_NEON2SSE_GLOBAL int8x8x4_t vld4_dup_s8(__transfersize(4) int8_t const * ptr); // VLD4.8 {d0[], d1[], d2[], d3[]}, [r0] +#define vld4_dup_s8(ptr) vld4_dup_u8((uint8_t*)ptr) + +_NEON2SSE_GLOBAL int16x4x4_t vld4_dup_s16(__transfersize(4) int16_t const * ptr); // VLD4.16 {d0[], d1[], d2[], d3[]}, [r0] +#define vld4_dup_s16(ptr) vld4_dup_u16((uint16_t*)ptr) + +_NEON2SSE_GLOBAL int32x2x4_t vld4_dup_s32(__transfersize(4) int32_t const * ptr); // VLD4.32 {d0[], d1[], d2[], d3[]}, [r0] +#define vld4_dup_s32(ptr) vld4_dup_u32((uint32_t*)ptr) + +//int64x1x4_t vld4_dup_s64(__transfersize(4) int64_t const * ptr); // VLD1.64 {d0, d1, d2, d3}, [r0] +#define vld4_dup_s64(ptr) vld4_dup_u64((uint64_t*)ptr) + +_NEON2SSE_GLOBAL float16x4x4_t vld4_dup_f16(__transfersize(4) __fp16 const * ptr); // VLD4.16 {d0[], d1[], d2[], d3[]}, [r0] +// IA32 SIMD doesn't work with 16bit floats currently, so need to go to 32 bit and then work with two 128bit registers. See vld1q_f16 for example + +_NEON2SSESTORAGE float32x2x4_t vld4_dup_f32(__transfersize(4) float32_t const * ptr); // VLD4.32 {d0[], d1[], d2[], d3[]}, [r0] +_NEON2SSE_INLINE float32x2x4_t vld4_dup_f32(__transfersize(4) float32_t const * ptr) // VLD4.32 {d0[], d1[], d2[], d3[]}, [r0] +{ + float32x2x4_t v; + int i; + for (i = 0; i<4; i++) { + v.val[i].m64_f32[0] = *(ptr + i); + v.val[i].m64_f32[1] = *(ptr + i); + } + return v; +} + +_NEON2SSE_GLOBAL poly8x8x4_t vld4_dup_p8(__transfersize(4) poly8_t const * ptr); // VLD4.8 {d0[], d1[], d2[], d3[]}, [r0] +#define vld4_dup_p8 vld4_dup_u8 + +_NEON2SSE_GLOBAL poly16x4x4_t vld4_dup_p16(__transfersize(4) poly16_t const * ptr); // VLD4.16 {d0[], d1[], d2[], d3[]}, [r0] +#define vld4_dup_p16 vld4_dup_u16 + + +//********************************************************************************** +//******************* Lane loads for N-element structures ************************* +//********************************************************************************** +//********************** Lane pairs ************************************************ +//does vld1_lane_xx ptr[0] to src->val[0] at lane position and ptr[1] to src->val[1] at lane position +//we assume src is 16-byte aligned + +//!!!!!! Microsoft compiler does not allow xxxxxx_2t function arguments, resulting in a "formal parameter with __declspec(align('16')) won't be aligned" error. +//To fix this, all the functions below work with xxxxxx_2t pointers, and the corresponding original functions are redefined as macros.
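+//Illustrative sketch (hypothetical call site, not part of the library): because of
+//the MSVC limitation above, the worker function takes a pointer and the NEON-style
+//name is a macro that passes &src, so user code keeps the original intrinsic syntax:
+//
+//    uint16x8x2_t src;                   //previously initialized two-vector structure
+//    uint16_t pair[2] = {7, 8};          //hypothetical input data
+//    src = vld2q_lane_u16(pair, src, 3); //expands to vld2q_lane_u16_ptr(pair, &src, 3)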
+ +//uint16x8x2_t vld2q_lane_u16(__transfersize(2) uint16_t const * ptr, uint16x8x2_t src,__constrange(0,7) int lane);// VLD2.16 {d0[0], d2[0]}, [r0] +_NEON2SSE_INLINE uint16x8x2_t vld2q_lane_u16_ptr(__transfersize(2) uint16_t const * ptr, uint16x8x2_t* src,__constrange(0,7) int lane) // VLD2.16 {d0[0], d2[0]}, [r0] +{ + uint16x8x2_t v; + v.val[0] = vld1q_lane_s16 (ptr, src->val[0], lane); + v.val[1] = vld1q_lane_s16 ((ptr + 1), src->val[1], lane); + return v; +} +#define vld2q_lane_u16(ptr, src, lane) vld2q_lane_u16_ptr(ptr, &src, lane) + +//uint32x4x2_t vld2q_lane_u32(__transfersize(2) uint32_t const * ptr, uint32x4x2_t src,__constrange(0,3) int lane);// VLD2.32 {d0[0], d2[0]}, [r0] +_NEON2SSE_INLINE uint32x4x2_t vld2q_lane_u32_ptr(__transfersize(2) uint32_t const * ptr, uint32x4x2_t* src,__constrange(0,3) int lane) // VLD2.32 {d0[0], d2[0]}, [r0] +{ + uint32x4x2_t v; + v.val[0] = _MM_INSERT_EPI32 (src->val[0], ptr[0], lane); + v.val[1] = _MM_INSERT_EPI32 (src->val[1], ptr[1], lane); + return v; +} +#define vld2q_lane_u32(ptr, src, lane) vld2q_lane_u32_ptr(ptr, &src, lane) + +//int16x8x2_t vld2q_lane_s16(__transfersize(2) int16_t const * ptr, int16x8x2_t src, __constrange(0,7)int lane);// VLD2.16 {d0[0], d2[0]}, [r0] +_NEON2SSE_INLINE int16x8x2_t vld2q_lane_s16_ptr(__transfersize(2) int16_t const * ptr, int16x8x2_t* src, __constrange(0,7) int lane) +{ + int16x8x2_t v; + v.val[0] = vld1q_lane_s16 (ptr, src->val[0], lane); + v.val[1] = vld1q_lane_s16 ((ptr + 1), src->val[1], lane); + return v; +} +#define vld2q_lane_s16(ptr, src, lane) vld2q_lane_s16_ptr(ptr, &src, lane) + +//int32x4x2_t vld2q_lane_s32(__transfersize(2) int32_t const * ptr, int32x4x2_t src, __constrange(0,3)int lane);// VLD2.32 {d0[0], d2[0]}, [r0] +_NEON2SSE_INLINE int32x4x2_t vld2q_lane_s32_ptr(__transfersize(2) int32_t const * ptr, int32x4x2_t* src, __constrange(0,3) int lane) +{ + int32x4x2_t v; + v.val[0] = _MM_INSERT_EPI32 (src->val[0], ptr[0], lane); + v.val[1] = _MM_INSERT_EPI32 (src->val[1], ptr[1], lane); + return v; +} +#define vld2q_lane_s32(ptr, src, lane) vld2q_lane_s32_ptr(ptr, &src, lane) + +//float16x8x2_t vld2q_lane_f16(__transfersize(2) __fp16 const * ptr, float16x8x2_t src, __constrange(0,7)int lane);// VLD2.16 {d0[0], d2[0]}, [r0] +//current IA SIMD doesn't support float16 + +//float32x4x2_t vld2q_lane_f32_ptr(__transfersize(2) float32_t const * ptr, float32x4x2_t src,__constrange(0,3) int lane);// VLD2.32 {d0[0], d2[0]}, [r0] +_NEON2SSE_INLINE float32x4x2_t vld2q_lane_f32_ptr(__transfersize(2) float32_t const * ptr, float32x4x2_t* src,__constrange(0,3) int lane) // VLD2.32 {d0[0], d2[0]}, [r0] +{ + float32x4x2_t v; + v.val[0] = vld1q_lane_f32(ptr, src->val[0], lane); + v.val[1] = vld1q_lane_f32((ptr + 1), src->val[1], lane); + return v; +} +#define vld2q_lane_f32(ptr,src,lane) vld2q_lane_f32_ptr(ptr,&src,lane) + +//poly16x8x2_t vld2q_lane_p16(__transfersize(2) poly16_t const * ptr, poly16x8x2_t src,__constrange(0,7) int lane);// VLD2.16 {d0[0], d2[0]}, [r0] +#define vld2q_lane_p16 vld2q_lane_u16 + +_NEON2SSESTORAGE uint8x8x2_t vld2_lane_u8(__transfersize(2) uint8_t const * ptr, uint8x8x2_t src, __constrange(0,7) int lane);// VLD2.8 {d0[0], d1[0]}, [r0] +_NEON2SSE_INLINE uint8x8x2_t vld2_lane_u8(__transfersize(2) uint8_t const * ptr,
uint8x8x2_t src, __constrange(0,7) int lane) // VLD2.8 {d0[0], d1[0]}, [r0] +{ + uint8x8x2_t v; + v.val[0] = vld1_lane_u8(ptr, src.val[0], lane); + v.val[1] = vld1_lane_u8((ptr + 1), src.val[1], lane); + return v; +} + +_NEON2SSESTORAGE uint16x4x2_t vld2_lane_u16(__transfersize(2) uint16_t const * ptr, uint16x4x2_t src, __constrange(0,3)int lane);// VLD2.16 {d0[0], d1[0]}, [r0] +_NEON2SSE_INLINE uint16x4x2_t vld2_lane_u16(__transfersize(2) uint16_t const * ptr, uint16x4x2_t src, __constrange(0,3) int lane) +{ + uint16x4x2_t v; + v.val[0] = vld1_lane_u16(ptr, src.val[0], lane); + v.val[1] = vld1_lane_u16((ptr + 1), src.val[1], lane); + return v; +} + +_NEON2SSESTORAGE uint32x2x2_t vld2_lane_u32(__transfersize(2) uint32_t const * ptr, uint32x2x2_t src, __constrange(0,1)int lane);// VLD2.32 {d0[0], d1[0]}, [r0] +_NEON2SSE_INLINE uint32x2x2_t vld2_lane_u32(__transfersize(2) uint32_t const * ptr, uint32x2x2_t src, __constrange(0,1) int lane) +{ + uint32x2x2_t v; + v.val[0] = vld1_lane_u32(ptr, src.val[0], lane); + v.val[1] = vld1_lane_u32((ptr + 1), src.val[1], lane); + return v; +} + +_NEON2SSE_GLOBAL int8x8x2_t vld2_lane_s8(__transfersize(2) int8_t const * ptr, int8x8x2_t src, __constrange(0,7) int lane);// VLD2.8 {d0[0], d1[0]}, [r0] +#define vld2_lane_s8(ptr, src, lane) vld2_lane_u8(( uint8_t*) ptr, src, lane) + +_NEON2SSE_GLOBAL int16x4x2_t vld2_lane_s16(__transfersize(2) int16_t const * ptr, int16x4x2_t src, __constrange(0,3) int lane);// VLD2.16 {d0[0], d1[0]}, [r0] +#define vld2_lane_s16(ptr, src, lane) vld2_lane_u16(( uint16_t*) ptr, src, lane) + +_NEON2SSE_GLOBAL int32x2x2_t vld2_lane_s32(__transfersize(2) int32_t const * ptr, int32x2x2_t src, __constrange(0,1) int lane);// VLD2.32 {d0[0], d1[0]}, [r0] +#define vld2_lane_s32(ptr, src, lane) vld2_lane_u32(( uint32_t*) ptr, src, lane) + +//float16x4x2_t vld2_lane_f16(__transfersize(2) __fp16 const * ptr, float16x4x2_t src, __constrange(0,3) int lane); // VLD2.16 {d0[0], d1[0]}, [r0] +//current IA SIMD doesn't support float16 + +_NEON2SSESTORAGE float32x2x2_t vld2_lane_f32(__transfersize(2) float32_t const * ptr, float32x2x2_t src,__constrange(0,1) int lane); // VLD2.32 {d0[0], d1[0]}, [r0] +_NEON2SSE_INLINE float32x2x2_t vld2_lane_f32(__transfersize(2) float32_t const * ptr, float32x2x2_t src,__constrange(0,1) int lane) +{ + float32x2x2_t v; + v.val[0] = vld1_lane_f32(ptr, src.val[0], lane); + v.val[1] = vld1_lane_f32((ptr + 1), src.val[1], lane); + return v; +} + +//poly8x8x2_t vld2_lane_p8(__transfersize(2) poly8_t const * ptr, poly8x8x2_t src, __constrange(0,7) int lane);// VLD2.8 {d0[0], d1[0]}, [r0] +_NEON2SSE_GLOBAL poly8x8x2_t vld2_lane_p8_ptr(__transfersize(2) poly8_t const * ptr, poly8x8x2_t * src, __constrange(0,7) int lane); // VLD2.8 {d0[0], d1[0]}, [r0] +#define vld2_lane_p8 vld2_lane_u8 + +//poly16x4x2_t vld2_lane_p16(__transfersize(2) poly16_t const * ptr, poly16x4x2_t src, __constrange(0,3)int lane);// VLD2.16 {d0[0], d1[0]}, [r0] +_NEON2SSE_GLOBAL poly16x4x2_t vld2_lane_p16_ptr(__transfersize(2) poly16_t const * ptr, poly16x4x2_t * src, __constrange(0,3) int lane); // VLD2.16 {d0[0], d1[0]}, [r0] +#define vld2_lane_p16 vld2_lane_u16 + +//*********** Lane triplets ********************** +//************************************************* +//does vld1_lane_xx ptr[0] to src->val[0], ptr[1] to src->val[1] and ptr[2] to src->val[2] at lane position +//we assume src is 16-byte aligned + +//uint16x8x3_t vld3q_lane_u16(__transfersize(3) uint16_t const * ptr, uint16x8x3_t src,__constrange(0,7) int lane);// VLD3.16 {d0[0], d2[0],
d4[0]}, [r0] +_NEON2SSE_INLINE uint16x8x3_t vld3q_lane_u16_ptr(__transfersize(3) uint16_t const * ptr, uint16x8x3_t* src,__constrange(0,7) int lane) // VLD3.16 {d0[0], d2[0], d4[0]}, [r0] +{ + uint16x8x3_t v; + v.val[0] = _MM_INSERT_EPI16 ( src->val[0], ptr[0], lane); + v.val[1] = _MM_INSERT_EPI16 ( src->val[1], ptr[1], lane); + v.val[2] = _MM_INSERT_EPI16 ( src->val[2], ptr[2], lane); + return v; +} +#define vld3q_lane_u16(ptr, src, lane) vld3q_lane_u16_ptr(ptr, &src, lane) + +//uint32x4x3_t vld3q_lane_u32(__transfersize(3) uint32_t const * ptr, uint32x4x3_t src,__constrange(0,3) int lane);// VLD3.32 {d0[0], d2[0], d4[0]}, [r0] +_NEON2SSE_INLINE uint32x4x3_t vld3q_lane_u32_ptr(__transfersize(3) uint32_t const * ptr, uint32x4x3_t* src,__constrange(0,3) int lane) // VLD3.32 {d0[0], d2[0], d4[0]}, [r0] +{ + uint32x4x3_t v; + v.val[0] = _MM_INSERT_EPI32 ( src->val[0], ptr[0], lane); + v.val[1] = _MM_INSERT_EPI32 ( src->val[1], ptr[1], lane); + v.val[2] = _MM_INSERT_EPI32 ( src->val[2], ptr[2], lane); + return v; +} +#define vld3q_lane_u32(ptr, src, lane) vld3q_lane_u32_ptr(ptr, &src, lane) + +//int16x8x3_t vld3q_lane_s16(__transfersize(3) int16_t const * ptr, int16x8x3_t src, __constrange(0,7)int lane);// VLD3.16 {d0[0], d2[0], d4[0]}, [r0] +_NEON2SSE_INLINE int16x8x3_t vld3q_lane_s16_ptr(__transfersize(3) int16_t const * ptr, int16x8x3_t* src, __constrange(0,7) int lane) // VLD3.16 {d0[0], d2[0], d4[0]}, [r0] +{ + int16x8x3_t v; + v.val[0] = _MM_INSERT_EPI16 ( src->val[0], ptr[0], lane); + v.val[1] = _MM_INSERT_EPI16 ( src->val[1], ptr[1], lane); + v.val[2] = _MM_INSERT_EPI16 ( src->val[2], ptr[2], lane); + return v; +} +#define vld3q_lane_s16(ptr, src, lane) vld3q_lane_s16_ptr(ptr, &src, lane) + +//int32x4x3_t vld3q_lane_s32(__transfersize(3) int32_t const * ptr, int32x4x3_t src, __constrange(0,3)int lane);// VLD3.32 {d0[0], d2[0], d4[0]}, [r0] +_NEON2SSE_INLINE int32x4x3_t vld3q_lane_s32_ptr(__transfersize(3) int32_t const * ptr, int32x4x3_t* src, __constrange(0,3) int lane) // VLD3.32 {d0[0], d2[0], d4[0]}, [r0] +{ + int32x4x3_t v; + v.val[0] = _MM_INSERT_EPI32 ( src->val[0], ptr[0], lane); + v.val[1] = _MM_INSERT_EPI32 ( src->val[1], ptr[1], lane); + v.val[2] = _MM_INSERT_EPI32 ( src->val[2], ptr[2], lane); + return v; +} +#define vld3q_lane_s32(ptr, src, lane) vld3q_lane_s32_ptr(ptr, &src, lane) + +_NEON2SSE_GLOBAL float16x8x3_t vld3q_lane_f16_ptr(__transfersize(3) __fp16 const * ptr, float16x8x3_t * src, __constrange(0,7) int lane); // VLD3.16 {d0[0], d2[0], d4[0]}, [r0] +//current IA SIMD doesn't support float16 +#define vld3q_lane_f16(ptr, src, lane) vld3q_lane_f16_ptr(ptr, &src, lane) + + +//float32x4x3_t vld3q_lane_f32(__transfersize(3) float32_t const * ptr, float32x4x3_t src,__constrange(0,3) int lane);// VLD3.32 {d0[0], d2[0], d4[0]}, [r0] +_NEON2SSE_INLINE float32x4x3_t vld3q_lane_f32_ptr(__transfersize(3) float32_t const * ptr, float32x4x3_t* src,__constrange(0,3) int lane) // VLD3.32 {d0[0], d2[0], d4[0]}, [r0] +{ + float32x4x3_t v; + v.val[0] = vld1q_lane_f32(&ptr[0], src->val[0], lane); + v.val[1] = vld1q_lane_f32(&ptr[1], src->val[1], lane); + v.val[2] = vld1q_lane_f32(&ptr[2], src->val[2], lane); + return v; +} +#define vld3q_lane_f32(ptr,src,lane) vld3q_lane_f32_ptr(ptr,&src,lane) + +_NEON2SSE_GLOBAL poly16x8x3_t vld3q_lane_p16_ptr(__transfersize(3) poly16_t const * ptr, poly16x8x3_t * src,__constrange(0,7) int lane); // VLD3.16 {d0[0], d2[0], d4[0]}, [r0] +#define vld3q_lane_p16 vld3q_lane_u16 + +_NEON2SSESTORAGE uint8x8x3_t vld3_lane_u8(__transfersize(3) uint8_t const 
* ptr, uint8x8x3_t src, __constrange(0,7) int lane);// VLD3.8 {d0[0], d1[0], d2[0]}, [r0] +_NEON2SSE_INLINE uint8x8x3_t vld3_lane_u8(__transfersize(3) uint8_t const * ptr, uint8x8x3_t src, __constrange(0,7) int lane) // VLD3.8 {d0[0], d1[0], d2[0]}, [r0] +{ + uint8x8x3_t v; + v.val[0] = vld1_lane_u8(ptr, src.val[0], lane); + v.val[1] = vld1_lane_u8((ptr + 1), src.val[1], lane); + v.val[2] = vld1_lane_u8((ptr + 2), src.val[2], lane); + return v; +} + +_NEON2SSESTORAGE uint16x4x3_t vld3_lane_u16(__transfersize(3) uint16_t const * ptr, uint16x4x3_t src, __constrange(0,3)int lane);// VLD3.16 {d0[0], d1[0], d2[0]}, [r0] +_NEON2SSE_INLINE uint16x4x3_t vld3_lane_u16(__transfersize(3) uint16_t const * ptr, uint16x4x3_t src, __constrange(0,3) int lane) // VLD3.16 {d0[0], d1[0], d2[0]}, [r0] +{ + uint16x4x3_t v; + v.val[0] = vld1_lane_u16(ptr, src.val[0], lane); + v.val[1] = vld1_lane_u16((ptr + 1), src.val[1], lane); + v.val[2] = vld1_lane_u16((ptr + 2), src.val[2], lane); + return v; +} + +_NEON2SSESTORAGE uint32x2x3_t vld3_lane_u32(__transfersize(3) uint32_t const * ptr, uint32x2x3_t src, __constrange(0,1)int lane);// VLD3.32 {d0[0], d1[0], d2[0]}, [r0] +_NEON2SSE_INLINE uint32x2x3_t vld3_lane_u32(__transfersize(3) uint32_t const * ptr, uint32x2x3_t src, __constrange(0,1) int lane) // VLD3.32 {d0[0], d1[0], d2[0]}, [r0] +{ + //need to merge into 128 bit anyway + uint32x2x3_t v; + v.val[0] = vld1_lane_u32(ptr, src.val[0], lane); + v.val[1] = vld1_lane_u32((ptr + 1), src.val[1], lane); + v.val[2] = vld1_lane_u32((ptr + 2), src.val[2], lane); + return v; +} + +_NEON2SSE_GLOBAL int8x8x3_t vld3_lane_s8(__transfersize(3) int8_t const * ptr, int8x8x3_t src, __constrange(0,7) int lane); // VLD3.8 {d0[0], d1[0], d2[0]}, [r0] +#define vld3_lane_s8(ptr, src, lane) vld3_lane_u8(( uint8_t*) ptr, src, lane) + +_NEON2SSE_GLOBAL int16x4x3_t vld3_lane_s16(__transfersize(3) int16_t const * ptr, int16x4x3_t src, __constrange(0,3) int lane); // VLD3.16 {d0[0], d1[0], d2[0]}, [r0] +#define vld3_lane_s16(ptr, src, lane) vld3_lane_u16(( uint16_t*) ptr, src, lane) + +_NEON2SSE_GLOBAL int32x2x3_t vld3_lane_s32(__transfersize(3) int32_t const * ptr, int32x2x3_t src, __constrange(0,1) int lane); // VLD3.32 {d0[0], d1[0], d2[0]}, [r0] +#define vld3_lane_s32(ptr, src, lane) vld3_lane_u32(( uint32_t*) ptr, src, lane) + +_NEON2SSE_GLOBAL float16x4x3_t vld3_lane_f16_ptr(__transfersize(3) __fp16 const * ptr, float16x4x3_t * src, __constrange(0,3) int lane); // VLD3.16 {d0[0], d1[0], d2[0]}, [r0] +//current IA SIMD doesn't support float16 + +_NEON2SSESTORAGE float32x2x3_t vld3_lane_f32(__transfersize(3) float32_t const * ptr, float32x2x3_t src,__constrange(0,1) int lane);// VLD3.32 {d0[0], d1[0], d2[0]}, [r0] +_NEON2SSE_INLINE float32x2x3_t vld3_lane_f32(__transfersize(3) float32_t const * ptr, float32x2x3_t src,__constrange(0,1) int lane) // VLD3.32 {d0[0], d1[0], d2[0]}, [r0] +{ + float32x2x3_t v; + v.val[0] = vld1_lane_f32(ptr, src.val[0], lane); + v.val[1] = vld1_lane_f32((ptr + 1), src.val[1], lane); + v.val[2] = vld1_lane_f32((ptr + 2), src.val[2], lane); + return v; +} + +_NEON2SSE_GLOBAL poly8x8x3_t vld3_lane_p8(__transfersize(3) poly8_t const * ptr, poly8x8x3_t src, __constrange(0,7) int lane); // VLD3.8 {d0[0], d1[0], d2[0]}, [r0] +#define vld3_lane_p8 vld3_lane_u8 + +_NEON2SSE_GLOBAL poly16x4x3_t vld3_lane_p16(__transfersize(3) poly16_t const * ptr, poly16x4x3_t src, __constrange(0,3) int lane); // VLD3.16 {d0[0], d1[0], d2[0]}, [r0] +#define vld3_lane_p16 vld3_lane_u16 +
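+//Illustrative sketch (hypothetical values, not part of the library): the vld3_lane
+//functions above overwrite only the chosen lane of each of the three vectors with
+//ptr[0..2] and leave every other lane untouched:
+//
+//    uint16x4x3_t acc;               //previously initialized
+//    uint16_t s[3] = {10, 20, 30};   //one interleaved sample
+//    acc = vld3_lane_u16(s, acc, 2); //lane 2 of acc.val[0..2] becomes 10, 20, 30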
//******************* Lane Quadruples load *************************** +//********************************************************************* +//does vld1_lane_xx ptr[0] to src->val[0], ptr[1] to src->val[1], ptr[2] to src->val[2] and ptr[3] to src->val[3] at lane position +//we assume src is 16-byte aligned + +//uint16x8x4_t vld4q_lane_u16(__transfersize(4) uint16_t const * ptr, uint16x8x4_t src,__constrange(0,7) int lane)// VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] +_NEON2SSE_INLINE uint16x8x4_t vld4q_lane_u16_ptr(__transfersize(4) uint16_t const * ptr, uint16x8x4_t* src,__constrange(0,7) int lane) +{ + uint16x8x4_t v; + v.val[0] = _MM_INSERT_EPI16 ( src->val[0], ptr[0], lane); + v.val[1] = _MM_INSERT_EPI16 ( src->val[1], ptr[1], lane); + v.val[2] = _MM_INSERT_EPI16 ( src->val[2], ptr[2], lane); + v.val[3] = _MM_INSERT_EPI16 ( src->val[3], ptr[3], lane); + return v; +} +#define vld4q_lane_u16(ptr, src, lane) vld4q_lane_u16_ptr(ptr, &src, lane) + +//uint32x4x4_t vld4q_lane_u32(__transfersize(4) uint32_t const * ptr, uint32x4x4_t src,__constrange(0,3) int lane)// VLD4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0] +_NEON2SSE_INLINE uint32x4x4_t vld4q_lane_u32_ptr(__transfersize(4) uint32_t const * ptr, uint32x4x4_t* src,__constrange(0,3) int lane) +{ + uint32x4x4_t v; + v.val[0] = _MM_INSERT_EPI32 ( src->val[0], ptr[0], lane); + v.val[1] = _MM_INSERT_EPI32 ( src->val[1], ptr[1], lane); + v.val[2] = _MM_INSERT_EPI32 ( src->val[2], ptr[2], lane); + v.val[3] = _MM_INSERT_EPI32 ( src->val[3], ptr[3], lane); + return v; +} +#define vld4q_lane_u32(ptr, src, lane) vld4q_lane_u32_ptr(ptr, &src, lane) + +//int16x8x4_t vld4q_lane_s16(__transfersize(4) int16_t const * ptr, int16x8x4_t src, __constrange(0,7)int lane);// VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] +_NEON2SSE_GLOBAL int16x8x4_t vld4q_lane_s16_ptr(__transfersize(4) int16_t const * ptr, int16x8x4_t * src, __constrange(0,7) int lane); // VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] +#define vld4q_lane_s16(ptr, src, lane) vld4q_lane_u16(( uint16_t*) ptr, src, lane) + +//int32x4x4_t vld4q_lane_s32(__transfersize(4) int32_t const * ptr, int32x4x4_t src, __constrange(0,3)int lane);// VLD4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0] +_NEON2SSE_GLOBAL int32x4x4_t vld4q_lane_s32_ptr(__transfersize(4) int32_t const * ptr, int32x4x4_t * src, __constrange(0,3) int lane); // VLD4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0] +#define vld4q_lane_s32(ptr, src, lane) vld4q_lane_u32(( uint32_t*) ptr, src, lane) + +//float16x8x4_t vld4q_lane_f16(__transfersize(4) __fp16 const * ptr, float16x8x4_t src, __constrange(0,7)int lane);// VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] +_NEON2SSE_GLOBAL float16x8x4_t vld4q_lane_f16_ptr(__transfersize(4) __fp16 const * ptr, float16x8x4_t * src, __constrange(0,7) int lane); // VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] +//current IA SIMD doesn't support float16 + +//float32x4x4_t vld4q_lane_f32(__transfersize(4) float32_t const * ptr, float32x4x4_t src,__constrange(0,3) int lane)// VLD4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0] +_NEON2SSE_INLINE float32x4x4_t vld4q_lane_f32_ptr(__transfersize(4) float32_t const * ptr, float32x4x4_t* src,__constrange(0,3) int lane) +{ + float32x4x4_t v; + v.val[0] = vld1q_lane_f32(&ptr[0], src->val[0], lane); + v.val[1] = vld1q_lane_f32(&ptr[1], src->val[1], lane); + v.val[2] = vld1q_lane_f32(&ptr[2], src->val[2], lane); + v.val[3] = vld1q_lane_f32(&ptr[3], src->val[3], lane); + return v; +} +#define vld4q_lane_f32(ptr,val,lane) vld4q_lane_f32_ptr(ptr,&val,lane) + +//poly16x8x4_t vld4q_lane_p16(__transfersize(4) poly16_t const * ptr, poly16x8x4_t
src,__constrange(0,7) int lane);// VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] +_NEON2SSE_GLOBAL poly16x8x4_t vld4q_lane_p16_ptr(__transfersize(4) poly16_t const * ptr, poly16x8x4_t * src,__constrange(0,7) int lane); // VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] +#define vld4q_lane_p16 vld4q_lane_u16 + +_NEON2SSESTORAGE uint8x8x4_t vld4_lane_u8(__transfersize(4) uint8_t const * ptr, uint8x8x4_t src, __constrange(0,7) int lane);// VLD4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0] +_NEON2SSE_INLINE uint8x8x4_t vld4_lane_u8(__transfersize(4) uint8_t const * ptr, uint8x8x4_t src, __constrange(0,7) int lane) +{ + uint8x8x4_t v; + v.val[0] = vld1_lane_u8(ptr, src.val[0], lane); + v.val[1] = vld1_lane_u8((ptr + 1), src.val[1], lane); + v.val[2] = vld1_lane_u8((ptr + 2), src.val[2], lane); + v.val[3] = vld1_lane_u8((ptr + 3), src.val[3], lane); + return v; +} + +_NEON2SSESTORAGE uint16x4x4_t vld4_lane_u16(__transfersize(4) uint16_t const * ptr, uint16x4x4_t src, __constrange(0,3)int lane);// VLD4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0] +_NEON2SSE_INLINE uint16x4x4_t vld4_lane_u16(__transfersize(4) uint16_t const * ptr, uint16x4x4_t src, __constrange(0,3) int lane) +{ + uint16x4x4_t v; + v.val[0] = vld1_lane_u16(ptr, src.val[0], lane); + v.val[1] = vld1_lane_u16((ptr + 1), src.val[1], lane); + v.val[2] = vld1_lane_u16((ptr + 2), src.val[2], lane); + v.val[3] = vld1_lane_u16((ptr + 3), src.val[3], lane); + return v; +} + +_NEON2SSESTORAGE uint32x2x4_t vld4_lane_u32(__transfersize(4) uint32_t const * ptr, uint32x2x4_t src, __constrange(0,1)int lane);// VLD4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0] +_NEON2SSE_INLINE uint32x2x4_t vld4_lane_u32(__transfersize(4) uint32_t const * ptr, uint32x2x4_t src, __constrange(0,1) int lane) +{ + uint32x2x4_t v; + v.val[0] = vld1_lane_u32(ptr, src.val[0], lane); + v.val[1] = vld1_lane_u32((ptr + 1), src.val[1], lane); + v.val[2] = vld1_lane_u32((ptr + 2), src.val[2], lane); + v.val[3] = vld1_lane_u32((ptr + 3), src.val[3], lane); + return v; +} + +_NEON2SSE_GLOBAL int8x8x4_t vld4_lane_s8(__transfersize(4) int8_t const * ptr, int8x8x4_t src, __constrange(0,7) int lane);// VLD4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0] +#define vld4_lane_s8(ptr,src,lane) vld4_lane_u8((uint8_t*)ptr,src,lane) + +_NEON2SSE_GLOBAL int16x4x4_t vld4_lane_s16(__transfersize(4) int16_t const * ptr, int16x4x4_t src, __constrange(0,3) int lane);// VLD4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0] +#define vld4_lane_s16(ptr,src,lane) vld4_lane_u16((uint16_t*)ptr,src,lane) + +_NEON2SSE_GLOBAL int32x2x4_t vld4_lane_s32(__transfersize(4) int32_t const * ptr, int32x2x4_t src, __constrange(0,1) int lane);// VLD4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0] +#define vld4_lane_s32(ptr,src,lane) vld4_lane_u32((uint32_t*)ptr,src,lane) + +//float16x4x4_t vld4_lane_f16(__transfersize(4) __fp16 const * ptr, float16x4x4_t src, __constrange(0,3)int lane);// VLD4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0] +_NEON2SSE_GLOBAL float16x4x4_t vld4_lane_f16_ptr(__transfersize(4) __fp16 const * ptr, float16x4x4_t * src, __constrange(0,3) int lane); +//current IA SIMD doesn't support float16 + +_NEON2SSESTORAGE float32x2x4_t vld4_lane_f32(__transfersize(4) float32_t const * ptr, float32x2x4_t src,__constrange(0,1) int lane);// VLD4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0] +_NEON2SSE_INLINE float32x2x4_t vld4_lane_f32(__transfersize(4) float32_t const * ptr, float32x2x4_t src,__constrange(0,1) int lane) +{ + //serial solution may be faster + float32x2x4_t v; + v.val[0] = vld1_lane_f32(ptr, src.val[0], lane); + v.val[1] = vld1_lane_f32((ptr + 1), src.val[1], 
lane); + v.val[2] = vld1_lane_f32((ptr + 2), src.val[2], lane); + v.val[3] = vld1_lane_f32((ptr + 3), src.val[3], lane); + return v; +} + +_NEON2SSE_GLOBAL poly8x8x4_t vld4_lane_p8(__transfersize(4) poly8_t const * ptr, poly8x8x4_t src, __constrange(0,7) int lane);// VLD4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0] +#define vld4_lane_p8 vld4_lane_u8 + +_NEON2SSE_GLOBAL poly16x4x4_t vld4_lane_p16(__transfersize(4) poly16_t const * ptr, poly16x4x4_t src, __constrange(0,3)int lane);// VLD4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0] +#define vld4_lane_p16 vld4_lane_u16 + +//******************* Store duplets ********************************************* +//******************************************************************************** +//void vst2q_u8(__transfersize(32) uint8_t * ptr, uint8x16x2_t val)// VST2.8 {d0, d2}, [r0] +_NEON2SSE_INLINE void vst2q_u8_ptr(__transfersize(32) uint8_t * ptr, uint8x16x2_t const * val) +{ + uint8x16x2_t v; + v.val[0] = _mm_unpacklo_epi8(val->val[0], val->val[1]); + v.val[1] = _mm_unpackhi_epi8(val->val[0], val->val[1]); + vst1q_u8 (ptr, v.val[0]); + vst1q_u8 ((ptr + 16), v.val[1]); +} +#define vst2q_u8(ptr, val) vst2q_u8_ptr(ptr, &val) + +//void vst2q_u16(__transfersize(16) uint16_t * ptr, uint16x8x2_t val)// VST2.16 {d0, d2}, [r0] +_NEON2SSE_INLINE void vst2q_u16_ptr(__transfersize(16) uint16_t * ptr, uint16x8x2_t const * val) +{ + uint16x8x2_t v; + v.val[0] = _mm_unpacklo_epi16(val->val[0], val->val[1]); + v.val[1] = _mm_unpackhi_epi16(val->val[0], val->val[1]); + vst1q_u16 (ptr, v.val[0]); + vst1q_u16 ((ptr + 8), v.val[1]); +} +#define vst2q_u16(ptr, val) vst2q_u16_ptr(ptr, &val) + +//void vst2q_u32(__transfersize(8) uint32_t * ptr, uint32x4x2_t val)// VST2.32 {d0, d2}, [r0] +_NEON2SSE_INLINE void vst2q_u32_ptr(__transfersize(8) uint32_t* ptr, uint32x4x2_t const * val) +{ + uint32x4x2_t v; + v.val[0] = _mm_unpacklo_epi32(val->val[0], val->val[1]); + v.val[1] = _mm_unpackhi_epi32(val->val[0], val->val[1]); + vst1q_u32 (ptr, v.val[0]); + vst1q_u32 ((ptr + 4), v.val[1]); +} +#define vst2q_u32(ptr, val) vst2q_u32_ptr(ptr, &val) + +//void vst2q_s8(__transfersize(32) int8_t * ptr, int8x16x2_t val); // VST2.8 {d0, d2}, [r0] +_NEON2SSE_GLOBAL void vst2q_s8_ptr(__transfersize(32) int8_t * ptr, int8x16x2_t const * val); +#define vst2q_s8(ptr, val) vst2q_u8((uint8_t*)(ptr), val) + +//void vst2q_s16(__transfersize(16) int16_t * ptr, int16x8x2_t val);// VST2.16 {d0, d2}, [r0] +_NEON2SSE_GLOBAL void vst2q_s16_ptr(__transfersize(16) int16_t * ptr, int16x8x2_t const * val); +#define vst2q_s16(ptr, val) vst2q_u16((uint16_t*)(ptr), val) + +//void vst2q_s32(__transfersize(8) int32_t * ptr, int32x4x2_t val);// VST2.32 {d0, d2}, [r0] +_NEON2SSE_GLOBAL void vst2q_s32_ptr(__transfersize(8) int32_t * ptr, int32x4x2_t const * val); +#define vst2q_s32(ptr, val) vst2q_u32((uint32_t*)(ptr), val) + +//void vst2q_f16(__transfersize(16) __fp16 * ptr, float16x8x2_t val);// VST2.16 {d0, d2}, [r0] +_NEON2SSE_GLOBAL void vst2q_f16_ptr(__transfersize(16) __fp16 * ptr, float16x8x2_t const * val); +// IA32 SIMD doesn't work with 16bit floats currently + +//void vst2q_f32(__transfersize(8) float32_t * ptr, float32x4x2_t val)// VST2.32 {d0, d2}, [r0] +_NEON2SSE_INLINE void vst2q_f32_ptr(__transfersize(8) float32_t* ptr, float32x4x2_t const * val) +{ + float32x4x2_t v; + v.val[0] = _mm_unpacklo_ps(val->val[0], val->val[1]); + v.val[1] = _mm_unpackhi_ps(val->val[0], val->val[1]); + vst1q_f32 (ptr, v.val[0]); + vst1q_f32 ((ptr + 4), v.val[1]); +} +#define vst2q_f32(ptr, val) vst2q_f32_ptr(ptr, &val) + +//void 
vst2q_p8(__transfersize(32) poly8_t * ptr, poly8x16x2_t val);// VST2.8 {d0, d2}, [r0] +_NEON2SSE_GLOBAL void vst2q_p8_ptr(__transfersize(32) poly8_t * ptr, poly8x16x2_t const * val); +#define vst2q_p8 vst2q_u8 + +//void vst2q_p16(__transfersize(16) poly16_t * ptr, poly16x8x2_t val);// VST2.16 {d0, d2}, [r0] +_NEON2SSE_GLOBAL void vst2q_p16_ptr(__transfersize(16) poly16_t * ptr, poly16x8x2_t const * val); +#define vst2q_p16 vst2q_u16 + +_NEON2SSESTORAGE void vst2_u8(__transfersize(16) uint8_t * ptr, uint8x8x2_t val);// VST2.8 {d0, d1}, [r0] +_NEON2SSE_INLINE void vst2_u8(__transfersize(16) uint8_t * ptr, uint8x8x2_t val) +{ + __m128i v0; + v0 = _mm_unpacklo_epi8(_pM128i(val.val[0]), _pM128i(val.val[1])); + vst1q_u8 (ptr, v0); +} + +_NEON2SSESTORAGE void vst2_u16(__transfersize(8) uint16_t * ptr, uint16x4x2_t val);// VST2.16 {d0, d1}, [r0] +_NEON2SSE_INLINE void vst2_u16(__transfersize(8) uint16_t * ptr, uint16x4x2_t val) +{ + __m128i v0; + v0 = _mm_unpacklo_epi16(_pM128i(val.val[0]), _pM128i(val.val[1])); + vst1q_u16 (ptr, v0); +} + +_NEON2SSESTORAGE void vst2_u32(__transfersize(4) uint32_t * ptr, uint32x2x2_t val);// VST2.32 {d0, d1}, [r0] +_NEON2SSE_INLINE void vst2_u32(__transfersize(4) uint32_t * ptr, uint32x2x2_t val) +{ + __m128i v0; + v0 = _mm_unpacklo_epi32(_pM128i(val.val[0]), _pM128i(val.val[1])); + vst1q_u32 (ptr, v0); +} + +_NEON2SSESTORAGE void vst2_u64(__transfersize(2) uint64_t * ptr, uint64x1x2_t val);// VST1.64 {d0, d1}, [r0] +_NEON2SSE_INLINE void vst2_u64(__transfersize(2) uint64_t * ptr, uint64x1x2_t val) +{ + *(ptr) = val.val[0].m64_u64[0]; + *(ptr + 1) = val.val[1].m64_u64[0]; +} + +_NEON2SSE_GLOBAL void vst2_s8(__transfersize(16) int8_t * ptr, int8x8x2_t val);// VST2.8 {d0, d1}, [r0] +#define vst2_s8(ptr, val) vst2_u8((uint8_t*) ptr, val) + +_NEON2SSE_GLOBAL void vst2_s16(__transfersize(8) int16_t * ptr, int16x4x2_t val); // VST2.16 {d0, d1}, [r0] +#define vst2_s16(ptr,val) vst2_u16((uint16_t*) ptr, val) + +_NEON2SSE_GLOBAL void vst2_s32(__transfersize(4) int32_t * ptr, int32x2x2_t val); // VST2.32 {d0, d1}, [r0] +#define vst2_s32(ptr,val) vst2_u32((uint32_t*) ptr, val) + +_NEON2SSE_GLOBAL void vst2_s64(__transfersize(2) int64_t * ptr, int64x1x2_t val); +#define vst2_s64(ptr,val) vst2_u64((uint64_t*) ptr,val) + +//void vst2_f16(__transfersize(8) __fp16 * ptr, float16x4x2_t val); // VST2.16 {d0, d1}, [r0] +//current IA SIMD doesn't support float16 + +_NEON2SSESTORAGE void vst2_f32(__transfersize(4) float32_t * ptr, float32x2x2_t val); // VST2.32 {d0, d1}, [r0] +_NEON2SSE_INLINE void vst2_f32(__transfersize(4) float32_t* ptr, float32x2x2_t val) +{ + *(ptr) = val.val[0].m64_f32[0]; + *(ptr + 1) = val.val[1].m64_f32[0]; + *(ptr + 2) = val.val[0].m64_f32[1]; + *(ptr + 3) = val.val[1].m64_f32[1]; +} + +_NEON2SSE_GLOBAL void vst2_p8(__transfersize(16) poly8_t * ptr, poly8x8x2_t val); // VST2.8 {d0, d1}, [r0] +#define vst2_p8 vst2_u8 + +_NEON2SSE_GLOBAL void vst2_p16(__transfersize(8) poly16_t * ptr, poly16x4x2_t val); // VST2.16 {d0, d1}, [r0] +#define vst2_p16 vst2_u16 + +//******************** Triplets store ***************************************** +//****************************************************************************** +//void vst3q_u8(__transfersize(48) uint8_t * ptr, uint8x16x3_t val)// VST3.8 {d0, d2, d4}, [r0] +_NEON2SSE_INLINE void vst3q_u8_ptr(__transfersize(48) uint8_t * ptr, uint8x16x3_t const * val) +{ + uint8x16x3_t v; + __m128i v0,v1,v2, cff, bldmask; + _NEON2SSE_ALIGN_16 static const uint8_t mask0[16] = {0, 1, 0xff, 2, 3,0xff, 4, 5,0xff, 6,7,0xff, 
8,9,0xff, 10}; + _NEON2SSE_ALIGN_16 static const uint8_t mask1[16] = {0, 0xff, 1, 2, 0xff, 3, 4, 0xff, 5, 6, 0xff, 7,8,0xff, 9,10}; + _NEON2SSE_ALIGN_16 static const uint8_t mask2[16] = {0xff, 6, 7, 0xff, 8, 9,0xff, 10, 11,0xff, 12,13,0xff, 14,15,0xff}; + _NEON2SSE_ALIGN_16 static const uint8_t mask2lo[16] = {0xff,0xff, 0, 0xff,0xff, 1, 0xff,0xff, 2, 0xff,0xff, 3, 0xff,0xff, 4, 0xff}; + _NEON2SSE_ALIGN_16 static const uint8_t mask2med[16] = {0xff, 5, 0xff, 0xff, 6, 0xff,0xff, 7, 0xff,0xff, 8, 0xff,0xff, 9, 0xff, 0xff}; + _NEON2SSE_ALIGN_16 static const uint8_t mask2hi[16] = {10, 0xff,0xff, 11, 0xff,0xff, 12, 0xff,0xff, 13, 0xff,0xff, 14, 0xff, 0xff, 15}; + + v0 = _mm_unpacklo_epi8(val->val[0], val->val[1]); //0,1, 3,4, 6,7, 9,10, 12,13, 15,16, 18,19, 21,22 + v2 = _mm_unpackhi_epi8(val->val[0], val->val[1]); //24,25, 27,28, 30,31, 33,34, 36,37, 39,40, 42,43, 45,46 + v1 = _mm_alignr_epi8(v2, v0, 11); //12,13, 15,16, 18,19, 21,22, 24,25, 27,28, 30,31, 33,34 + v.val[0] = _mm_shuffle_epi8(v0, *(__m128i*)mask0); //make holes for the v.val[2] data embedding + v.val[2] = _mm_shuffle_epi8(val->val[2], *(__m128i*)mask2lo); //make plugs for the v.val[2] data embedding + cff = _mm_cmpeq_epi8(v0, v0); //all ff + bldmask = _mm_cmpeq_epi8(*(__m128i*)mask0, cff); + v.val[0] = _MM_BLENDV_EPI8(v.val[0], v.val[2], bldmask); + vst1q_u8(ptr, v.val[0]); + v.val[0] = _mm_shuffle_epi8(v1, *(__m128i*)mask1); //make holes for the v.val[2] data embedding + v.val[2] = _mm_shuffle_epi8(val->val[2], *(__m128i*)mask2med); //make plugs for the v.val[2] data embedding + bldmask = _mm_cmpeq_epi8(*(__m128i*)mask1, cff); + v.val[1] = _MM_BLENDV_EPI8(v.val[0],v.val[2], bldmask); + vst1q_u8((ptr + 16), v.val[1]); + v.val[0] = _mm_shuffle_epi8(v2, *(__m128i*)mask2); //make holes for the v.val[2] data embedding + v.val[2] = _mm_shuffle_epi8(val->val[2], *(__m128i*)mask2hi); //make plugs for the v.val[2] data embedding + bldmask = _mm_cmpeq_epi8(*(__m128i*)mask2, cff); + v.val[2] = _MM_BLENDV_EPI8(v.val[0],v.val[2], bldmask ); + vst1q_u8((ptr + 32), v.val[2]); +} +#define vst3q_u8(ptr, val) vst3q_u8_ptr(ptr, &val) + +//void vst3q_u16(__transfersize(24) uint16_t * ptr, uint16x8x3_t val)// VST3.16 {d0, d2, d4}, [r0] +_NEON2SSE_INLINE void vst3q_u16_ptr(__transfersize(24) uint16_t * ptr, uint16x8x3_t const * val) +{ + uint16x8x3_t v; + __m128i v0,v1,v2, cff, bldmask; + _NEON2SSE_ALIGN_16 static const uint8_t mask0[16] = {0,1, 2,3, 0xff,0xff, 4,5, 6,7,0xff,0xff, 8,9,10,11}; + _NEON2SSE_ALIGN_16 static const uint8_t mask1[16] = {0xff, 0xff, 0,1, 2,3, 0xff,0xff, 4,5, 6,7, 0xff,0xff, 8,9}; + _NEON2SSE_ALIGN_16 static const uint8_t mask2[16] = {6,7,0xff,0xff, 8,9,10,11, 0xff, 0xff, 12,13,14,15, 0xff, 0xff}; + _NEON2SSE_ALIGN_16 static const uint8_t mask2lo[16] = {0xff,0xff, 0xff,0xff, 0,1, 0xff,0xff, 0xff,0xff, 2,3, 0xff,0xff, 0xff,0xff}; + _NEON2SSE_ALIGN_16 static const uint8_t mask2med[16] = {4,5, 0xff,0xff,0xff,0xff, 6,7, 0xff, 0xff,0xff,0xff, 8,9, 0xff, 0xff}; + _NEON2SSE_ALIGN_16 static const uint8_t mask2hi[16] = {0xff, 0xff, 10,11, 0xff, 0xff, 0xff, 0xff, 12,13, 0xff, 0xff, 0xff, 0xff,14,15}; + + v0 = _mm_unpacklo_epi16(val->val[0], val->val[1]); //0,1, 3,4, 6,7, 9,10 + v2 = _mm_unpackhi_epi16(val->val[0], val->val[1]); //12,13, 15,16, 18,19, 21,22, + v1 = _mm_alignr_epi8(v2, v0, 12); //9,10, 12,13, 15,16, 18,19 + v.val[0] = _mm_shuffle_epi8(v0, *(__m128i*)mask0); //make holes for the v.val[2] data embedding + v.val[2] = _mm_shuffle_epi8(val->val[2], *(__m128i*)mask2lo); //make plugs for the v.val[2] data embedding + cff = 
_mm_cmpeq_epi16(v0, v0); //all ff + bldmask = _mm_cmpeq_epi16(*(__m128i*)mask0, cff); + v.val[0] = _MM_BLENDV_EPI8(v.val[0], v.val[2], bldmask); + vst1q_u16(ptr, v.val[0]); + v.val[0] = _mm_shuffle_epi8(v1, *(__m128i*)mask1); //make holes for the v.val[2] data embedding + v.val[2] = _mm_shuffle_epi8(val->val[2], *(__m128i*)mask2med); //make plugs for the v.val[2] data embedding + bldmask = _mm_cmpeq_epi16(*(__m128i*)mask1, cff); + v.val[1] = _MM_BLENDV_EPI8(v.val[0],v.val[2], bldmask); + vst1q_u16((ptr + 8), v.val[1]); + v.val[0] = _mm_shuffle_epi8(v2, *(__m128i*)mask2); //make holes for the v.val[2] data embedding + v.val[2] = _mm_shuffle_epi8(val->val[2], *(__m128i*)mask2hi); //make plugs for the v.val[2] data embedding + bldmask = _mm_cmpeq_epi16(*(__m128i*)mask2, cff); + v.val[2] = _MM_BLENDV_EPI8(v.val[0],v.val[2], bldmask ); + vst1q_u16((ptr + 16), v.val[2]); +} +#define vst3q_u16(ptr, val) vst3q_u16_ptr(ptr, &val) + +//void vst3q_u32(__transfersize(12) uint32_t * ptr, uint32x4x3_t val)// VST3.32 {d0, d2, d4}, [r0] +_NEON2SSE_INLINE void vst3q_u32_ptr(__transfersize(12) uint32_t * ptr, uint32x4x3_t const * val) +{ + //a0,a1,a2,a3, b0,b1,b2,b3, c0,c1,c2,c3 -> a0,b0,c0,a1, b1,c1,a2,b2, c2,a3,b3,c3 + uint32x4x3_t v; + __m128i tmp0, tmp1,tmp2; + tmp0 = _mm_unpacklo_epi32(val->val[0], val->val[1]); //a0,b0,a1,b1 + tmp1 = _mm_unpackhi_epi32(val->val[0], val->val[1]); //a2,b2,a3,b3 + tmp2 = _mm_unpacklo_epi32(val->val[1], val->val[2]); //b0,c0,b1,c1 + v.val[1] = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(tmp2),_mm_castsi128_ps(tmp1), _MM_SHUFFLE(1,0,3,2))); //b1,c1,a2,b2, + v.val[2] = _mm_unpackhi_epi64(tmp1, val->val[2]); //a3,b3, c2,c3 + v.val[2] = _mm_shuffle_epi32(v.val[2], 2 | (0 << 2) | (1 << 4) | (3 << 6)); //c2,a3,b3,c3 + tmp1 = _mm_unpacklo_epi32(tmp2,val->val[0]); //b0,a0,c0,a1 + v.val[0] = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(tmp0),_mm_castsi128_ps(tmp1), _MM_SHUFFLE(3,2,1,0))); //a0,b0,c0,a1, + + vst1q_u32(ptr, v.val[0]); + vst1q_u32((ptr + 4), v.val[1]); + vst1q_u32((ptr + 8), v.val[2]); +} +#define vst3q_u32(ptr, val) vst3q_u32_ptr(ptr, &val) + +//void vst3q_s8(__transfersize(48) int8_t * ptr, int8x16x3_t val); +_NEON2SSE_GLOBAL void vst3q_s8_ptr(__transfersize(48) int8_t * ptr, int8x16x3_t const * val); +#define vst3q_s8(ptr, val) vst3q_u8((uint8_t*)(ptr), val) + +//void vst3q_s16(__transfersize(24) int16_t * ptr, int16x8x3_t val); +_NEON2SSE_GLOBAL void vst3q_s16_ptr(__transfersize(24) int16_t * ptr, int16x8x3_t const * val); +#define vst3q_s16(ptr, val) vst3q_u16((uint16_t*)(ptr), val) + +//void vst3q_s32(__transfersize(12) int32_t * ptr, int32x4x3_t val); +_NEON2SSE_GLOBAL void vst3q_s32_ptr(__transfersize(12) int32_t * ptr, int32x4x3_t const * val); +#define vst3q_s32(ptr, val) vst3q_u32((uint32_t*)(ptr), val) + +//void vst3q_f16(__transfersize(24) __fp16 * ptr, float16x8x3_t val);// VST3.16 {d0, d2, d4}, [r0] +_NEON2SSE_GLOBAL void vst3q_f16_ptr(__transfersize(24) __fp16 * ptr, float16x8x3_t const * val); +// IA32 SIMD doesn't work with 16bit floats currently + +//void vst3q_f32(__transfersize(12) float32_t * ptr, float32x4x3_t val)// VST3.32 {d0, d2, d4}, [r0] +_NEON2SSE_INLINE void vst3q_f32_ptr(__transfersize(12) float32_t * ptr, float32x4x3_t const * val) +{ + float32x4x3_t v; + __m128 tmp0, tmp1,tmp2; + tmp0 = _mm_unpacklo_ps(val->val[0], val->val[1]); //a0,b0,a1,b1 + tmp1 = _mm_unpackhi_ps(val->val[0], val->val[1]); //a2,b2,a3,b3 + tmp2 = _mm_unpacklo_ps(val->val[1], val->val[2]); //b0,c0,b1,c1 + v.val[1] = _mm_shuffle_ps(tmp2,tmp1, 
_MM_SHUFFLE(1,0,3,2)); //b1,c1,a2,b2, + v.val[2] = _mm_movehl_ps(val->val[2],tmp1); //a3,b3, c2,c3 + v.val[2] = _mm_shuffle_ps(v.val[2],v.val[2], _MM_SHUFFLE(3,1,0,2)); //c2,a3,b3,c3 + tmp1 = _mm_unpacklo_ps(tmp2,val->val[0]); //b0,a0,c0,a1 + v.val[0] = _mm_shuffle_ps(tmp0,tmp1, _MM_SHUFFLE(3,2,1,0)); //a0,b0,c0,a1, + + vst1q_f32( ptr, v.val[0]); + vst1q_f32( (ptr + 4), v.val[1]); + vst1q_f32( (ptr + 8), v.val[2]); +} +#define vst3q_f32(ptr, val) vst3q_f32_ptr(ptr, &val) + +//void vst3q_p8(__transfersize(48) poly8_t * ptr, poly8x16x3_t val);// VST3.8 {d0, d2, d4}, [r0] +_NEON2SSE_GLOBAL void vst3q_p8_ptr(__transfersize(48) poly8_t * ptr, poly8x16x3_t const * val); +#define vst3q_p8 vst3q_u8 + +//void vst3q_p16(__transfersize(24) poly16_t * ptr, poly16x8x3_t val);// VST3.16 {d0, d2, d4}, [r0] +_NEON2SSE_GLOBAL void vst3q_p16_ptr(__transfersize(24) poly16_t * ptr, poly16x8x3_t const * val); +#define vst3q_p16 vst3q_u16 + +_NEON2SSESTORAGE void vst3_u8(__transfersize(24) uint8_t * ptr, uint8x8x3_t val);// VST3.8 {d0, d1, d2}, [r0] +_NEON2SSE_INLINE void vst3_u8(__transfersize(24) uint8_t * ptr, uint8x8x3_t val) +{ + __m128i tmp, sh0, sh1, val0, val2; + _NEON2SSE_ALIGN_16 static const int8_t mask0[16] = { 0, 8, 16, 1, 9, 17, 2, 10, 18, 3, 11, 19, 4, 12, 20, 5}; + _NEON2SSE_ALIGN_16 static const int8_t mask1[16] = {13, 21, 6, 14, 22, 7, 15, 23, 0,0,0,0,0,0,0,0}; + _NEON2SSE_ALIGN_16 static const uint8_t mask0_sel[16] = {0, 0, 0xff, 0, 0, 0xff, 0, 0, 0xff, 0, 0, 0xff, 0, 0, 0xff, 0}; + _NEON2SSE_ALIGN_16 static const uint8_t mask1_sel[16] = {0, 0xff, 0, 0, 0xff, 0, 0, 0xff, 0,0,0,0,0,0,0,0}; + tmp = _mm_unpacklo_epi64(_pM128i(val.val[0]), _pM128i(val.val[1]) ); + sh0 = _mm_shuffle_epi8(tmp, *(__m128i*)mask0); //for bi>15 bi is wrapped (bi-=16) + val2 = _pM128i(val.val[2]); + sh1 = _mm_shuffle_epi8(val2, *(__m128i*)mask0); + val0 = _MM_BLENDV_EPI8(sh0, sh1, *(__m128i*)mask0_sel); + vst1q_u8(ptr, val0); //store as 128 bit structure + sh0 = _mm_shuffle_epi8(tmp, *(__m128i*)mask1); //for bi>15 bi is wrapped (bi-=16) + sh1 = _mm_shuffle_epi8(val2, *(__m128i*)mask1); + val2 = _MM_BLENDV_EPI8(sh0, sh1, *(__m128i*)mask1_sel); + _M64((*(__m64_128*)(ptr + 16)), val2); //need it to fit into *ptr memory +} + +_NEON2SSESTORAGE void vst3_u16(__transfersize(12) uint16_t * ptr, uint16x4x3_t val);// VST3.16 {d0, d1, d2}, [r0] +_NEON2SSE_INLINE void vst3_u16(__transfersize(12) uint16_t * ptr, uint16x4x3_t val) +{ + __m128i tmp, val0, val1, val2; + _NEON2SSE_ALIGN_16 static const int8_t mask0[16] = {0,1, 8,9, 16,17, 2,3, 10,11, 18,19, 4,5, 12,13}; + _NEON2SSE_ALIGN_16 static const int8_t mask1[16] = {20,21, 6,7, 14,15, 22,23, 0,0,0,0,0,0,0,0}; + _NEON2SSE_ALIGN_16 static const uint16_t mask0f[8] = {0xffff, 0xffff, 0, 0xffff, 0xffff, 0, 0xffff, 0xffff}; //if all ones we take the result from v.val[0] otherwise from v.val[1] + _NEON2SSE_ALIGN_16 static const uint16_t mask1f[8] = {0xffff, 0, 0, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}; //if all ones we take the result from v.val[1] otherwise from v.val[0] + tmp = _mm_unpacklo_epi64(_pM128i(val.val[0]), _pM128i(val.val[1])); + val0 = _mm_shuffle_epi8(tmp, *(__m128i*)mask0); + val2 = _pM128i(val.val[2]); + val1 = _mm_shuffle_epi8(val2, *(__m128i*)mask0); + val0 = _MM_BLENDV_EPI8(val1, val0, *(__m128i*)mask0f); + vst1q_u16(ptr, val0); //store as 128 bit structure + val0 = _mm_shuffle_epi8(tmp, *(__m128i*)mask1); + val1 = _mm_shuffle_epi8(val2, *(__m128i*)mask1); + val1 = _MM_BLENDV_EPI8(val0, val1, *(__m128i*)mask1f); //change the operands order + _M64((*(__m64_128*)(ptr
+ 8)), val1); //need it to fit into *ptr memory +} + +_NEON2SSESTORAGE void vst3_u32(__transfersize(6) uint32_t * ptr, uint32x2x3_t val);// VST3.32 {d0, d1, d2}, [r0] +_NEON2SSE_INLINE void vst3_u32(__transfersize(6) uint32_t * ptr, uint32x2x3_t val) +{ + //val.val[0]:0,3,val.val[1]:1,4; val.val[2]:2,5,x,x; + __m128i val0, val1; + val0 = _mm_unpacklo_epi64(_pM128i(val.val[1]), _pM128i(val.val[2])); //val[0]: 1,4,2,5 + val0 = _mm_shuffle_epi32(val0, 0 | (2 << 2) | (1 << 4) | (3 << 6)); //1,2,4,5 + val1 = _mm_srli_si128(val0, 8); //4,5, x,x + _M64((*(__m64_128*)(ptr + 4)), val1); + val0 = _mm_unpacklo_epi32(_pM128i(val.val[0]), val0); //0,1,3,2 + val0 = _mm_shuffle_epi32(val0, 0 | (1 << 2) | (3 << 4) | (2 << 6)); //0,1,2,3 + vst1q_u32(ptr, val0); //store as 128 bit structure +} + +_NEON2SSESTORAGE void vst3_u64(__transfersize(3) uint64_t * ptr, uint64x1x3_t val);// VST1.64 {d0, d1, d2}, [r0] +_NEON2SSE_INLINE void vst3_u64(__transfersize(3) uint64_t * ptr, uint64x1x3_t val) +{ + *(ptr) = val.val[0].m64_u64[0]; + *(ptr + 1) = val.val[1].m64_u64[0]; + *(ptr + 2) = val.val[2].m64_u64[0]; +} + +_NEON2SSE_GLOBAL void vst3_s8(__transfersize(24) int8_t * ptr, int8x8x3_t val); // VST3.8 {d0, d1, d2}, [r0] +#define vst3_s8(ptr, val) vst3_u8((uint8_t*)ptr, val) + +_NEON2SSE_GLOBAL void vst3_s16(__transfersize(12) int16_t * ptr, int16x4x3_t val); // VST3.16 {d0, d1, d2}, [r0] +#define vst3_s16(ptr, val) vst3_u16((uint16_t*)ptr, val) + +_NEON2SSE_GLOBAL void vst3_s32(__transfersize(6) int32_t * ptr, int32x2x3_t val); // VST3.32 {d0, d1, d2}, [r0] +#define vst3_s32(ptr, val) vst3_u32((uint32_t*)ptr, val) + +_NEON2SSE_GLOBAL void vst3_s64(__transfersize(3) int64_t * ptr, int64x1x3_t val); // VST1.64 {d0, d1, d2}, [r0] +#define vst3_s64(ptr, val) vst3_u64((uint64_t*)ptr, val) + +//void vst3_f16(__transfersize(12) __fp16 * ptr, float16x4x3_t val);// VST3.16 {d0, d1, d2}, [r0] +_NEON2SSE_GLOBAL void vst3_f16_ptr(__transfersize(12) __fp16 * ptr, float16x4x3_t const * val); // VST3.16 {d0, d1, d2}, [r0] +// IA32 SIMD doesn't work with 16bit floats currently, so need to go to 32 bit and then work with two 128bit registers. See vld1q_f16 for example
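+//Illustrative sketch (hypothetical buffers, not part of the library): vst3_u32 above
+//interleaves three 2-lane vectors back into x0,y0,z0, x1,y1,z1 memory order, i.e. it
+//is the inverse of vld3_u32:
+//
+//    uint32_t out[6];
+//    uint32x2x3_t xyz;    //previously initialized, e.g. by vld3_u32
+//    vst3_u32(out, xyz);  //out = {x0, y0, z0, x1, y1, z1}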
+ +_NEON2SSESTORAGE void vst3_f32(__transfersize(6) float32_t * ptr, float32x2x3_t val);// VST3.32 {d0, d1, d2}, [r0] +_NEON2SSE_INLINE void vst3_f32(__transfersize(6) float32_t * ptr, float32x2x3_t val) +{ + //val->val[0]:0,3,val->val[1]:1,4; val->val[2]:2,5,x,x; -> 0,1,2,3,4,5 + *(ptr) = val.val[0].m64_f32[0]; + *(ptr + 1) = val.val[1].m64_f32[0]; + *(ptr + 2) = val.val[2].m64_f32[0]; + *(ptr + 3) = val.val[0].m64_f32[1]; + *(ptr + 4) = val.val[1].m64_f32[1]; + *(ptr + 5) = val.val[2].m64_f32[1]; +} + +_NEON2SSE_GLOBAL void vst3_p8(__transfersize(24) poly8_t * ptr, poly8x8x3_t val);// VST3.8 {d0, d1, d2}, [r0] +#define vst3_p8 vst3_u8 + +_NEON2SSE_GLOBAL void vst3_p16(__transfersize(12) poly16_t * ptr, poly16x4x3_t val);// VST3.16 {d0, d1, d2}, [r0] +#define vst3_p16 vst3_u16 + +//*************** Quadruples store ******************************** +//********************************************************************* +//void vst4q_u8(__transfersize(64) uint8_t * ptr, uint8x16x4_t val)// VST4.8 {d0, d2, d4, d6}, [r0] +_NEON2SSE_INLINE void vst4q_u8_ptr(__transfersize(64) uint8_t * ptr, uint8x16x4_t const * val) +{ + __m128i tmp1, tmp2, res; + tmp1 = _mm_unpacklo_epi8(val->val[0], val->val[1]); // 0,1, 4,5, 8,9, 12,13, 16,17, 20,21, 24,25, 28,29 + tmp2 = _mm_unpacklo_epi8(val->val[2], val->val[3]); // 2,3, 6,7, 10,11, 14,15, 18,19, 22,23, 26,27, 30,31 + res = _mm_unpacklo_epi16(tmp1, tmp2); //0,1, 2,3, 4,5, 6,7, 8,9, 10,11, 12,13, 14,15 + vst1q_u8(ptr, res); + res = _mm_unpackhi_epi16(tmp1, tmp2); //16,17, 18,19, 20,21, 22,23, 24,25, 26,27, 28,29, 30,31 + vst1q_u8((ptr + 16), res); + tmp1 = _mm_unpackhi_epi8(val->val[0], val->val[1]); // + tmp2 = _mm_unpackhi_epi8(val->val[2], val->val[3]); // + res = _mm_unpacklo_epi16(tmp1, tmp2); // + vst1q_u8((ptr + 32), res); + res = _mm_unpackhi_epi16(tmp1, tmp2); // + vst1q_u8((ptr + 48), res); +} +#define vst4q_u8(ptr, val) vst4q_u8_ptr(ptr, &val) + +//void vst4q_u16(__transfersize(32) uint16_t * ptr, uint16x8x4_t val)// VST4.16 {d0, d2, d4, d6}, [r0] +_NEON2SSE_INLINE void vst4q_u16_ptr(__transfersize(32) uint16_t * ptr, uint16x8x4_t const * val) +{ + uint16x8x4_t v; + __m128i tmp1, tmp2; + tmp1 = _mm_unpacklo_epi16(val->val[0], val->val[1]); //0,1, 4,5, 8,9, 12,13 + tmp2 = _mm_unpacklo_epi16(val->val[2], val->val[3]); //2,3, 6,7, 10,11, 14,15 + v.val[0] = _mm_unpacklo_epi32(tmp1, tmp2); + v.val[1] = _mm_unpackhi_epi32(tmp1, tmp2); + tmp1 = _mm_unpackhi_epi16(val->val[0], val->val[1]); //16,17, 20,21, 24,25, 28,29 + tmp2 = _mm_unpackhi_epi16(val->val[2], val->val[3]); //18,19, 22,23, 26,27, 30,31 + v.val[2] = _mm_unpacklo_epi32(tmp1, tmp2); + v.val[3] = _mm_unpackhi_epi32(tmp1, tmp2); + vst1q_u16(ptr, v.val[0]); + vst1q_u16((ptr + 8), v.val[1]); + vst1q_u16((ptr + 16),v.val[2]); + vst1q_u16((ptr + 24), v.val[3]); +} +#define vst4q_u16(ptr, val) vst4q_u16_ptr(ptr, &val) + +//void vst4q_u32(__transfersize(16) uint32_t * ptr, uint32x4x4_t val)// VST4.32 {d0, d2, d4, d6}, [r0] +_NEON2SSE_INLINE void vst4q_u32_ptr(__transfersize(16) uint32_t * ptr, uint32x4x4_t const * val) +{ + uint32x4x4_t v; + __m128i tmp1, tmp2; + tmp1 = _mm_unpacklo_epi32(val->val[0], val->val[1]); //0,1, 4,5 + tmp2 = _mm_unpacklo_epi32(val->val[2], val->val[3]); //2,3, 6,7 + v.val[0] = _mm_unpacklo_epi64(tmp1, tmp2); + v.val[1] = _mm_unpackhi_epi64(tmp1, tmp2); + tmp1 = _mm_unpackhi_epi32(val->val[0], val->val[1]); //8,9, 12,13 + tmp2 = _mm_unpackhi_epi32(val->val[2], val->val[3]); //10,11, 14,15 + v.val[2] =
+
+//void vst4q_s8(__transfersize(64) int8_t * ptr, int8x16x4_t val);
+_NEON2SSE_GLOBAL void vst4q_s8_ptr(__transfersize(64) int8_t * ptr, int8x16x4_t const * val);
+#define vst4q_s8(ptr, val) vst4q_u8((uint8_t*)(ptr), val)
+
+//void vst4q_s16(__transfersize(32) int16_t * ptr, int16x8x4_t val);
+_NEON2SSE_GLOBAL void vst4q_s16_ptr(__transfersize(32) int16_t * ptr, int16x8x4_t const * val);
+#define vst4q_s16(ptr, val) vst4q_u16((uint16_t*)(ptr), val)
+
+//void vst4q_s32(__transfersize(16) int32_t * ptr, int32x4x4_t val);
+_NEON2SSE_GLOBAL void vst4q_s32_ptr(__transfersize(16) int32_t * ptr, int32x4x4_t const * val);
+#define vst4q_s32(ptr, val) vst4q_u32((uint32_t*)(ptr), val)
+
+//void vst4q_f16(__transfersize(32) __fp16 * ptr, float16x8x4_t val);// VST4.16 {d0, d2, d4, d6}, [r0]
+_NEON2SSE_GLOBAL void vst4q_f16_ptr(__transfersize(32) __fp16 * ptr, float16x8x4_t const * val);
+// IA32 SIMD doesn't work with 16bit floats currently
+
+//void vst4q_f32(__transfersize(16) float32_t * ptr, float32x4x4_t val)// VST4.32 {d0, d2, d4, d6}, [r0]
+_NEON2SSE_INLINE void vst4q_f32_ptr(__transfersize(16) float32_t * ptr, float32x4x4_t const * val)
+{
+    __m128 tmp3, tmp2, tmp1, tmp0;
+    float32x4x4_t v;
+    tmp0 = _mm_unpacklo_ps(val->val[0], val->val[1]);
+    tmp2 = _mm_unpacklo_ps(val->val[2], val->val[3]);
+    tmp1 = _mm_unpackhi_ps(val->val[0], val->val[1]);
+    tmp3 = _mm_unpackhi_ps(val->val[2], val->val[3]);
+    v.val[0] = _mm_movelh_ps(tmp0, tmp2);
+    v.val[1] = _mm_movehl_ps(tmp2, tmp0);
+    v.val[2] = _mm_movelh_ps(tmp1, tmp3);
+    v.val[3] = _mm_movehl_ps(tmp3, tmp1);
+    vst1q_f32(ptr, v.val[0]);
+    vst1q_f32((ptr + 4), v.val[1]);
+    vst1q_f32((ptr + 8), v.val[2]);
+    vst1q_f32((ptr + 12), v.val[3]);
+}
+#define vst4q_f32(ptr, val) vst4q_f32_ptr(ptr, &val)
+
+//void vst4q_p8(__transfersize(64) poly8_t * ptr, poly8x16x4_t val);// VST4.8 {d0, d2, d4, d6}, [r0]
+_NEON2SSE_GLOBAL void vst4q_p8_ptr(__transfersize(64) poly8_t * ptr, poly8x16x4_t const * val);
+#define vst4q_p8 vst4q_u8
+
+//void vst4q_p16(__transfersize(32) poly16_t * ptr, poly16x8x4_t val);// VST4.16 {d0, d2, d4, d6}, [r0]
+_NEON2SSE_GLOBAL void vst4q_p16_ptr(__transfersize(32) poly16_t * ptr, poly16x8x4_t const * val);
+#define vst4q_p16 vst4q_s16
+
+_NEON2SSESTORAGE void vst4_u8(__transfersize(32) uint8_t * ptr, uint8x8x4_t val);// VST4.8 {d0, d1, d2, d3}, [r0]
+_NEON2SSE_INLINE void vst4_u8(__transfersize(32) uint8_t * ptr, uint8x8x4_t val)
+{
+    __m128i sh0, sh1, val0, val2;
+    sh0 = _mm_unpacklo_epi8(_pM128i(val.val[0]),_pM128i(val.val[1])); // a0,b0,a1,b1,a2,b2,a3,b3,a4,b4,a5,b5,a6,b6,a7,b7
+    sh1 = _mm_unpacklo_epi8(_pM128i(val.val[2]),_pM128i(val.val[3])); // c0,d0,c1,d1,c2,d2,c3,d3,c4,d4,c5,d5,c6,d6,c7,d7
+    val0 = _mm_unpacklo_epi16(sh0,sh1); // a0,b0,c0,d0,a1,b1,c1,d1,a2,b2,c2,d2,a3,b3,c3,d3
+    val2 = _mm_unpackhi_epi16(sh0,sh1); // a4,b4,c4,d4,a5,b5,c5,d5,a6,b6,c6,d6,a7,b7,c7,d7
+    vst1q_u8(ptr, val0);
+    vst1q_u8((ptr + 16), val2);
+}
+
+_NEON2SSESTORAGE void vst4_u16(__transfersize(16) uint16_t * ptr, uint16x4x4_t val);// VST4.16 {d0, d1, d2, d3}, [r0]
+_NEON2SSE_INLINE void vst4_u16(__transfersize(16) uint16_t * ptr, uint16x4x4_t val)
+{
+    __m128i sh0, sh1, val0, val2;
+    sh0 = _mm_unpacklo_epi16(_pM128i(val.val[0]),_pM128i(val.val[1])); //a0,b0,a1,b1,a2,b2,a3,b3
+    sh1 = _mm_unpacklo_epi16(_pM128i(val.val[2]),_pM128i(val.val[3])); //c0,d0,c1,d1,c2,d2,c3,d3
+    val0 = _mm_unpacklo_epi32(sh0,sh1); // a0,b0,c0,d0,a1,b1,c1,d1
+    val2 = _mm_unpackhi_epi32(sh0,sh1); // a2,b2,c2,d2,a3,b3,c3,d3
+    vst1q_u16(ptr, val0); //store as 128 bit structure
+    vst1q_u16((ptr + 8), val2);
+}
+
+_NEON2SSESTORAGE void vst4_u32(__transfersize(8) uint32_t * ptr, uint32x2x4_t val);// VST4.32 {d0, d1, d2, d3}, [r0]
+_NEON2SSE_INLINE void vst4_u32(__transfersize(8) uint32_t * ptr, uint32x2x4_t val)
+{
+    //0,4, 1,5, 2,6, 3,7
+    __m128i sh0, sh1, val0, val1;
+    sh0 = _mm_unpacklo_epi32(_pM128i(val.val[0]), _pM128i(val.val[1])); //0,1,4,5
+    sh1 = _mm_unpacklo_epi32(_pM128i(val.val[2]), _pM128i(val.val[3])); //2,3,6,7
+    val0 = _mm_unpacklo_epi64(sh0,sh1); //0,1,2,3
+    val1 = _mm_unpackhi_epi64(sh0,sh1); //4,5,6,7
+    vst1q_u32(ptr, val0); //store as 128 bit structure
+    vst1q_u32((ptr + 4), val1);
+}
+
+_NEON2SSESTORAGE void vst4_u64(__transfersize(4) uint64_t * ptr, uint64x1x4_t val);// VST1.64 {d0, d1, d2, d3}, [r0]
+_NEON2SSE_INLINE void vst4_u64(__transfersize(4) uint64_t * ptr, uint64x1x4_t val)
+{
+    *(ptr) = val.val[0].m64_u64[0];
+    *(ptr + 1) = val.val[1].m64_u64[0];
+    *(ptr + 2) = val.val[2].m64_u64[0];
+    *(ptr + 3) = val.val[3].m64_u64[0];
+}
+
+//void vst4_s8(__transfersize(32) int8_t * ptr, int8x8x4_t val) //VST4.8 {d0, d1, d2, d3}, [r0]
+#define vst4_s8(ptr, val) vst4_u8((uint8_t*)ptr, val)
+
+//void vst4_s16(__transfersize(16) int16_t * ptr, int16x4x4_t val) // VST4.16 {d0, d1, d2, d3}, [r0]
+#define vst4_s16(ptr, val) vst4_u16((uint16_t*)ptr, val)
+
+//void vst4_s32(__transfersize(8) int32_t * ptr, int32x2x4_t val) // VST4.32 {d0, d1, d2, d3}, [r0]
+#define vst4_s32(ptr, val) vst4_u32((uint32_t*)ptr, val)
+
+//void vst4_s64(__transfersize(4) int64_t * ptr, int64x1x4_t val); // VST1.64 {d0, d1, d2, d3}, [r0]
+_NEON2SSE_GLOBAL void vst4_s64_ptr(__transfersize(4) int64_t * ptr, int64x1x4_t const * val);
+#define vst4_s64(ptr, val) vst4_u64((uint64_t*)ptr, val)
+
+//void vst4_f16(__transfersize(16) __fp16 * ptr, float16x4x4_t val);// VST4.16 {d0, d1, d2, d3}, [r0]
+_NEON2SSE_GLOBAL void vst4_f16_ptr(__transfersize(16) __fp16 * ptr, float16x4x4_t const * val);
+// IA32 SIMD doesn't work with 16bit floats currently, so need to go to 32 bit and then work with two 128bit registers.
See vld1q_f16 for example + +_NEON2SSESTORAGE void vst4_f32(__transfersize(8) float32_t * ptr, float32x2x4_t val);// VST4.32 {d0, d1, d2, d3}, [r0] +_NEON2SSE_INLINE void vst4_f32(__transfersize(8) float32_t * ptr, float32x2x4_t val) +{ + //0,4, 1,5, 2,6, 3,7 -> 0,1, 2,3, 4,5, 6,7 + *(ptr) = val.val[0].m64_f32[0]; + *(ptr + 1) = val.val[1].m64_f32[0]; + *(ptr + 2) = val.val[2].m64_f32[0]; + *(ptr + 3) = val.val[3].m64_f32[0]; + *(ptr + 4) = val.val[0].m64_f32[1]; + *(ptr + 5) = val.val[1].m64_f32[1]; + *(ptr + 6) = val.val[2].m64_f32[1]; + *(ptr + 7) = val.val[3].m64_f32[1]; +} + +_NEON2SSE_GLOBAL void vst4_p8(__transfersize(32) poly8_t * ptr, poly8x8x4_t val);// VST4.8 {d0, d1, d2, d3}, [r0] +#define vst4_p8 vst4_u8 + +_NEON2SSE_GLOBAL void vst4_p16(__transfersize(16) poly16_t * ptr, poly16x4x4_t val);// VST4.16 {d0, d1, d2, d3}, [r0] +#define vst4_p16 vst4_u16 + +//*********** Store a lane of a vector into memory (extract given lane) for a couple of vectors ********************* +//******************************************************************************************************************** +//void vst2q_lane_u16(__transfersize(2) uint16_t * ptr, uint16x8x2_t val, __constrange(0,7) int lane)// VST2.16 {d0[0], d2[0]}, [r0] +_NEON2SSE_INLINE void vst2q_lane_u16_ptr(__transfersize(2) uint16_t * ptr, uint16x8x2_t const * val, __constrange(0,7) int lane) +{ + vst1q_lane_s16(ptr, val->val[0], lane); + vst1q_lane_s16((ptr + 1), val->val[1], lane); +} +#define vst2q_lane_u16(ptr, val, lane) vst2q_lane_u16_ptr(ptr, &val, lane) + +//void vst2q_lane_u32(__transfersize(2) uint32_t * ptr, uint32x4x2_t val, __constrange(0,3) int lane)// VST2.32 {d0[0], d2[0]}, [r0] +_NEON2SSE_INLINE void vst2q_lane_u32_ptr(__transfersize(2) uint32_t* ptr, uint32x4x2_t const * val, __constrange(0,3) int lane) +{ + vst1q_lane_u32(ptr, val->val[0], lane); + vst1q_lane_u32((ptr + 1), val->val[1], lane); +} +#define vst2q_lane_u32(ptr, val, lane) vst2q_lane_u32_ptr(ptr, &val, lane) + +//void vst2q_lane_s16(__transfersize(2) int16_t * ptr, int16x8x2_t val, __constrange(0,7) int lane);// VST2.16 {d0[0], d2[0]}, [r0] +_NEON2SSE_GLOBAL void vst2q_lane_s16_ptr(__transfersize(2) int16_t * ptr, int16x8x2_t const * val, __constrange(0,7) int lane); +#define vst2q_lane_s16(ptr, val, lane) vst2q_lane_u16((uint16_t*)ptr, val, lane) + +//void vst2q_lane_s32(__transfersize(2) int32_t * ptr, int32x4x2_t val, __constrange(0,3) int lane);// VST2.32 {d0[0], d2[0]}, [r0] +_NEON2SSE_GLOBAL void vst2q_lane_s32_ptr(__transfersize(2) int32_t * ptr, int32x4x2_t const * val, __constrange(0,3) int lane); +#define vst2q_lane_s32(ptr, val, lane) vst2q_lane_u32((uint32_t*)ptr, val, lane) + +//void vst2q_lane_f16(__transfersize(2) __fp16 * ptr, float16x8x2_t val, __constrange(0,7) int lane);// VST2.16 {d0[0], d2[0]}, [r0] +_NEON2SSE_GLOBAL void vst2q_lane_f16_ptr(__transfersize(2) __fp16 * ptr, float16x8x2_t const * val, __constrange(0,7) int lane); +//current IA SIMD doesn't support float16 + +//void vst2q_lane_f32(__transfersize(2) float32_t * ptr, float32x4x2_t val, __constrange(0,3) int lane)// VST2.32 {d0[0], d2[0]}, [r0] +_NEON2SSE_INLINE void vst2q_lane_f32_ptr(__transfersize(2) float32_t* ptr, float32x4x2_t const * val, __constrange(0,3) int lane) +{ + vst1q_lane_f32(ptr, val->val[0], lane); + vst1q_lane_f32((ptr + 1), val->val[1], lane); +} +#define vst2q_lane_f32(ptr,src,lane) vst2q_lane_f32_ptr(ptr,&src,lane) + +//void vst2q_lane_p16(__transfersize(2) poly16_t * ptr, poly16x8x2_t val, __constrange(0,7) int lane);// VST2.16 {d0[0], 
d2[0]}, [r0] +_NEON2SSE_GLOBAL void vst2q_lane_p16_ptr(__transfersize(2) poly16_t * ptr, poly16x8x2_t const * val, __constrange(0,7) int lane); +#define vst2q_lane_p16 vst2q_lane_s16 + +_NEON2SSESTORAGE void vst2_lane_u8(__transfersize(2) uint8_t * ptr, uint8x8x2_t val, __constrange(0,7) int lane);// VST2.8 {d0[0], d1[0]}, [r0] +_NEON2SSE_INLINE void vst2_lane_u8(__transfersize(2) uint8_t * ptr, uint8x8x2_t val, __constrange(0,7) int lane) // VST2.8 {d0[0], d1[0]}, [r0] +{ + *(ptr) = val.val[0].m64_u8[lane]; + *(ptr + 1) = val.val[1].m64_u8[lane]; +} + +_NEON2SSESTORAGE void vst2_lane_u16(__transfersize(2) uint16_t * ptr, uint16x4x2_t val, __constrange(0,3) int lane);// VST2.16 {d0[0], d1[0]}, [r0] +_NEON2SSE_INLINE void vst2_lane_u16(__transfersize(2) uint16_t * ptr, uint16x4x2_t val, __constrange(0,3) int lane) +{ + *(ptr) = val.val[0].m64_u16[lane]; + *(ptr + 1) = val.val[1].m64_u16[lane]; +} + +_NEON2SSESTORAGE void vst2_lane_u32(__transfersize(2) uint32_t * ptr, uint32x2x2_t val, __constrange(0,1) int lane);// VST2.32 {d0[0], d1[0]}, [r0] +_NEON2SSE_INLINE void vst2_lane_u32(__transfersize(2) uint32_t * ptr, uint32x2x2_t val, __constrange(0,1) int lane) +{ + *(ptr) = val.val[0].m64_u32[lane]; + *(ptr + 1) = val.val[1].m64_u32[lane]; +} + +_NEON2SSE_GLOBAL void vst2_lane_s8(__transfersize(2) int8_t * ptr, int8x8x2_t val, __constrange(0,7) int lane);// VST2.8 {d0[0], d1[0]}, [r0] +#define vst2_lane_s8(ptr, val, lane) vst2_lane_u8((uint8_t*)ptr, val, lane) + +_NEON2SSE_GLOBAL void vst2_lane_s16(__transfersize(2) int16_t * ptr, int16x4x2_t val, __constrange(0,3) int lane);// VST2.16 {d0[0], d1[0]}, [r0] +#define vst2_lane_s16(ptr, val, lane) vst2_lane_u16((uint16_t*)ptr, val, lane) + +_NEON2SSE_GLOBAL void vst2_lane_s32(__transfersize(2) int32_t * ptr, int32x2x2_t val, __constrange(0,1) int lane);// VST2.32 {d0[0], d1[0]}, [r0] +#define vst2_lane_s32(ptr, val, lane) vst2_lane_u32((uint32_t*)ptr, val, lane) + +//void vst2_lane_f16(__transfersize(2) __fp16 * ptr, float16x4x2_t val, __constrange(0,3) int lane); // VST2.16 {d0[0], d1[0]}, [r0] +//current IA SIMD doesn't support float16 + +_NEON2SSESTORAGE void vst2_lane_f32(__transfersize(2) float32_t * ptr, float32x2x2_t val, __constrange(0,1) int lane); // VST2.32 {d0[0], d1[0]}, [r0] +_NEON2SSE_INLINE void vst2_lane_f32(__transfersize(2) float32_t * ptr, float32x2x2_t val, __constrange(0,1) int lane) +{ + *(ptr) = val.val[0].m64_f32[lane]; + *(ptr + 1) = val.val[1].m64_f32[lane]; +} + +_NEON2SSE_GLOBAL void vst2_lane_p8(__transfersize(2) poly8_t * ptr, poly8x8x2_t val, __constrange(0,7) int lane);// VST2.8 {d0[0], d1[0]}, [r0] +#define vst2_lane_p8 vst2_lane_u8 + +_NEON2SSE_GLOBAL void vst2_lane_p16(__transfersize(2) poly16_t * ptr, poly16x4x2_t val, __constrange(0,3) int lane);// VST2.16 {d0[0], d1[0]}, [r0] +#define vst2_lane_p16 vst2_lane_u16 + +//************************* Triple lanes stores ******************************************************* +//******************************************************************************************************* +//void vst3q_lane_u16(__transfersize(3) uint16_t * ptr, uint16x8x3_t val, __constrange(0,7) int lane)// VST3.16 {d0[0], d2[0], d4[0]}, [r0] +_NEON2SSE_INLINE void vst3q_lane_u16_ptr(__transfersize(3) uint16_t * ptr, uint16x8x3_t const * val, __constrange(0,7) int lane) +{ + vst2q_lane_u16_ptr(ptr, (uint16x8x2_t*)val, lane); + vst1q_lane_u16((ptr + 2), val->val[2], lane); +} +#define vst3q_lane_u16(ptr, val, lane) vst3q_lane_u16_ptr(ptr, &val, lane) + +//void 
vst3q_lane_u32(__transfersize(3) uint32_t * ptr, uint32x4x3_t val, __constrange(0,3) int lane)// VST3.32 {d0[0], d2[0], d4[0]}, [r0] +_NEON2SSE_INLINE void vst3q_lane_u32_ptr(__transfersize(3) uint32_t * ptr, uint32x4x3_t const * val, __constrange(0,3) int lane) +{ + vst2q_lane_u32_ptr(ptr, (uint32x4x2_t*)val, lane); + vst1q_lane_u32((ptr + 2), val->val[2], lane); +} +#define vst3q_lane_u32(ptr, val, lane) vst3q_lane_u32_ptr(ptr, &val, lane) + +//void vst3q_lane_s16(__transfersize(3) int16_t * ptr, int16x8x3_t val, __constrange(0,7) int lane);// VST3.16 {d0[0], d2[0], d4[0]}, [r0] +_NEON2SSE_GLOBAL void vst3q_lane_s16_ptr(__transfersize(3) int16_t * ptr, int16x8x3_t const * val, __constrange(0,7) int lane); +#define vst3q_lane_s16(ptr, val, lane) vst3q_lane_u16((uint16_t *)ptr, val, lane) + +//void vst3q_lane_s32(__transfersize(3) int32_t * ptr, int32x4x3_t val, __constrange(0,3) int lane);// VST3.32 {d0[0], d2[0], d4[0]}, [r0] +_NEON2SSE_GLOBAL void vst3q_lane_s32_ptr(__transfersize(3) int32_t * ptr, int32x4x3_t const * val, __constrange(0,3) int lane); +#define vst3q_lane_s32(ptr, val, lane) vst3q_lane_u32((uint32_t *)ptr, val, lane) + +//void vst3q_lane_f16(__transfersize(3) __fp16 * ptr, float16x8x3_t val, __constrange(0,7) int lane);// VST3.16 {d0[0], d2[0], d4[0]}, [r0] +_NEON2SSE_GLOBAL void vst3q_lane_f16_ptr(__transfersize(3) __fp16 * ptr, float16x8x3_t const * val, __constrange(0,7) int lane); +//current IA SIMD doesn't support float16 + +//void vst3q_lane_f32(__transfersize(3) float32_t * ptr, float32x4x3_t val, __constrange(0,3) int lane)// VST3.32 {d0[0], d2[0], d4[0]}, [r0] +_NEON2SSE_INLINE void vst3q_lane_f32_ptr(__transfersize(3) float32_t * ptr, float32x4x3_t const * val, __constrange(0,3) int lane) +{ + vst1q_lane_f32(ptr, val->val[0], lane); + vst1q_lane_f32((ptr + 1), val->val[1], lane); + vst1q_lane_f32((ptr + 2), val->val[2], lane); +} +#define vst3q_lane_f32(ptr,val,lane) vst3q_lane_f32_ptr(ptr,&val,lane) + +//void vst3q_lane_p16(__transfersize(3) poly16_t * ptr, poly16x8x3_t val, __constrange(0,7) int lane);// VST3.16 {d0[0], d2[0], d4[0]}, [r0] +_NEON2SSE_GLOBAL void vst3q_lane_p16_ptr(__transfersize(3) poly16_t * ptr, poly16x8x3_t const * val, __constrange(0,7) int lane); +#define vst3q_lane_p16 vst3q_lane_s16 + +_NEON2SSESTORAGE void vst3_lane_u8(__transfersize(3) uint8_t * ptr, uint8x8x3_t val, __constrange(0,7) int lane);// VST3.8 {d0[0], d1[0], d2[0]}, [r0] +_NEON2SSE_INLINE void vst3_lane_u8(__transfersize(3) uint8_t * ptr, uint8x8x3_t val, __constrange(0,7) int lane) +{ + *(ptr) = val.val[0].m64_u8[lane]; + *(ptr + 1) = val.val[1].m64_u8[lane]; + *(ptr + 2) = val.val[2].m64_u8[lane]; +} + +_NEON2SSESTORAGE void vst3_lane_u16(__transfersize(3) uint16_t * ptr, uint16x4x3_t val, __constrange(0,3) int lane);// VST3.16 {d0[0], d1[0], d2[0]}, [r0] +_NEON2SSE_INLINE void vst3_lane_u16(__transfersize(3) uint16_t * ptr, uint16x4x3_t val, __constrange(0,3) int lane) +{ + *(ptr) = val.val[0].m64_u16[lane]; + *(ptr + 1) = val.val[1].m64_u16[lane]; + *(ptr + 2) = val.val[2].m64_u16[lane]; +} + +_NEON2SSESTORAGE void vst3_lane_u32(__transfersize(3) uint32_t * ptr, uint32x2x3_t val, __constrange(0,1) int lane);// VST3.32 {d0[0], d1[0], d2[0]}, [r0] +_NEON2SSE_INLINE void vst3_lane_u32(__transfersize(3) uint32_t * ptr, uint32x2x3_t val, __constrange(0,1) int lane) +{ + *(ptr) = val.val[0].m64_u32[lane]; + *(ptr + 1) = val.val[1].m64_u32[lane]; + *(ptr + 2) = val.val[2].m64_u32[lane]; +} + +_NEON2SSE_GLOBAL void vst3_lane_s8(__transfersize(3) int8_t * ptr, int8x8x3_t val, 
__constrange(0,7) int lane);// VST3.8 {d0[0], d1[0], d2[0]}, [r0] +#define vst3_lane_s8(ptr, val, lane) vst3_lane_u8((uint8_t *)ptr, val, lane) + +_NEON2SSE_GLOBAL void vst3_lane_s16(__transfersize(3) int16_t * ptr, int16x4x3_t val, __constrange(0,3) int lane);// VST3.16 {d0[0], d1[0], d2[0]}, [r0] +#define vst3_lane_s16(ptr, val, lane) vst3_lane_u16((uint16_t *)ptr, val, lane) + +_NEON2SSE_GLOBAL void vst3_lane_s32(__transfersize(3) int32_t * ptr, int32x2x3_t val, __constrange(0,1) int lane);// VST3.32 {d0[0], d1[0], d2[0]}, [r0] +#define vst3_lane_s32(ptr, val, lane) vst3_lane_u32((uint32_t *)ptr, val, lane) + +//void vst3_lane_f16(__transfersize(3) __fp16 * ptr, float16x4x3_t val, __constrange(0,3) int lane);// VST3.16 {d0[0], d1[0], d2[0]}, [r0] +_NEON2SSE_GLOBAL void vst3_lane_f16_ptr(__transfersize(3) __fp16 * ptr, float16x4x3_t const * val, __constrange(0,3) int lane); +//current IA SIMD doesn't support float16 + +_NEON2SSESTORAGE void vst3_lane_f32(__transfersize(3) float32_t * ptr, float32x2x3_t val, __constrange(0,1) int lane);// VST3.32 {d0[0], d1[0], d2[0]}, [r0] +_NEON2SSE_INLINE void vst3_lane_f32(__transfersize(3) float32_t * ptr, float32x2x3_t val, __constrange(0,1) int lane) +{ + *(ptr) = val.val[0].m64_f32[lane]; + *(ptr + 1) = val.val[1].m64_f32[lane]; + *(ptr + 2) = val.val[2].m64_f32[lane]; +} + +_NEON2SSE_GLOBAL void vst3_lane_p8(__transfersize(3) poly8_t * ptr, poly8x8x3_t val, __constrange(0,7) int lane);// VST3.8 {d0[0], d1[0], d2[0]}, [r0] +#define vst3_lane_p8 vst3_lane_u8 + +_NEON2SSE_GLOBAL void vst3_lane_p16(__transfersize(3) poly16_t * ptr, poly16x4x3_t val, __constrange(0,3) int lane);// VST3.16 {d0[0], d1[0], d2[0]}, [r0] +#define vst3_lane_p16 vst3_lane_u16 + +//******************************** Quadruple lanes stores *********************************************** +//******************************************************************************************************* +//void vst4q_lane_u16(__transfersize(4) uint16_t * ptr, uint16x8x4_t val, __constrange(0,7) int lane)// VST4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] +_NEON2SSE_INLINE void vst4q_lane_u16_ptr(__transfersize(4) uint16_t * ptr, uint16x8x4_t const * val4, __constrange(0,7) int lane) +{ + vst2q_lane_u16_ptr(ptr, (uint16x8x2_t*)val4->val, lane); + vst2q_lane_u16_ptr((ptr + 2),((uint16x8x2_t*)val4->val + 1), lane); +} +#define vst4q_lane_u16(ptr, val, lane) vst4q_lane_u16_ptr(ptr, &val, lane) + +//void vst4q_lane_u32(__transfersize(4) uint32_t * ptr, uint32x4x4_t val, __constrange(0,3) int lane)// VST4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0] +_NEON2SSE_INLINE void vst4q_lane_u32_ptr(__transfersize(4) uint32_t * ptr, uint32x4x4_t const * val4, __constrange(0,3) int lane) +{ + vst2q_lane_u32_ptr(ptr, (uint32x4x2_t*)val4->val, lane); + vst2q_lane_u32_ptr((ptr + 2), ((uint32x4x2_t*)val4->val + 1), lane); +} +#define vst4q_lane_u32(ptr, val, lane) vst4q_lane_u32_ptr(ptr, &val, lane) + +//void vst4q_lane_s16(__transfersize(4) int16_t * ptr, int16x8x4_t val, __constrange(0,7) int lane);// VST4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] +_NEON2SSE_GLOBAL void vst4q_lane_s16_ptr(__transfersize(4) int16_t * ptr, int16x8x4_t const * val, __constrange(0,7) int lane); +#define vst4q_lane_s16(ptr,val,lane) vst4q_lane_u16((uint16_t *)ptr,val,lane) + +//void vst4q_lane_s32(__transfersize(4) int32_t * ptr, int32x4x4_t val, __constrange(0,3) int lane);// VST4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0] +_NEON2SSE_GLOBAL void vst4q_lane_s32_ptr(__transfersize(4) int32_t * ptr, int32x4x4_t const * val, __constrange(0,3) int lane); 
+#define vst4q_lane_s32(ptr,val,lane) vst4q_lane_u32((uint32_t *)ptr,val,lane) + +//void vst4q_lane_f16(__transfersize(4) __fp16 * ptr, float16x8x4_t val, __constrange(0,7) int lane);// VST4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] +_NEON2SSE_GLOBAL void vst4q_lane_f16_ptr(__transfersize(4) __fp16 * ptr, float16x8x4_t const * val, __constrange(0,7) int lane); +//current IA SIMD doesn't support float16 + +//void vst4q_lane_f32(__transfersize(4) float32_t * ptr, float32x4x4_t val, __constrange(0,3) int lane)// VST4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0] +_NEON2SSE_INLINE void vst4q_lane_f32_ptr(__transfersize(4) float32_t * ptr, float32x4x4_t const * val, __constrange(0,3) int lane) +{ + vst1q_lane_f32(ptr, val->val[0], lane); + vst1q_lane_f32((ptr + 1), val->val[1], lane); + vst1q_lane_f32((ptr + 2), val->val[2], lane); + vst1q_lane_f32((ptr + 3), val->val[3], lane); +} +#define vst4q_lane_f32(ptr,val,lane) vst4q_lane_f32_ptr(ptr,&val,lane) + +//void vst4q_lane_p16(__transfersize(4) poly16_t * ptr, poly16x8x4_t val, __constrange(0,7) int lane);// VST4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] +_NEON2SSE_GLOBAL void vst4q_lane_p16_ptr(__transfersize(4) poly16_t * ptr, poly16x8x4_t const * val, __constrange(0,7) int lane); +#define vst4q_lane_p16 vst4q_lane_u16 + +_NEON2SSESTORAGE void vst4_lane_u8(__transfersize(4) uint8_t * ptr, uint8x8x4_t val, __constrange(0,7) int lane);// VST4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0] +_NEON2SSE_INLINE void vst4_lane_u8(__transfersize(4) uint8_t * ptr, uint8x8x4_t val, __constrange(0,7) int lane) +{ + *(ptr) = val.val[0].m64_u8[lane]; + *(ptr + 1) = val.val[1].m64_u8[lane]; + *(ptr + 2) = val.val[2].m64_u8[lane]; + *(ptr + 3) = val.val[3].m64_u8[lane]; +} + +_NEON2SSESTORAGE void vst4_lane_u16(__transfersize(4) uint16_t * ptr, uint16x4x4_t val, __constrange(0,3) int lane);// VST4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0] +_NEON2SSE_INLINE void vst4_lane_u16(__transfersize(4) uint16_t * ptr, uint16x4x4_t val, __constrange(0,3) int lane) +{ + *(ptr) = val.val[0].m64_u16[lane]; + *(ptr + 1) = val.val[1].m64_u16[lane]; + *(ptr + 2) = val.val[2].m64_u16[lane]; + *(ptr + 3) = val.val[3].m64_u16[lane]; +} + +_NEON2SSESTORAGE void vst4_lane_u32(__transfersize(4) uint32_t * ptr, uint32x2x4_t val, __constrange(0,1) int lane);// VST4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0] +_NEON2SSE_INLINE void vst4_lane_u32(__transfersize(4) uint32_t * ptr, uint32x2x4_t val, __constrange(0,1) int lane) +{ + *(ptr) = val.val[0].m64_u32[lane]; + *(ptr + 1) = val.val[1].m64_u32[lane]; + *(ptr + 2) = val.val[2].m64_u32[lane]; + *(ptr + 3) = val.val[3].m64_u32[lane]; +} + +_NEON2SSE_GLOBAL void vst4_lane_s8(__transfersize(4) int8_t * ptr, int8x8x4_t val, __constrange(0,7) int lane);// VST4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0] +#define vst4_lane_s8(ptr, val, lane) vst4_lane_u8((uint8_t*)ptr, val, lane) + +_NEON2SSE_GLOBAL void vst4_lane_s16(__transfersize(4) int16_t * ptr, int16x4x4_t val, __constrange(0,3) int lane);// VST4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0] +#define vst4_lane_s16(ptr, val, lane) vst4_lane_u16((uint16_t*)ptr, val, lane) + +_NEON2SSE_GLOBAL void vst4_lane_s32(__transfersize(4) int32_t * ptr, int32x2x4_t val, __constrange(0,1) int lane);// VST4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0] +#define vst4_lane_s32(ptr, val, lane) vst4_lane_u32((uint32_t*)ptr, val, lane) + +//void vst4_lane_f16(__transfersize(4) __fp16 * ptr, float16x4x4_t val, __constrange(0,3) int lane);// VST4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0] +_NEON2SSE_GLOBAL void vst4_lane_f16_ptr(__transfersize(4) __fp16 * ptr, 
float16x4x4_t const * val, __constrange(0,3) int lane); +//current IA SIMD doesn't support float16 + +_NEON2SSESTORAGE void vst4_lane_f32(__transfersize(4) float32_t * ptr, float32x2x4_t val, __constrange(0,1) int lane); // VST4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0] +_NEON2SSE_INLINE void vst4_lane_f32(__transfersize(4) float32_t * ptr, float32x2x4_t val, __constrange(0,1) int lane) +{ + *(ptr) = val.val[0].m64_f32[lane]; + *(ptr + 1) = val.val[1].m64_f32[lane]; + *(ptr + 2) = val.val[2].m64_f32[lane]; + *(ptr + 3) = val.val[3].m64_f32[lane]; +} + +_NEON2SSE_GLOBAL void vst4_lane_p8(__transfersize(4) poly8_t * ptr, poly8x8x4_t val, __constrange(0,7) int lane);// VST4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0] +#define vst4_lane_p8 vst4_lane_u8 + +_NEON2SSE_GLOBAL void vst4_lane_p16(__transfersize(4) poly16_t * ptr, poly16x4x4_t val, __constrange(0,3) int lane);// VST4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0] +#define vst4_lane_p16 vst4_lane_u16 + +//************************************************************************************************** +//************************ Extract lanes from a vector ******************************************** +//************************************************************************************************** +//These intrinsics extract a single lane (element) from a vector. +_NEON2SSE_GLOBAL uint8_t vget_lane_u8(uint8x8_t vec, __constrange(0,7) int lane); // VMOV.U8 r0, d0[0] +#define vget_lane_u8(vec, lane) vec.m64_u8[lane] + +_NEON2SSE_GLOBAL uint16_t vget_lane_u16(uint16x4_t vec, __constrange(0,3) int lane); // VMOV.s16 r0, d0[0] +#define vget_lane_u16(vec, lane) vec.m64_u16[lane] + + +_NEON2SSE_GLOBAL uint32_t vget_lane_u32(uint32x2_t vec, __constrange(0,1) int lane); // VMOV.32 r0, d0[0] +#define vget_lane_u32(vec, lane) vec.m64_u32[lane] + +_NEON2SSE_GLOBAL int8_t vget_lane_s8(int8x8_t vec, __constrange(0,7) int lane); // VMOV.S8 r0, d0[0] +#define vget_lane_s8(vec, lane) vec.m64_i8[lane] + +_NEON2SSE_GLOBAL int16_t vget_lane_s16(int16x4_t vec, __constrange(0,3) int lane); // VMOV.S16 r0, d0[0] +#define vget_lane_s16(vec, lane) vec.m64_i16[lane] + +_NEON2SSE_GLOBAL int32_t vget_lane_s32(int32x2_t vec, __constrange(0,1) int lane); // VMOV.32 r0, d0[0] +#define vget_lane_s32(vec, lane) vec.m64_i32[lane] + +_NEON2SSE_GLOBAL poly8_t vget_lane_p8(poly8x8_t vec, __constrange(0,7) int lane); // VMOV.U8 r0, d0[0] +#define vget_lane_p8 vget_lane_u8 + +_NEON2SSE_GLOBAL poly16_t vget_lane_p16(poly16x4_t vec, __constrange(0,3) int lane); // VMOV.s16 r0, d0[0] +#define vget_lane_p16 vget_lane_u16 + +_NEON2SSE_GLOBAL float32_t vget_lane_f32(float32x2_t vec, __constrange(0,1) int lane); // VMOV.32 r0, d0[0] +#define vget_lane_f32(vec, lane) vec.m64_f32[lane] + +_NEON2SSE_GLOBAL uint8_t vgetq_lane_u8(uint8x16_t vec, __constrange(0,15) int lane); // VMOV.U8 r0, d0[0] +#define vgetq_lane_u8 (uint8_t) _MM_EXTRACT_EPI8 + +_NEON2SSE_GLOBAL uint16_t vgetq_lane_u16(uint16x8_t vec, __constrange(0,7) int lane); // VMOV.s16 r0, d0[0] +#define vgetq_lane_u16 (uint16_t) _MM_EXTRACT_EPI16 + +_NEON2SSE_GLOBAL uint32_t vgetq_lane_u32(uint32x4_t vec, __constrange(0,3) int lane); // VMOV.32 r0, d0[0] +#define vgetq_lane_u32 (uint32_t) _MM_EXTRACT_EPI32 + +_NEON2SSE_GLOBAL int8_t vgetq_lane_s8(int8x16_t vec, __constrange(0,15) int lane); // VMOV.S8 r0, d0[0] +#define vgetq_lane_s8 _MM_EXTRACT_EPI8 + +_NEON2SSE_GLOBAL int16_t vgetq_lane_s16(int16x8_t vec, __constrange(0,7) int lane); // VMOV.S16 r0, d0[0] +#define vgetq_lane_s16 _MM_EXTRACT_EPI16 + +_NEON2SSE_GLOBAL int32_t 
vgetq_lane_s32(int32x4_t vec, __constrange(0,3) int lane); // VMOV.32 r0, d0[0] +#define vgetq_lane_s32 _MM_EXTRACT_EPI32 + +_NEON2SSE_GLOBAL poly8_t vgetq_lane_p8(poly8x16_t vec, __constrange(0,15) int lane); // VMOV.U8 r0, d0[0] +#define vgetq_lane_p8 vgetq_lane_u8 + +_NEON2SSE_GLOBAL poly16_t vgetq_lane_p16(poly16x8_t vec, __constrange(0,7) int lane); // VMOV.s16 r0, d0[0] +#define vgetq_lane_p16 vgetq_lane_u16 + +_NEON2SSESTORAGE float32_t vgetq_lane_f32(float32x4_t vec, __constrange(0,3) int lane); // VMOV.32 r0, d0[0] +_NEON2SSE_INLINE float32_t vgetq_lane_f32(float32x4_t vec, __constrange(0,3) int lane) +{ + float32_t floatVal; + char * const floatVal_c = (char*)&floatVal; + *((int32_t*)floatVal_c) = _MM_EXTRACT_PS(vec,lane); + return floatVal; +} + +_NEON2SSE_GLOBAL int64_t vget_lane_s64(int64x1_t vec, __constrange(0,0) int lane); // VMOV r0,r0,d0 +#define vget_lane_s64(vec, lane) vec.m64_i64[0] + +_NEON2SSE_GLOBAL uint64_t vget_lane_u64(uint64x1_t vec, __constrange(0,0) int lane); // VMOV r0,r0,d0 +#define vget_lane_u64(vec, lane) vec.m64_u64[0] + + +_NEON2SSE_GLOBAL int64_t vgetq_lane_s64(int64x2_t vec, __constrange(0,1) int lane); // VMOV r0,r0,d0 +#define vgetq_lane_s64 _MM_EXTRACT_EPI64 + +_NEON2SSE_GLOBAL uint64_t vgetq_lane_u64(uint64x2_t vec, __constrange(0,1) int lane); // VMOV r0,r0,d0 +#define vgetq_lane_u64 (uint64_t) _MM_EXTRACT_EPI64 + +// ***************** Set lanes within a vector ******************************************** +// ************************************************************************************** +//These intrinsics set a single lane (element) within a vector. +//same functions as vld1_lane_xx ones, but take the value to be set directly. + +_NEON2SSESTORAGE uint8x8_t vset_lane_u8(uint8_t value, uint8x8_t vec, __constrange(0,7) int lane); // VMOV.8 d0[0],r0 +_NEON2SSE_INLINE uint8x8_t vset_lane_u8(uint8_t value, uint8x8_t vec, __constrange(0,7) int lane) +{ + uint8_t val; + val = value; + return vld1_lane_u8(&val, vec, lane); +} + +_NEON2SSESTORAGE uint16x4_t vset_lane_u16(uint16_t value, uint16x4_t vec, __constrange(0,3) int lane); // VMOV.16 d0[0],r0 +_NEON2SSE_INLINE uint16x4_t vset_lane_u16(uint16_t value, uint16x4_t vec, __constrange(0,3) int lane) +{ + uint16_t val; + val = value; + return vld1_lane_u16(&val, vec, lane); +} + +_NEON2SSESTORAGE uint32x2_t vset_lane_u32(uint32_t value, uint32x2_t vec, __constrange(0,1) int lane); // VMOV.32 d0[0],r0 +_NEON2SSE_INLINE uint32x2_t vset_lane_u32(uint32_t value, uint32x2_t vec, __constrange(0,1) int lane) +{ + uint32_t val; + val = value; + return vld1_lane_u32(&val, vec, lane); +} + +_NEON2SSESTORAGE int8x8_t vset_lane_s8(int8_t value, int8x8_t vec, __constrange(0,7) int lane); // VMOV.8 d0[0],r0 +_NEON2SSE_INLINE int8x8_t vset_lane_s8(int8_t value, int8x8_t vec, __constrange(0,7) int lane) +{ + int8_t val; + val = value; + return vld1_lane_s8(&val, vec, lane); +} + +_NEON2SSESTORAGE int16x4_t vset_lane_s16(int16_t value, int16x4_t vec, __constrange(0,3) int lane); // VMOV.16 d0[0],r0 +_NEON2SSE_INLINE int16x4_t vset_lane_s16(int16_t value, int16x4_t vec, __constrange(0,3) int lane) +{ + int16_t val; + val = value; + return vld1_lane_s16(&val, vec, lane); +} + +_NEON2SSESTORAGE int32x2_t vset_lane_s32(int32_t value, int32x2_t vec, __constrange(0,1) int lane); // VMOV.32 d0[0],r0 +_NEON2SSE_INLINE int32x2_t vset_lane_s32(int32_t value, int32x2_t vec, __constrange(0,1) int lane) +{ + int32_t val; + val = value; + return vld1_lane_s32(&val, vec, lane); +} + +_NEON2SSE_GLOBAL poly8x8_t 
vset_lane_p8(poly8_t value, poly8x8_t vec, __constrange(0,7) int lane); // VMOV.8 d0[0],r0 +#define vset_lane_p8 vset_lane_u8 + +_NEON2SSE_GLOBAL poly16x4_t vset_lane_p16(poly16_t value, poly16x4_t vec, __constrange(0,3) int lane); // VMOV.16 d0[0],r0 +#define vset_lane_p16 vset_lane_u16 + +_NEON2SSESTORAGE float32x2_t vset_lane_f32(float32_t value, float32x2_t vec, __constrange(0,1) int lane); // VMOV.32 d0[0],r0 +_NEON2SSE_INLINE float32x2_t vset_lane_f32(float32_t value, float32x2_t vec, __constrange(0,1) int lane) +{ + float32_t val; + val = value; + return vld1_lane_f32(&val, vec, lane); +} + +_NEON2SSESTORAGE uint8x16_t vsetq_lane_u8(uint8_t value, uint8x16_t vec, __constrange(0,15) int lane); // VMOV.8 d0[0],r0 +_NEON2SSE_INLINE uint8x16_t vsetq_lane_u8(uint8_t value, uint8x16_t vec, __constrange(0,15) int lane) +{ + uint8_t val; + val = value; + return vld1q_lane_u8(&val, vec, lane); +} + +_NEON2SSESTORAGE uint16x8_t vsetq_lane_u16(uint16_t value, uint16x8_t vec, __constrange(0,7) int lane); // VMOV.16 d0[0],r0 +_NEON2SSE_INLINE uint16x8_t vsetq_lane_u16(uint16_t value, uint16x8_t vec, __constrange(0,7) int lane) +{ + uint16_t val; + val = value; + return vld1q_lane_u16(&val, vec, lane); +} + +_NEON2SSESTORAGE uint32x4_t vsetq_lane_u32(uint32_t value, uint32x4_t vec, __constrange(0,3) int lane); // VMOV.32 d0[0],r0 +_NEON2SSE_INLINE uint32x4_t vsetq_lane_u32(uint32_t value, uint32x4_t vec, __constrange(0,3) int lane) +{ + uint32_t val; + val = value; + return vld1q_lane_u32(&val, vec, lane); +} + +_NEON2SSESTORAGE int8x16_t vsetq_lane_s8(int8_t value, int8x16_t vec, __constrange(0,15) int lane); // VMOV.8 d0[0],r0 +_NEON2SSE_INLINE int8x16_t vsetq_lane_s8(int8_t value, int8x16_t vec, __constrange(0,15) int lane) +{ + int8_t val; + val = value; + return vld1q_lane_s8(&val, vec, lane); +} + +_NEON2SSESTORAGE int16x8_t vsetq_lane_s16(int16_t value, int16x8_t vec, __constrange(0,7) int lane); // VMOV.16 d0[0],r0 +_NEON2SSE_INLINE int16x8_t vsetq_lane_s16(int16_t value, int16x8_t vec, __constrange(0,7) int lane) +{ + int16_t val; + val = value; + return vld1q_lane_s16(&val, vec, lane); +} + +_NEON2SSESTORAGE int32x4_t vsetq_lane_s32(int32_t value, int32x4_t vec, __constrange(0,3) int lane); // VMOV.32 d0[0],r0 +_NEON2SSE_INLINE int32x4_t vsetq_lane_s32(int32_t value, int32x4_t vec, __constrange(0,3) int lane) +{ + int32_t val; + val = value; + return vld1q_lane_s32(&val, vec, lane); +} + +_NEON2SSE_GLOBAL poly8x16_t vsetq_lane_p8(poly8_t value, poly8x16_t vec, __constrange(0,15) int lane); // VMOV.8 d0[0],r0 +#define vsetq_lane_p8 vsetq_lane_u8 + +_NEON2SSE_GLOBAL poly16x8_t vsetq_lane_p16(poly16_t value, poly16x8_t vec, __constrange(0,7) int lane); // VMOV.16 d0[0],r0 +#define vsetq_lane_p16 vsetq_lane_u16 + +_NEON2SSESTORAGE float32x4_t vsetq_lane_f32(float32_t value, float32x4_t vec, __constrange(0,3) int lane); // VMOV.32 d0[0],r0 +_NEON2SSE_INLINE float32x4_t vsetq_lane_f32(float32_t value, float32x4_t vec, __constrange(0,3) int lane) +{ + float32_t val; + val = value; + return vld1q_lane_f32(&val, vec, lane); +} + +_NEON2SSESTORAGE int64x1_t vset_lane_s64(int64_t value, int64x1_t vec, __constrange(0,0) int lane); // VMOV d0,r0,r0 +_NEON2SSE_INLINE int64x1_t vset_lane_s64(int64_t value, int64x1_t vec, __constrange(0,0) int lane) +{ + int64_t val; + val = value; + return vld1_lane_s64(&val, vec, lane); +} + +_NEON2SSESTORAGE uint64x1_t vset_lane_u64(uint64_t value, uint64x1_t vec, __constrange(0,0) int lane); // VMOV d0,r0,r0 +_NEON2SSE_INLINE uint64x1_t vset_lane_u64(uint64_t 
value, uint64x1_t vec, __constrange(0,0) int lane)
+{
+    uint64_t val;
+    val = value;
+    return vld1_lane_u64(&val, vec, lane);
+}
+
+_NEON2SSESTORAGE int64x2_t vsetq_lane_s64(int64_t value, int64x2_t vec, __constrange(0,1) int lane); // VMOV d0,r0,r0
+_NEON2SSE_INLINE int64x2_t vsetq_lane_s64(int64_t value, int64x2_t vec, __constrange(0,1) int lane)
+{
+    int64_t val;
+    val = value;
+    return vld1q_lane_s64(&val, vec, lane);
+}
+
+_NEON2SSE_GLOBAL uint64x2_t vsetq_lane_u64(uint64_t value, uint64x2_t vec, __constrange(0,1) int lane); // VMOV d0,r0,r0
+#define vsetq_lane_u64 vsetq_lane_s64
+
+// *******************************************************************************
+// **************** Initialize a vector from bit pattern ***************************
+// *******************************************************************************
+//These intrinsics create a vector from a literal bit pattern.
+_NEON2SSESTORAGE int8x8_t vcreate_s8(uint64_t a); // VMOV d0,r0,r0
+_NEON2SSE_INLINE int8x8_t vcreate_s8(uint64_t a)
+{
+    return (*(__m64_128*)&(a)); //a macro can't be used here because 'a' may be an immediate value
+}
+
+_NEON2SSE_GLOBAL int16x4_t vcreate_s16(uint64_t a); // VMOV d0,r0,r0
+#define vcreate_s16 vcreate_s8
+
+_NEON2SSE_GLOBAL int32x2_t vcreate_s32(uint64_t a); // VMOV d0,r0,r0
+#define vcreate_s32 vcreate_s8
+
+_NEON2SSE_GLOBAL float16x4_t vcreate_f16(uint64_t a); // VMOV d0,r0,r0
+//no IA32 SIMD available
+
+_NEON2SSESTORAGE float32x2_t vcreate_f32(uint64_t a); // VMOV d0,r0,r0
+_NEON2SSE_INLINE float32x2_t vcreate_f32(uint64_t a)
+{
+    return (*(__m64_128*)&(a)); //a macro can't be used here because 'a' may be an immediate value
+}
+
+_NEON2SSE_GLOBAL uint8x8_t vcreate_u8(uint64_t a); // VMOV d0,r0,r0
+#define vcreate_u8 vcreate_s8
+
+_NEON2SSE_GLOBAL uint16x4_t vcreate_u16(uint64_t a); // VMOV d0,r0,r0
+#define vcreate_u16 vcreate_s16
+
+_NEON2SSE_GLOBAL uint32x2_t vcreate_u32(uint64_t a); // VMOV d0,r0,r0
+#define vcreate_u32 vcreate_s32
+
+_NEON2SSE_GLOBAL uint64x1_t vcreate_u64(uint64_t a); // VMOV d0,r0,r0
+#define vcreate_u64 vcreate_s8
+
+_NEON2SSE_GLOBAL poly8x8_t vcreate_p8(uint64_t a); // VMOV d0,r0,r0
+#define vcreate_p8 vcreate_u8
+
+_NEON2SSE_GLOBAL poly16x4_t vcreate_p16(uint64_t a); // VMOV d0,r0,r0
+#define vcreate_p16 vcreate_u16
+
+_NEON2SSE_GLOBAL int64x1_t vcreate_s64(uint64_t a); // VMOV d0,r0,r0
+#define vcreate_s64 vcreate_u64
+
+//********************* Set all lanes to same value ********************************
+//*********************************************************************************
+//These intrinsics set all lanes to the same value.
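To make the distinction between the two families concrete, here is an illustrative sketch (not part of the header): vcreate_* reinterprets a 64-bit literal, byte 0 becoming lane 0 on little-endian Intel hardware, while vdup_n_*/vmov_n_* broadcast a single scalar to every lane.

static void create_vs_dup(void)
{
    uint8x8_t pattern = vcreate_u8(0x0706050403020100ULL); /* lanes {0,1,2,3,4,5,6,7} */
    int16x4_t fours   = vdup_n_s16(4);                     /* lanes {4,4,4,4} */
    (void)pattern;
    (void)fours;
}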
+_NEON2SSESTORAGE uint8x8_t vdup_n_u8(uint8_t value); // VDUP.8 d0,r0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint8x8_t vdup_n_u8(uint8_t value), _NEON2SSE_REASON_SLOW_SERIAL) +{ + uint8x8_t res; + int i; + for (i = 0; i<8; i++) { + res.m64_u8[i] = value; + } + return res; +} + +_NEON2SSESTORAGE uint16x4_t vdup_n_u16(uint16_t value); // VDUP.16 d0,r0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint16x4_t vdup_n_u16(uint16_t value), _NEON2SSE_REASON_SLOW_SERIAL) +{ + uint16x4_t res; + int i; + for (i = 0; i<4; i++) { + res.m64_u16[i] = value; + } + return res; +} + +_NEON2SSESTORAGE uint32x2_t vdup_n_u32(uint32_t value); // VDUP.32 d0,r0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint32x2_t vdup_n_u32(uint32_t value), _NEON2SSE_REASON_SLOW_SERIAL) +{ + uint32x2_t res; + res.m64_u32[0] = value; + res.m64_u32[1] = value; + return res; +} + +_NEON2SSESTORAGE int8x8_t vdup_n_s8(int8_t value); // VDUP.8 d0,r0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int8x8_t vdup_n_s8(int8_t value), _NEON2SSE_REASON_SLOW_SERIAL) +{ + int8x8_t res; + int i; + for (i = 0; i<8; i++) { + res.m64_i8[i] = value; + } + return res; +} + +_NEON2SSESTORAGE int16x4_t vdup_n_s16(int16_t value); // VDUP.16 d0,r0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int16x4_t vdup_n_s16(int16_t value), _NEON2SSE_REASON_SLOW_SERIAL) +{ + int16x4_t res; + int i; + for (i = 0; i<4; i++) { + res.m64_i16[i] = value; + } + return res; +} + +_NEON2SSESTORAGE int32x2_t vdup_n_s32(int32_t value); // VDUP.32 d0,r0 +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int32x2_t vdup_n_s32(int32_t value), _NEON2SSE_REASON_SLOW_SERIAL) +{ + int32x2_t res; + res.m64_i32[0] = value; + res.m64_i32[1] = value; + return res; +} + +_NEON2SSE_GLOBAL poly8x8_t vdup_n_p8(poly8_t value); // VDUP.8 d0,r0 +#define vdup_n_p8 vdup_n_u8 + +_NEON2SSE_GLOBAL poly16x4_t vdup_n_p16(poly16_t value); // VDUP.16 d0,r0 +#define vdup_n_p16 vdup_n_s16 + +_NEON2SSESTORAGE float32x2_t vdup_n_f32(float32_t value); // VDUP.32 d0,r0 +_NEON2SSE_INLINE float32x2_t vdup_n_f32(float32_t value) +{ + float32x2_t res; + res.m64_f32[0] = value; + res.m64_f32[1] = value; + return res; +} + +_NEON2SSE_GLOBAL uint8x16_t vdupq_n_u8(uint8_t value); // VDUP.8 q0,r0 +#define vdupq_n_u8(value) _mm_set1_epi8((int8_t) (value)) + +_NEON2SSE_GLOBAL uint16x8_t vdupq_n_u16(uint16_t value); // VDUP.16 q0,r0 +#define vdupq_n_u16(value) _mm_set1_epi16((int16_t) (value)) + +_NEON2SSE_GLOBAL uint32x4_t vdupq_n_u32(uint32_t value); // VDUP.32 q0,r0 +#define vdupq_n_u32(value) _mm_set1_epi32((int32_t) (value)) + +_NEON2SSE_GLOBAL int8x16_t vdupq_n_s8(int8_t value); // VDUP.8 q0,r0 +#define vdupq_n_s8 _mm_set1_epi8 + +_NEON2SSE_GLOBAL int16x8_t vdupq_n_s16(int16_t value); // VDUP.16 q0,r0 +#define vdupq_n_s16 _mm_set1_epi16 + +_NEON2SSE_GLOBAL int32x4_t vdupq_n_s32(int32_t value); // VDUP.32 q0,r0 +#define vdupq_n_s32 _mm_set1_epi32 + +_NEON2SSE_GLOBAL poly8x16_t vdupq_n_p8(poly8_t value); // VDUP.8 q0,r0 +#define vdupq_n_p8 vdupq_n_u8 + +_NEON2SSE_GLOBAL poly16x8_t vdupq_n_p16(poly16_t value); // VDUP.16 q0,r0 +#define vdupq_n_p16 vdupq_n_u16 + +_NEON2SSE_GLOBAL float32x4_t vdupq_n_f32(float32_t value); // VDUP.32 q0,r0 +#define vdupq_n_f32 _mm_set1_ps + +_NEON2SSESTORAGE int64x1_t vdup_n_s64(int64_t value); // VMOV d0,r0,r0 +_NEON2SSE_INLINE int64x1_t vdup_n_s64(int64_t value) +{ + int64x1_t res; + res.m64_i64[0] = value; + return res; +} + +_NEON2SSESTORAGE uint64x1_t vdup_n_u64(uint64_t value); // VMOV d0,r0,r0 +_NEON2SSE_INLINE uint64x1_t vdup_n_u64(uint64_t value) +{ + uint64x1_t 
res;
+    res.m64_u64[0] = value;
+    return res;
+}
+
+_NEON2SSESTORAGE int64x2_t vdupq_n_s64(int64_t value); // VMOV d0,r0,r0
+_NEON2SSE_INLINE int64x2_t vdupq_n_s64(int64_t value)
+{
+    _NEON2SSE_ALIGN_16 int64_t value2[2];
+
+    value2[0] = value;
+    value2[1] = value;
+
+    return LOAD_SI128(value2);
+}
+
+_NEON2SSESTORAGE uint64x2_t vdupq_n_u64(uint64_t value); // VMOV d0,r0,r0
+_NEON2SSE_INLINE uint64x2_t vdupq_n_u64(uint64_t value)
+{
+    _NEON2SSE_ALIGN_16 uint64_t val[2];
+
+    val[0] = value;
+    val[1] = value;
+
+    return LOAD_SI128(val);
+}
+
+//**** Set all lanes to same value ************************
+//Same functions as above - just aliases.********************
+//They probably reflect the fact that the 128-bit versions use the VMOV instruction **********
+_NEON2SSE_GLOBAL uint8x8_t vmov_n_u8(uint8_t value); // VDUP.8 d0,r0
+#define vmov_n_u8 vdup_n_s8
+
+_NEON2SSE_GLOBAL uint16x4_t vmov_n_u16(uint16_t value); // VDUP.16 d0,r0
+#define vmov_n_u16 vdup_n_s16
+
+_NEON2SSE_GLOBAL uint32x2_t vmov_n_u32(uint32_t value); // VDUP.32 d0,r0
+#define vmov_n_u32 vdup_n_u32
+
+_NEON2SSE_GLOBAL int8x8_t vmov_n_s8(int8_t value); // VDUP.8 d0,r0
+#define vmov_n_s8 vdup_n_s8
+
+_NEON2SSE_GLOBAL int16x4_t vmov_n_s16(int16_t value); // VDUP.16 d0,r0
+#define vmov_n_s16 vdup_n_s16
+
+_NEON2SSE_GLOBAL int32x2_t vmov_n_s32(int32_t value); // VDUP.32 d0,r0
+#define vmov_n_s32 vdup_n_s32
+
+_NEON2SSE_GLOBAL poly8x8_t vmov_n_p8(poly8_t value); // VDUP.8 d0,r0
+#define vmov_n_p8 vdup_n_u8
+
+_NEON2SSE_GLOBAL poly16x4_t vmov_n_p16(poly16_t value); // VDUP.16 d0,r0
+#define vmov_n_p16 vdup_n_s16
+
+_NEON2SSE_GLOBAL float32x2_t vmov_n_f32(float32_t value); // VDUP.32 d0,r0
+#define vmov_n_f32 vdup_n_f32
+
+_NEON2SSE_GLOBAL uint8x16_t vmovq_n_u8(uint8_t value); // VDUP.8 q0,r0
+#define vmovq_n_u8 vdupq_n_u8
+
+_NEON2SSE_GLOBAL uint16x8_t vmovq_n_u16(uint16_t value); // VDUP.16 q0,r0
+#define vmovq_n_u16 vdupq_n_s16
+
+_NEON2SSE_GLOBAL uint32x4_t vmovq_n_u32(uint32_t value); // VDUP.32 q0,r0
+#define vmovq_n_u32 vdupq_n_u32
+
+_NEON2SSE_GLOBAL int8x16_t vmovq_n_s8(int8_t value); // VDUP.8 q0,r0
+#define vmovq_n_s8 vdupq_n_s8
+
+_NEON2SSE_GLOBAL int16x8_t vmovq_n_s16(int16_t value); // VDUP.16 q0,r0
+#define vmovq_n_s16 vdupq_n_s16
+
+_NEON2SSE_GLOBAL int32x4_t vmovq_n_s32(int32_t value); // VDUP.32 q0,r0
+#define vmovq_n_s32 vdupq_n_s32
+
+_NEON2SSE_GLOBAL poly8x16_t vmovq_n_p8(poly8_t value); // VDUP.8 q0,r0
+#define vmovq_n_p8 vdupq_n_u8
+
+_NEON2SSE_GLOBAL poly16x8_t vmovq_n_p16(poly16_t value); // VDUP.16 q0,r0
+#define vmovq_n_p16 vdupq_n_s16
+
+_NEON2SSE_GLOBAL float32x4_t vmovq_n_f32(float32_t value); // VDUP.32 q0,r0
+#define vmovq_n_f32 vdupq_n_f32
+
+_NEON2SSE_GLOBAL int64x1_t vmov_n_s64(int64_t value); // VMOV d0,r0,r0
+#define vmov_n_s64 vdup_n_s64
+
+_NEON2SSE_GLOBAL uint64x1_t vmov_n_u64(uint64_t value); // VMOV d0,r0,r0
+#define vmov_n_u64 vdup_n_u64
+
+_NEON2SSE_GLOBAL int64x2_t vmovq_n_s64(int64_t value); // VMOV d0,r0,r0
+#define vmovq_n_s64 vdupq_n_s64
+
+_NEON2SSE_GLOBAL uint64x2_t vmovq_n_u64(uint64_t value); // VMOV d0,r0,r0
+#define vmovq_n_u64 vdupq_n_u64
+
+//**************Set all lanes to the value of one lane of a vector *************
+//****************************************************************************
+//here a shuffle is a better solution than lane extraction followed by a set1 function
+_NEON2SSESTORAGE uint8x8_t vdup_lane_u8(uint8x8_t vec, __constrange(0,7) int lane); // VDUP.8 d0,d0[0]
+_NEON2SSE_INLINE uint8x8_t vdup_lane_u8(uint8x8_t vec, __constrange(0,7) int lane)
+{
uint8x8_t res; + uint8_t valane; + int i = 0; + valane = vec.m64_u8[lane]; + for (i = 0; i<8; i++) { + res.m64_u8[i] = valane; + } + return res; +} + +_NEON2SSESTORAGE uint16x4_t vdup_lane_u16(uint16x4_t vec, __constrange(0,3) int lane); // VDUP.16 d0,d0[0] +_NEON2SSE_INLINE uint16x4_t vdup_lane_u16(uint16x4_t vec, __constrange(0,3) int lane) +{ + uint16x4_t res; + uint16_t valane; + valane = vec.m64_u16[lane]; + res.m64_u16[0] = valane; + res.m64_u16[1] = valane; + res.m64_u16[2] = valane; + res.m64_u16[3] = valane; + return res; +} + +_NEON2SSESTORAGE uint32x2_t vdup_lane_u32(uint32x2_t vec, __constrange(0,1) int lane); // VDUP.32 d0,d0[0] +_NEON2SSE_INLINE uint32x2_t vdup_lane_u32(uint32x2_t vec, __constrange(0,1) int lane) +{ + uint32x2_t res; + res.m64_u32[0] = vec.m64_u32[lane]; + res.m64_u32[1] = res.m64_u32[0]; + return res; +} + +_NEON2SSE_GLOBAL int8x8_t vdup_lane_s8(int8x8_t vec, __constrange(0,7) int lane); // VDUP.8 d0,d0[0] +#define vdup_lane_s8 vdup_lane_u8 + +_NEON2SSE_GLOBAL int16x4_t vdup_lane_s16(int16x4_t vec, __constrange(0,3) int lane); // VDUP.16 d0,d0[0] +#define vdup_lane_s16 vdup_lane_u16 + +_NEON2SSE_GLOBAL int32x2_t vdup_lane_s32(int32x2_t vec, __constrange(0,1) int lane); // VDUP.32 d0,d0[0] +#define vdup_lane_s32 vdup_lane_u32 + +_NEON2SSE_GLOBAL poly8x8_t vdup_lane_p8(poly8x8_t vec, __constrange(0,7) int lane); // VDUP.8 d0,d0[0] +#define vdup_lane_p8 vdup_lane_u8 + +_NEON2SSE_GLOBAL poly16x4_t vdup_lane_p16(poly16x4_t vec, __constrange(0,3) int lane); // VDUP.16 d0,d0[0] +#define vdup_lane_p16 vdup_lane_s16 + +_NEON2SSESTORAGE float32x2_t vdup_lane_f32(float32x2_t vec, __constrange(0,1) int lane); // VDUP.32 d0,d0[0] +_NEON2SSE_INLINE float32x2_t vdup_lane_f32(float32x2_t vec, __constrange(0,1) int lane) +{ + float32x2_t res; + res.m64_f32[0] = vec.m64_f32[lane]; + res.m64_f32[1] = res.m64_f32[0]; + return res; +} + +_NEON2SSESTORAGE uint8x16_t vdupq_lane_u8(uint8x8_t vec, __constrange(0,7) int lane); // VDUP.8 q0,d0[0] +_NEON2SSE_INLINE uint8x16_t vdupq_lane_u8(uint8x8_t vec, __constrange(0,7) int lane) // VDUP.8 q0,d0[0] +{ + const int8_t lane8 = (int8_t) lane; + _NEON2SSE_ALIGN_16 int8_t lanemask8[16]; + + lanemask8[0] = lane8; + lanemask8[1] = lane8; + lanemask8[2] = lane8; + lanemask8[3] = lane8; + lanemask8[4] = lane8; + lanemask8[5] = lane8; + lanemask8[6] = lane8; + lanemask8[7] = lane8; + lanemask8[8] = lane8; + lanemask8[9] = lane8; + lanemask8[10] = lane8; + lanemask8[11] = lane8; + lanemask8[12] = lane8; + lanemask8[13] = lane8; + lanemask8[14] = lane8; + lanemask8[15] = lane8; + + return _mm_shuffle_epi8 (_pM128i(vec), *(__m128i*) lanemask8); +} + +_NEON2SSESTORAGE uint16x8_t vdupq_lane_u16(uint16x4_t vec, __constrange(0,3) int lane); // VDUP.16 q0,d0[0] +_NEON2SSE_INLINE uint16x8_t vdupq_lane_u16(uint16x4_t vec, __constrange(0,3) int lane) // VDUP.16 q0,d0[0] +{ + //we could use 8bit shuffle for 16 bit as well + const int8_t lane16 = ((int8_t) lane) << 1; + const int8_t lane16_1 = lane16 + 1; + _NEON2SSE_ALIGN_16 int8_t lanemask_e16[16]; + + lanemask_e16[0] = lane16; + lanemask_e16[1] = lane16_1; + lanemask_e16[2] = lane16; + lanemask_e16[3] = lane16_1; + lanemask_e16[4] = lane16; + lanemask_e16[5] = lane16_1; + lanemask_e16[6] = lane16; + lanemask_e16[7] = lane16_1; + lanemask_e16[8] = lane16; + lanemask_e16[9] = lane16_1; + lanemask_e16[10] = lane16; + lanemask_e16[11] = lane16_1; + lanemask_e16[12] = lane16; + lanemask_e16[13] = lane16_1; + lanemask_e16[14] = lane16; + lanemask_e16[15] = lane16_1; + + return _mm_shuffle_epi8 (_pM128i(vec), 
*(__m128i*)lanemask_e16); +} + +_NEON2SSESTORAGE uint32x4_t vdupq_lane_u32(uint32x2_t vec, __constrange(0,1) int lane); // VDUP.32 q0,d0[0] +_NEON2SSE_INLINE uint32x4_t vdupq_lane_u32(uint32x2_t vec, __constrange(0,1) int lane) +{ + //need to use function not macro to make it gcc friendly and meet the immediate const requirement for _mm_shuffle_epi32 + if (lane == 1) + return _mm_shuffle_epi32 (_pM128i(vec), (1 | (1 << 2) | (1 << 4) | (1 << 6)) ); + else + return _mm_shuffle_epi32 (_pM128i(vec), 0); +} + +_NEON2SSE_GLOBAL int8x16_t vdupq_lane_s8(int8x8_t vec, __constrange(0,7) int lane); // VDUP.8 q0,d0[0] +#define vdupq_lane_s8 vdupq_lane_u8 + +_NEON2SSE_GLOBAL int16x8_t vdupq_lane_s16(int16x4_t vec, __constrange(0,3) int lane); // VDUP.16 q0,d0[0] +#define vdupq_lane_s16 vdupq_lane_u16 + +_NEON2SSE_GLOBAL int32x4_t vdupq_lane_s32(int32x2_t vec, __constrange(0,1) int lane); // VDUP.32 q0,d0[0] +#define vdupq_lane_s32 vdupq_lane_u32 + +_NEON2SSE_GLOBAL poly8x16_t vdupq_lane_p8(poly8x8_t vec, __constrange(0,7) int lane); // VDUP.8 q0,d0[0] +#define vdupq_lane_p8 vdupq_lane_u8 + +_NEON2SSE_GLOBAL poly16x8_t vdupq_lane_p16(poly16x4_t vec, __constrange(0,3) int lane); // VDUP.16 q0,d0[0] +#define vdupq_lane_p16 vdupq_lane_s16 + +_NEON2SSE_GLOBAL float32x4_t vdupq_lane_f32(float32x2_t vec, __constrange(0,1) int lane); // VDUP.32 q0,d0[0] +#define vdupq_lane_f32(vec, lane) _mm_load1_ps((vec.m64_f32 + lane)) + +_NEON2SSE_GLOBAL int64x1_t vdup_lane_s64(int64x1_t vec, __constrange(0,0) int lane); // VMOV d0,d0 +#define vdup_lane_s64(vec,lane) vec + +_NEON2SSE_GLOBAL uint64x1_t vdup_lane_u64(uint64x1_t vec, __constrange(0,0) int lane); // VMOV d0,d0 +#define vdup_lane_u64(vec,lane) vec + +_NEON2SSESTORAGE int64x2_t vdupq_lane_s64(int64x1_t vec, __constrange(0,0) int lane); // VMOV q0,q0 +_NEON2SSE_INLINE int64x2_t vdupq_lane_s64(int64x1_t vec, __constrange(0,0) int lane) +{ + __m128i vec128; + UNREFERENCED_PARAM(lane); + vec128 = _pM128i(vec); + return _mm_unpacklo_epi64(vec128,vec128); +} + +_NEON2SSE_GLOBAL uint64x2_t vdupq_lane_u64(uint64x1_t vec, __constrange(0,0) int lane); // VMOV q0,q0 +#define vdupq_lane_u64 vdupq_lane_s64 + +// ******************************************************************** +// ******************** Combining vectors ***************************** +// ******************************************************************** +//These intrinsics join two 64 bit vectors into a single 128bit vector. 
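For illustration (hypothetical helper, not from the header), combining is a simple 64-bit concatenation, and the vget_low/vget_high intrinsics defined further below split the result back losslessly:

static int32x4_t combine_pair(int32x2_t lo, int32x2_t hi)
{
    /* result lanes: {lo0, lo1, hi0, hi1} */
    return vcombine_s32(lo, hi);
}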
+_NEON2SSESTORAGE int8x16_t vcombine_s8(int8x8_t low, int8x8_t high); // VMOV d0,d0 +_NEON2SSE_INLINE int8x16_t vcombine_s8(int8x8_t low, int8x8_t high) +{ + return _mm_unpacklo_epi64 (_pM128i(low), _pM128i(high) ); +} + +_NEON2SSE_GLOBAL int16x8_t vcombine_s16(int16x4_t low, int16x4_t high); // VMOV d0,d0 +#define vcombine_s16 vcombine_s8 + +_NEON2SSE_GLOBAL int32x4_t vcombine_s32(int32x2_t low, int32x2_t high); // VMOV d0,d0 +#define vcombine_s32 vcombine_s8 + +_NEON2SSE_GLOBAL int64x2_t vcombine_s64(int64x1_t low, int64x1_t high); // VMOV d0,d0 +#define vcombine_s64 vcombine_s8 + +_NEON2SSE_GLOBAL float16x8_t vcombine_f16(float16x4_t low, float16x4_t high); // VMOV d0,d0 +//current IA SIMD doesn't support float16 + +_NEON2SSESTORAGE float32x4_t vcombine_f32(float32x2_t low, float32x2_t high); // VMOV d0,d0 +_NEON2SSE_INLINE float32x4_t vcombine_f32(float32x2_t low, float32x2_t high) +{ + __m128i res; + res = _mm_unpacklo_epi64(_pM128i(low), _pM128i(high) ); + return _M128(res); +} + +_NEON2SSE_GLOBAL uint8x16_t vcombine_u8(uint8x8_t low, uint8x8_t high); // VMOV d0,d0 +#define vcombine_u8 vcombine_s8 + +_NEON2SSE_GLOBAL uint16x8_t vcombine_u16(uint16x4_t low, uint16x4_t high); // VMOV d0,d0 +#define vcombine_u16 vcombine_s16 + +_NEON2SSE_GLOBAL uint32x4_t vcombine_u32(uint32x2_t low, uint32x2_t high); // VMOV d0,d0 +#define vcombine_u32 vcombine_s32 + +_NEON2SSE_GLOBAL uint64x2_t vcombine_u64(uint64x1_t low, uint64x1_t high); // VMOV d0,d0 +#define vcombine_u64 vcombine_s64 + +_NEON2SSE_GLOBAL poly8x16_t vcombine_p8(poly8x8_t low, poly8x8_t high); // VMOV d0,d0 +#define vcombine_p8 vcombine_u8 + +_NEON2SSE_GLOBAL poly16x8_t vcombine_p16(poly16x4_t low, poly16x4_t high); // VMOV d0,d0 +#define vcombine_p16 vcombine_u16 + +//********************************************************************** +//************************* Splitting vectors ************************** +//********************************************************************** +//**************** Get high part ****************************************** +//These intrinsics split a 128 bit vector into 2 component 64 bit vectors +_NEON2SSESTORAGE int8x8_t vget_high_s8(int8x16_t a); // VMOV d0,d0 +_NEON2SSE_INLINE int8x8_t vget_high_s8(int8x16_t a) +{ + int8x8_t res64; + __m128i res; + res = _mm_unpackhi_epi64(a,a); //SSE2 + return64(res); +} + +_NEON2SSESTORAGE int16x4_t vget_high_s16(int16x8_t a); // VMOV d0,d0 +_NEON2SSE_INLINE int16x4_t vget_high_s16(int16x8_t a) +{ + int16x4_t res64; + __m128i res; + res = _mm_unpackhi_epi64(a,a); //SSE2 + return64(res); +} + +_NEON2SSESTORAGE int32x2_t vget_high_s32(int32x4_t a); // VMOV d0,d0 +_NEON2SSE_INLINE int32x2_t vget_high_s32(int32x4_t a) +{ + int32x2_t res64; + __m128i res; + res = _mm_unpackhi_epi64(a,a); //SSE2 + return64(res); +} + +_NEON2SSESTORAGE int64x1_t vget_high_s64(int64x2_t a); // VMOV d0,d0 +_NEON2SSE_INLINE int64x1_t vget_high_s64(int64x2_t a) +{ + int64x1_t res64; + __m128i res; + res = _mm_unpackhi_epi64(a,a); //SSE2 + return64(res); +} + +_NEON2SSE_GLOBAL float16x4_t vget_high_f16(float16x8_t a); // VMOV d0,d0 +// IA32 SIMD doesn't work with 16bit floats currently + +_NEON2SSESTORAGE float32x2_t vget_high_f32(float32x4_t a); // VMOV d0,d0 +_NEON2SSE_INLINE float32x2_t vget_high_f32(float32x4_t a) +{ + __m128i res; + __m64_128 res64; + res = _mm_unpackhi_epi64(_M128i(a),_M128i(a)); + return64(res); +} + +_NEON2SSE_GLOBAL uint8x8_t vget_high_u8(uint8x16_t a); // VMOV d0,d0 +#define vget_high_u8 vget_high_s8 + +_NEON2SSE_GLOBAL uint16x4_t vget_high_u16(uint16x8_t a); 
// VMOV d0,d0
+#define vget_high_u16 vget_high_s16
+
+_NEON2SSE_GLOBAL uint32x2_t vget_high_u32(uint32x4_t a); // VMOV d0,d0
+#define vget_high_u32 vget_high_s32
+
+_NEON2SSE_GLOBAL uint64x1_t vget_high_u64(uint64x2_t a); // VMOV d0,d0
+#define vget_high_u64 vget_high_s64
+
+_NEON2SSE_GLOBAL poly8x8_t vget_high_p8(poly8x16_t a); // VMOV d0,d0
+#define vget_high_p8 vget_high_u8
+
+_NEON2SSE_GLOBAL poly16x4_t vget_high_p16(poly16x8_t a); // VMOV d0,d0
+#define vget_high_p16 vget_high_u16
+
+//********************** Get low part **********************
+//**********************************************************
+_NEON2SSESTORAGE int8x8_t vget_low_s8(int8x16_t a); // VMOV d0,d0
+_NEON2SSE_INLINE int8x8_t vget_low_s8(int8x16_t a) // VMOV d0,d0
+{
+    int8x8_t res64;
+    return64(a);
+}
+
+_NEON2SSESTORAGE int16x4_t vget_low_s16(int16x8_t a); // VMOV d0,d0
+_NEON2SSE_INLINE int16x4_t vget_low_s16(int16x8_t a) // VMOV d0,d0
+{
+    int16x4_t res64;
+    return64(a);
+}
+
+_NEON2SSESTORAGE int32x2_t vget_low_s32(int32x4_t a); // VMOV d0,d0
+_NEON2SSE_INLINE int32x2_t vget_low_s32(int32x4_t a) // VMOV d0,d0
+{
+    int32x2_t res64;
+    return64(a);
+}
+
+_NEON2SSESTORAGE int64x1_t vget_low_s64(int64x2_t a); // VMOV d0,d0
+_NEON2SSE_INLINE int64x1_t vget_low_s64(int64x2_t a) // VMOV d0,d0
+{
+    int64x1_t res64;
+    return64(a);
+}
+
+_NEON2SSE_GLOBAL float16x4_t vget_low_f16(float16x8_t a); // VMOV d0,d0
+// IA32 SIMD doesn't work with 16bit floats currently
+
+_NEON2SSESTORAGE float32x2_t vget_low_f32(float32x4_t a); // VMOV d0,d0
+_NEON2SSE_INLINE float32x2_t vget_low_f32(float32x4_t a)
+{
+    float32x2_t res64;
+    _M64f(res64, a);
+    return res64;
+}
+
+_NEON2SSE_GLOBAL uint8x8_t vget_low_u8(uint8x16_t a); // VMOV d0,d0
+#define vget_low_u8 vget_low_s8
+
+_NEON2SSE_GLOBAL uint16x4_t vget_low_u16(uint16x8_t a); // VMOV d0,d0
+#define vget_low_u16 vget_low_s16
+
+_NEON2SSE_GLOBAL uint32x2_t vget_low_u32(uint32x4_t a); // VMOV d0,d0
+#define vget_low_u32 vget_low_s32
+
+_NEON2SSE_GLOBAL uint64x1_t vget_low_u64(uint64x2_t a); // VMOV d0,d0
+#define vget_low_u64 vget_low_s64
+
+_NEON2SSE_GLOBAL poly8x8_t vget_low_p8(poly8x16_t a); // VMOV d0,d0
+#define vget_low_p8 vget_low_u8
+
+_NEON2SSE_GLOBAL poly16x4_t vget_low_p16(poly16x8_t a); // VMOV d0,d0
+#define vget_low_p16 vget_low_s16
+
+//**************************************************************************
+//************************ Converting vectors **********************************
+//**************************************************************************
+//************* Convert from float ***************************************
+// the result depends on the current rounding mode: set _MM_SET_ROUNDING_MODE(x) accordingly (see the sketch below)
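For instance, the following sketch (a hypothetical wrapper, not part of the header) forces the round-toward-zero behaviour of ARM's VCVT.S32.F32 and then restores the caller's rounding mode; _MM_GET_ROUNDING_MODE/_MM_SET_ROUNDING_MODE come from <xmmintrin.h>:

static int32x2_t cvt_s32_f32_truncating(float32x2_t a)
{
    int32x2_t res;
    unsigned int saved = _MM_GET_ROUNDING_MODE();
    _MM_SET_ROUNDING_MODE(_MM_ROUND_TOWARD_ZERO); /* ARM VCVT.S32.F32 truncates toward zero */
    res = vcvt_s32_f32(a); /* defined just below; uses _mm_cvtps_epi32 */
    _MM_SET_ROUNDING_MODE(saved); /* restore the caller's rounding mode */
    return res;
}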
+_NEON2SSESTORAGE int32x2_t vcvt_s32_f32(float32x2_t a); // VCVT.S32.F32 d0, d0
+_NEON2SSE_INLINE int32x2_t vcvt_s32_f32(float32x2_t a)
+{
+    int32x2_t res64;
+    __m128i res;
+    res = _mm_cvtps_epi32(_pM128(a)); //use low 64 bits of result only
+    return64(res);
+}
+
+_NEON2SSESTORAGE uint32x2_t vcvt_u32_f32(float32x2_t a); // VCVT.U32.F32 d0, d0
+_NEON2SSE_INLINE uint32x2_t vcvt_u32_f32(float32x2_t a)
+{
+    uint32x2_t res64;
+    __m128i res;
+    res = vcvtq_u32_f32(_pM128(a));
+    return64(res);
+}
+
+_NEON2SSESTORAGE int32x4_t vcvtq_s32_f32(float32x4_t a); // VCVT.S32.F32 q0, q0
+_NEON2SSE_INLINE int32x4_t vcvtq_s32_f32(float32x4_t a)
+{
+    __m128 dif;
+    __m128i res;
+    //_mm_cvttps_epi32 mishandles the case a >= 2.14748364e+009, therefore special processing is necessary
+    _NEON2SSE_ALIGN_16 static const float32_t fmax[] = { 2.14748364e+009f, 2.14748364e+009f, 2.14748364e+009f, 2.14748364e+009f };
+    dif = _mm_cmpge_ps(a, *(__m128*)fmax);
+    res = _mm_cvttps_epi32(a);
+    return _mm_xor_si128(res, _M128i(dif));
+}
+
+_NEON2SSESTORAGE uint32x4_t vcvtq_u32_f32(float32x4_t a); // VCVT.U32.F32 q0, q0
+_NEON2SSE_INLINE uint32x4_t vcvtq_u32_f32(float32x4_t a) // VCVT.U32.F32 q0, q0
+{
+    //No single-instruction SSE solution, but it can be implemented as follows:
+    __m128i res1, res2, zero, mask;
+    __m128 max, min, dif;
+    _NEON2SSE_ALIGN_16 static const float32_t fmax[] = { 2.14748364e+009f, 2.14748364e+009f, 2.14748364e+009f, 2.14748364e+009f };
+    _NEON2SSE_ALIGN_16 static const float32_t fmax_unsigned[] = { 4.29496729e+009f, 4.29496729e+009f, 4.29496729e+009f, 4.29496729e+009f };
+    zero = _mm_setzero_si128();
+    mask = _mm_cmpgt_epi32(_M128i(a), zero);
+    min = _mm_and_ps(_M128(mask), a);
+    max = _mm_min_ps(min, *(__m128*)fmax_unsigned); //clamped to [0, 4.29496729e+009]
+
+    dif = _mm_sub_ps(max, *(__m128*)fmax);
+    mask = _mm_cmpgt_epi32(_M128i(dif),zero);
+    dif = _mm_and_ps(_M128(mask), dif);
+
+    res1 = _mm_cvttps_epi32(dif);
+    res2 = vcvtq_s32_f32(max);
+    return _mm_add_epi32(res1, res2);
+}
+
+// ***** Convert to the fixed point with the number of fraction bits specified by b ***********
+//*************************************************************************************************
+_NEON2SSESTORAGE uint32_t clamp_u32_f32(float v);
+_NEON2SSE_INLINE uint32_t clamp_u32_f32(float v)
+{
+    return (v <= 0 ? 0 : (v >= (float)~0U ? ~0U : (uint32_t)(v)));
+}
+
+_NEON2SSESTORAGE int32x2_t vcvt_n_s32_f32(float32x2_t a, __constrange(1,32) int b); // VCVT.S32.F32 d0, d0, #32
+_NEON2SSE_INLINE int32x2_t vcvt_n_s32_f32(float32x2_t a, __constrange(1,32) int b)
+{
+    int32x2_t res64;
+    return64(vcvtq_n_s32_f32(_pM128(a),b));
+}
+
+_NEON2SSESTORAGE uint32x2_t vcvt_n_u32_f32(float32x2_t a, __constrange(1,32) int b); // VCVT.U32.F32 d0, d0, #32
+_NEON2SSE_INLINE uint32x2_t vcvt_n_u32_f32(float32x2_t a, __constrange(1,32) int b)
+{
+    uint32x2_t res;
+    float convconst;
+    convconst = (float)((uint64_t)1 << b);
+    res.m64_u32[0] = clamp_u32_f32(a.m64_f32[0] * convconst);
+    res.m64_u32[1] = clamp_u32_f32(a.m64_f32[1] * convconst);
+    return res;
+}
+
+_NEON2SSESTORAGE int32x4_t vcvtq_n_s32_f32(float32x4_t a, __constrange(1,32) int b); // VCVT.S32.F32 q0, q0, #32
+_NEON2SSE_INLINE int32x4_t vcvtq_n_s32_f32(float32x4_t a, __constrange(1,32) int b)
+{
+    float convconst;
+    _NEON2SSE_ALIGN_16 static const uint32_t cmask[] = {0x80000000, 0x80000000, 0x80000000, 0x80000000};
+    __m128 cconst128;
+    __m128i mask, res;
+    convconst = (float)((uint64_t)1 << b);
+    cconst128 = vdupq_n_f32(convconst);
+    res = _mm_cvttps_epi32(_mm_mul_ps(a,cconst128));
+    mask = _mm_cmpeq_epi32 (res, *(__m128i*)cmask);
+
+    /* ...for negative values we do not want to negate the bits of the saturated value */
+    mask = _mm_and_si128(_mm_castps_si128(_mm_cmpgt_ps(a,_mm_setzero_ps())), mask);
+
+    return _mm_xor_si128 (res, mask); //res saturated for 0x80000000
+}
+
+_NEON2SSESTORAGE uint32x4_t vcvtq_n_u32_f32(float32x4_t a, __constrange(1,32) int b); // VCVT.U32.F32 q0, q0, #32
+_NEON2SSE_INLINE uint32x4_t vcvtq_n_u32_f32(float32x4_t a, __constrange(1,32) int b)
+{
+    float convconst;
+    __m128 cconst128;
+    convconst = (float)((uint64_t)1 << b);
+    cconst128 = vdupq_n_f32(convconst);
+    return vcvtq_u32_f32(_mm_mul_ps(a,cconst128));
+}
+
+_NEON2SSESTORAGE int32x4_t vcvtnq_s32_f32(float32x4_t a); // VCVTN.S32.F32 q0, q0
+_NEON2SSE_INLINE int32x4_t vcvtnq_s32_f32(float32x4_t a)
+{
+    return _mm_cvtps_epi32(a);
+}
+
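As a worked example of the fixed-point forms above (the function name is hypothetical, not part of the header): with b = 16 fraction bits each lane is multiplied by 2^16 = 65536 and truncated, so 1.5f becomes 98304 and -1.25f becomes -81920.

static int32x4_t to_q16_16(float32x4_t v)
{
    return vcvtq_n_s32_f32(v, 16); /* lane = (int32_t)(v * 65536.0f), truncated */
}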
+//***************** Convert to float ************************* +//************************************************************* +_NEON2SSESTORAGE float32x2_t vcvt_f32_s32(int32x2_t a); // VCVT.F32.S32 d0, d0 +_NEON2SSE_INLINE float32x2_t vcvt_f32_s32(int32x2_t a) //use low 64 bits +{ + float32x2_t res; + res.m64_f32[0] = (float) a.m64_i32[0]; + res.m64_f32[1] = (float) a.m64_i32[1]; + return res; +} + +_NEON2SSESTORAGE float32x2_t vcvt_f32_u32(uint32x2_t a); // VCVT.F32.U32 d0, d0 +_NEON2SSE_INLINE float32x2_t vcvt_f32_u32(uint32x2_t a) +{ + float32x2_t res; + res.m64_f32[0] = (float) a.m64_u32[0]; + res.m64_f32[1] = (float) a.m64_u32[1]; + return res; +} + +_NEON2SSE_GLOBAL float32x4_t vcvtq_f32_s32(int32x4_t a); // VCVT.F32.S32 q0, q0 +#define vcvtq_f32_s32(a) _mm_cvtepi32_ps(a) + +_NEON2SSESTORAGE float32x4_t vcvtq_f32_u32(uint32x4_t a); // VCVT.F32.U32 q0, q0 +_NEON2SSE_INLINE float32x4_t vcvtq_f32_u32(uint32x4_t a) // VCVT.F32.U32 q0, q0 +{ + //solution may be not optimal + __m128 two16, fHi, fLo; + __m128i hi, lo; + two16 = _mm_set1_ps((float)0x10000); //2^16 + // Avoid double rounding by doing two exact conversions + // of high and low 16-bit segments + hi = _mm_srli_epi32(a, 16); + lo = _mm_srli_epi32(_mm_slli_epi32(a, 16), 16); + fHi = _mm_mul_ps(_mm_cvtepi32_ps(hi), two16); + fLo = _mm_cvtepi32_ps(lo); + // do single rounding according to current rounding mode + return _mm_add_ps(fHi, fLo); +} + +// ***** Convert to the float from fixed point with the number of fraction bits specified by b *********** +_NEON2SSESTORAGE float32x2_t vcvt_n_f32_s32(int32x2_t a, __constrange(1,32) int b); // VCVT.F32.S32 d0, d0, #32 +_NEON2SSE_INLINE float32x2_t vcvt_n_f32_s32(int32x2_t a, __constrange(1,32) int b) +{ + float32x2_t res; + float convconst; + convconst = (float)(1. / ((uint64_t)1 << b)); + res.m64_f32[0] = a.m64_i32[0] * convconst; + res.m64_f32[1] = a.m64_i32[1] * convconst; + return res; +} + +_NEON2SSESTORAGE float32x2_t vcvt_n_f32_u32(uint32x2_t a, __constrange(1,32) int b); // VCVT.F32.U32 d0, d0, #32 +_NEON2SSE_INLINE float32x2_t vcvt_n_f32_u32(uint32x2_t a, __constrange(1,32) int b) // VCVT.F32.U32 d0, d0, #32 +{ + float32x2_t res; + float convconst; + convconst = (float)(1. / ((uint64_t)1 << b)); + res.m64_f32[0] = a.m64_u32[0] * convconst; + res.m64_f32[1] = a.m64_u32[1] * convconst; + return res; +} + +_NEON2SSESTORAGE float32x4_t vcvtq_n_f32_s32(int32x4_t a, __constrange(1,32) int b); // VCVT.F32.S32 q0, q0, #32 +_NEON2SSE_INLINE float32x4_t vcvtq_n_f32_s32(int32x4_t a, __constrange(1,32) int b) +{ + float convconst; + __m128 cconst128, af; + convconst = (float)(1. / ((uint64_t)1 << b)); + af = _mm_cvtepi32_ps(a); + cconst128 = vdupq_n_f32(convconst); + return _mm_mul_ps(af,cconst128); +} + +_NEON2SSESTORAGE float32x4_t vcvtq_n_f32_u32(uint32x4_t a, __constrange(1,32) int b); // VCVT.F32.U32 q0, q0, #32 +_NEON2SSE_INLINE float32x4_t vcvtq_n_f32_u32(uint32x4_t a, __constrange(1,32) int b) +{ + float convconst; + __m128 cconst128, af; + convconst = (float)(1. 
/ ((uint64_t)1 << b)); + af = vcvtq_f32_u32(a); + cconst128 = vdupq_n_f32(convconst); + return _mm_mul_ps(af,cconst128); +} + +//**************Convert between floats *********************** +//************************************************************ +_NEON2SSE_GLOBAL float16x4_t vcvt_f16_f32(float32x4_t a); // VCVT.F16.F32 d0, q0 +//Intel SIMD doesn't support 16bits floats currently + +_NEON2SSE_GLOBAL float32x4_t vcvt_f32_f16(float16x4_t a); // VCVT.F32.F16 q0, d0 +//Intel SIMD doesn't support 16bits floats currently, the only solution is to store 16bit floats and load as 32 bits + +//************Vector narrow integer conversion (truncation) ****************** +//**************************************************************************** +_NEON2SSESTORAGE int8x8_t vmovn_s16(int16x8_t a); // VMOVN.I16 d0,q0 +_NEON2SSE_INLINE int8x8_t vmovn_s16(int16x8_t a) // VMOVN.I16 d0,q0 +{ + int8x8_t res64; + __m128i res; + res = _mm_shuffle_epi8 (a, *(__m128i*) mask8_16_even_odd); //use 64 low bits only + return64(res); +} + +_NEON2SSESTORAGE int16x4_t vmovn_s32(int32x4_t a); // VMOVN.I32 d0,q0 +_NEON2SSE_INLINE int16x4_t vmovn_s32(int32x4_t a) // VMOVN.I32 d0,q0 +{ + int16x4_t res64; + __m128i res; + res = _mm_shuffle_epi8 (a, *(__m128i*) mask8_32_even_odd); //use 64 low bits only + return64(res); +} + +_NEON2SSESTORAGE int32x2_t vmovn_s64(int64x2_t a); // VMOVN.I64 d0,q0 +_NEON2SSE_INLINE int32x2_t vmovn_s64(int64x2_t a) +{ + //may be not effective compared with a serial implementation + int32x2_t res64; + __m128i res; + res = _mm_shuffle_epi32 (a, 0 | (2 << 2) | (1 << 4) | (3 << 6)); //use 64 low bits only, _MM_SHUFFLE(3, 1, 2, 0) + return64(res); +} + +_NEON2SSE_GLOBAL uint8x8_t vmovn_u16(uint16x8_t a); // VMOVN.I16 d0,q0 +#define vmovn_u16 vmovn_s16 + +_NEON2SSE_GLOBAL uint16x4_t vmovn_u32(uint32x4_t a); // VMOVN.I32 d0,q0 +#define vmovn_u32 vmovn_s32 + +_NEON2SSE_GLOBAL uint32x2_t vmovn_u64(uint64x2_t a); // VMOVN.I64 d0,q0 +#define vmovn_u64 vmovn_s64 + +//**************** Vector long move *********************** +//*********************************************************** +_NEON2SSESTORAGE int16x8_t vmovl_s8(int8x8_t a); // VMOVL.S8 q0,d0 +_NEON2SSE_INLINE int16x8_t vmovl_s8(int8x8_t a) +{ + return _MM_CVTEPI8_EPI16(_pM128i(a)); //SSE4.1 +} + +_NEON2SSESTORAGE int32x4_t vmovl_s16(int16x4_t a); // VMOVL.S16 q0,d0 +_NEON2SSE_INLINE int32x4_t vmovl_s16(int16x4_t a) +{ + return _MM_CVTEPI16_EPI32(_pM128i(a)); //SSE4.1 +} + +_NEON2SSESTORAGE int64x2_t vmovl_s32(int32x2_t a); // VMOVL.S32 q0,d0 +_NEON2SSE_INLINE int64x2_t vmovl_s32(int32x2_t a) +{ + return _MM_CVTEPI32_EPI64(_pM128i(a)); //SSE4.1 +} + +_NEON2SSESTORAGE uint16x8_t vmovl_u8(uint8x8_t a); // VMOVL.U8 q0,d0 +_NEON2SSE_INLINE uint16x8_t vmovl_u8(uint8x8_t a) +{ + return _MM_CVTEPU8_EPI16(_pM128i(a)); //SSE4.1 +} + +_NEON2SSESTORAGE uint32x4_t vmovl_u16(uint16x4_t a); // VMOVL.s16 q0,d0 +_NEON2SSE_INLINE uint32x4_t vmovl_u16(uint16x4_t a) +{ + return _MM_CVTEPU16_EPI32(_pM128i(a)); //SSE4.1 +} + +_NEON2SSESTORAGE uint64x2_t vmovl_u32(uint32x2_t a); // VMOVL.U32 q0,d0 +_NEON2SSE_INLINE uint64x2_t vmovl_u32(uint32x2_t a) +{ + return _MM_CVTEPU32_EPI64(_pM128i(a)); //SSE4.1 +} + +//*************Vector saturating narrow integer***************** +//************************************************************** +_NEON2SSESTORAGE int8x8_t vqmovn_s16(int16x8_t a); // VQMOVN.S16 d0,q0 +_NEON2SSE_INLINE int8x8_t vqmovn_s16(int16x8_t a) +{ + int8x8_t res64; + __m128i res; + res = _mm_packs_epi16(a, a); + return64(res); +} + +_NEON2SSESTORAGE 
int16x4_t vqmovn_s32(int32x4_t a); // VQMOVN.S32 d0,q0
+_NEON2SSE_INLINE int16x4_t vqmovn_s32(int32x4_t a)
+{
+    int16x4_t res64;
+    __m128i res;
+    res = _mm_packs_epi32(a, a);
+    return64(res);
+}
+
+_NEON2SSESTORAGE int32x2_t vqmovn_s64(int64x2_t a); // VQMOVN.S64 d0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int32x2_t vqmovn_s64(int64x2_t a),_NEON2SSE_REASON_SLOW_SERIAL) //no effective SIMD solution
+{
+    int32x2_t res;
+    _NEON2SSE_ALIGN_16 int64_t atmp[2];
+    _mm_store_si128((__m128i*)atmp, a);
+    if(atmp[0] > SINT_MAX) atmp[0] = SINT_MAX;
+    if(atmp[0] < SINT_MIN) atmp[0] = SINT_MIN;
+    if(atmp[1] > SINT_MAX) atmp[1] = SINT_MAX;
+    if(atmp[1] < SINT_MIN) atmp[1] = SINT_MIN;
+    res.m64_i32[0] = (int32_t)atmp[0];
+    res.m64_i32[1] = (int32_t)atmp[1];
+    return res;
+}
+
+_NEON2SSESTORAGE uint8x8_t vqmovn_u16(uint16x8_t a); // VQMOVN.U16 d0,q0
+_NEON2SSE_INLINE uint8x8_t vqmovn_u16(uint16x8_t a)
+{
+    //no uint16 to uint8 saturating conversion in SSE, use the same mask trick as vqmovn_u64 below
+    uint8x8_t res64;
+    __m128i res_hi, mask;
+    mask = _mm_setzero_si128();
+    res_hi = _mm_srli_epi16(a, 8);
+    res_hi = _mm_cmpeq_epi8(res_hi, mask);
+    mask = _mm_cmpeq_epi8(mask,mask); //all ff
+    mask = _mm_andnot_si128(res_hi,mask); //invert res_hi to get >8 bits numbers
+    res_hi = _mm_or_si128(a, mask); //saturated res
+    res_hi = _mm_shuffle_epi8 (res_hi, *(__m128i*) mask8_16_even_odd); //go to 8 bits
+    return64(res_hi);
+}
+
+_NEON2SSESTORAGE uint16x4_t vqmovn_u32(uint32x4_t a); // VQMOVN.U32 d0,q0
+_NEON2SSE_INLINE uint16x4_t vqmovn_u32(uint32x4_t a)
+{
+    //no uint32 to uint16 saturating conversion in SSE, use the same mask trick
+    uint16x4_t res64;
+    __m128i res_hi, mask;
+    mask = _mm_setzero_si128();
+    res_hi = _mm_srli_epi32(a, 16);
+    res_hi = _mm_cmpeq_epi16(res_hi, mask);
+    mask = _mm_cmpeq_epi16(mask,mask); //all fff
+    mask = _mm_andnot_si128(res_hi,mask); //invert res_hi to get >16 bits numbers
+    res_hi = _mm_or_si128(a, mask); //saturated res
+    res_hi = _mm_shuffle_epi8 (res_hi, *(__m128i*) mask8_32_even_odd); //go to 16 bits
+    return64(res_hi);
+}
+
+_NEON2SSESTORAGE uint32x2_t vqmovn_u64(uint64x2_t a); // VQMOVN.U64 d0,q0
+_NEON2SSE_INLINE uint32x2_t vqmovn_u64(uint64x2_t a)
+{
+    //serial solution may be faster
+    uint32x2_t res64;
+    __m128i res_hi, mask;
+    mask = _mm_setzero_si128();
+    res_hi = _mm_srli_epi64(a, 32);
+    res_hi = _mm_cmpeq_epi32(res_hi, mask);
+    mask = _mm_cmpeq_epi32(mask,mask); //all fff
+    mask = _mm_andnot_si128(res_hi,mask); //invert res_hi to get >32 bits numbers
+    res_hi = _mm_or_si128(a, mask);
+    res_hi = _mm_shuffle_epi32(res_hi, 0 | (2 << 2) | (1 << 4) | (3 << 6)); //shuffle the data to get 2 32-bits
+    return64(res_hi);
+}
+//************* Vector saturating narrow integer signed->unsigned **************
+//*****************************************************************************
+_NEON2SSESTORAGE uint8x8_t vqmovun_s16(int16x8_t a); // VQMOVUN.S16 d0,q0
+_NEON2SSE_INLINE uint8x8_t vqmovun_s16(int16x8_t a)
+{
+    uint8x8_t res64;
+    __m128i res;
+    res = _mm_packus_epi16(a, a); //use low 64bits only
+    return64(res);
+}
+
+_NEON2SSESTORAGE uint16x4_t vqmovun_s32(int32x4_t a); // VQMOVUN.S32 d0,q0
+_NEON2SSE_INLINE uint16x4_t vqmovun_s32(int32x4_t a)
+{
+    uint16x4_t res64;
+    __m128i res;
+    res = _MM_PACKUS1_EPI32(a); //use low 64bits only
+    return64(res);
+}
+
+_NEON2SSESTORAGE uint32x2_t vqmovun_s64(int64x2_t a); // VQMOVUN.S64 d0,q0
+_NEON2SSE_INLINE uint32x2_t vqmovun_s64(int64x2_t a)
+{
+    uint32x2_t res64;
+    __m128i res_hi,res_lo, zero, cmp;
+    zero = _mm_setzero_si128();
+    res_hi = _mm_srli_epi64(a, 32);
+    cmp = _mm_cmpgt_epi32(zero, res_hi); //if cmp<0 the result should be zero
+    res_lo = _mm_andnot_si128(cmp,a); //if cmp zero - do nothing, otherwise cmp <0 and the result is 0
+    cmp = _mm_cmpgt_epi32(res_hi,zero); //if cmp positive
+    res_lo = _mm_or_si128(res_lo, cmp); //if cmp positive we are out of 32 bits and need to saturate to 0xffffffff
+    res_lo = _mm_shuffle_epi32(res_lo, 0 | (2 << 2) | (1 << 4) | (3 << 6)); //shuffle the data to get 2 32-bits
+    return64(res_lo);
+}
+
+// ********************************************************
+// **************** Table look up **************************
+// ********************************************************
+//VTBL (Vector Table Lookup) uses byte indexes in a control vector to look up byte values
+//in a table and generate a new vector. Indexes out of range return 0.
+ +//for Intel SIMD we need to set the MSB to 1 for zero return +//if b is unsigned ( > max signed) or negative it has MSB 1 set and doesn't need any special processing +_NEON2SSESTORAGE uint8x8_t vtbl1_u8(uint8x8_t a, uint8x8_t b); // VTBL.8 d0, {d0}, d0 +_NEON2SSE_INLINE uint8x8_t vtbl1_u8(uint8x8_t a, uint8x8_t b) +{ + uint8x8_t res64; + __m128i c7, maskgt, bmask, b128; + c7 = _mm_set1_epi8 (7); + b128 = _pM128i(b); + maskgt = _mm_cmpgt_epi8(b128,c7); + bmask = _mm_or_si128(b128,maskgt); + bmask = _mm_shuffle_epi8(_pM128i(a),bmask); + return64(bmask); +} + +_NEON2SSE_GLOBAL int8x8_t vtbl1_s8(int8x8_t a, int8x8_t b); // VTBL.8 d0, {d0}, d0 +#define vtbl1_s8 vtbl1_u8 + +_NEON2SSE_GLOBAL poly8x8_t vtbl1_p8(poly8x8_t a, uint8x8_t b); // VTBL.8 d0, {d0}, d0 +#define vtbl1_p8 vtbl1_u8 + +_NEON2SSESTORAGE uint8x8_t vtbl2_u8(uint8x8x2_t a, uint8x8_t b); // VTBL.8 d0, {d0, d1}, d0 +_NEON2SSE_INLINE uint8x8_t vtbl2_u8(uint8x8x2_t a, uint8x8_t b) +{ + uint8x8_t res64; + __m128i c15, a01, maskgt15, bmask, b128; + c15 = _mm_set1_epi8 (15); + b128 = _pM128i(b); + maskgt15 = _mm_cmpgt_epi8(b128,c15); + bmask = _mm_or_si128(b128, maskgt15); + a01 = _mm_unpacklo_epi64(_pM128i(a.val[0]), _pM128i(a.val[1])); + a01 = _mm_shuffle_epi8(a01, bmask); + return64(a01); +} + +//int8x8_t vtbl2_s8(int8x8x2_t a, int8x8_t b); // VTBL.8 d0, {d0, d1}, d0 +#define vtbl2_s8 vtbl2_u8 + +//poly8x8_t vtbl2_p8(poly8x8x2_t a, uint8x8_t b); // VTBL.8 d0, {d0, d1}, d0 +#define vtbl2_p8 vtbl2_u8 + +_NEON2SSESTORAGE uint8x8_t vtbl3_u8(uint8x8x3_t a, uint8x8_t b); // VTBL.8 d0, {d0, d1, d2}, d0 +_NEON2SSE_INLINE uint8x8_t vtbl3_u8(uint8x8x3_t a, uint8x8_t b) +{ + //solution may be not optimal + uint8x8_t res64; + __m128i c15, c23, maskgt23, bmask, maskgt15, sh0, sh1, a01, b128; + c15 = _mm_set1_epi8 (15); + c23 = _mm_set1_epi8 (23); + b128 = _pM128i(b); + maskgt23 = _mm_cmpgt_epi8(b128,c23); + bmask = _mm_or_si128(b128, maskgt23); + maskgt15 = _mm_cmpgt_epi8(b128,c15); + a01 = _mm_unpacklo_epi64(_pM128i(a.val[0]),_pM128i(a.val[1])); + sh0 = _mm_shuffle_epi8(a01, bmask); + sh1 = _mm_shuffle_epi8(_pM128i(a.val[2]), bmask); //for bi>15 bi is wrapped (bi-=15) + sh0 = _MM_BLENDV_EPI8(sh0, sh1, maskgt15); //SSE4.1 + return64(sh0); +} + +_NEON2SSE_GLOBAL int8x8_t vtbl3_s8(int8x8x3_t a, int8x8_t b); // VTBL.8 d0, {d0, d1, d2}, d0 +#define vtbl3_s8 vtbl3_u8 + +_NEON2SSE_GLOBAL poly8x8_t vtbl3_p8(poly8x8x3_t a, uint8x8_t b); // VTBL.8 d0, {d0, d1, d2}, d0 +#define vtbl3_p8 vtbl3_u8 + +_NEON2SSESTORAGE uint8x8_t vtbl4_u8(uint8x8x4_t a, uint8x8_t b); // VTBL.8 d0, {d0, d1, d2, d3}, d0 +_NEON2SSE_INLINE uint8x8_t vtbl4_u8(uint8x8x4_t a, uint8x8_t b) +{ + //solution may be not optimal + uint8x8_t res64; + __m128i c15, c31, maskgt31, bmask, maskgt15, sh0, sh1, a01, a23, b128; + c15 = _mm_set1_epi8 (15); + c31 = _mm_set1_epi8 (31); + b128 = _pM128i(b); + maskgt31 = _mm_cmpgt_epi8(b128,c31); + bmask = _mm_or_si128(b128, maskgt31); + maskgt15 = _mm_cmpgt_epi8(b128,c15); + a01 = _mm_unpacklo_epi64(_pM128i(a.val[0]),_pM128i(a.val[1])); + a23 = _mm_unpacklo_epi64(_pM128i(a.val[2]),_pM128i(a.val[3])); + sh0 = _mm_shuffle_epi8(a01, bmask); + sh1 = _mm_shuffle_epi8(a23, bmask); //for bi>15 bi is wrapped (bi-=15) + sh0 = _MM_BLENDV_EPI8 (sh0, sh1, maskgt15); //SSE4.1 + return64(sh0); +} + +_NEON2SSE_GLOBAL int8x8_t vtbl4_s8(int8x8x4_t a, int8x8_t b); // VTBL.8 d0, {d0, d1, d2, d3}, d0 +#define vtbl4_s8 vtbl4_u8 + +_NEON2SSE_GLOBAL poly8x8_t vtbl4_p8(poly8x8x4_t a, uint8x8_t b); // VTBL.8 d0, {d0, d1, d2, d3}, d0 +#define vtbl4_p8 vtbl4_u8 + 
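+
+//--- Illustrative usage sketch (editorial addition, not part of the patch) ---
+//The vtbl emulations above force the MSB of every out-of-range index so that
+//_mm_shuffle_epi8 writes zero for it, matching the NEON VTBL contract described
+//above. Assuming the header is included as "NEON_2_SSE.h"; the demo function is
+//hypothetical.
#include "NEON_2_SSE.h"

static uint8x8_t nibble_to_hex_demo(uint8x8_t nibbles)
{
    //16-entry table split over two d-registers: indexes 0..15 select a hex
    //digit, anything above 15 would come back as 0 from vtbl2_u8
    static const uint8_t hex[16] = { '0','1','2','3','4','5','6','7',
                                     '8','9','a','b','c','d','e','f' };
    uint8x8x2_t table;
    table.val[0] = vld1_u8(hex);
    table.val[1] = vld1_u8(hex + 8);
    return vtbl2_u8(table, nibbles);
}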
+//****************** Extended table look up intrinsics *************************** +//********************************************************************************** +//VTBX (Vector Table Extension) works in the same way as VTBL do, +// except that indexes out of range leave the destination element unchanged. + +_NEON2SSESTORAGE uint8x8_t vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c); // VTBX.8 d0, {d0}, d0 +_NEON2SSE_INLINE uint8x8_t vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) +{ + uint8x8_t res64; + __m128i c8, maskgt, sh, c128; + c8 = _mm_set1_epi8(8); + c128 = _pM128i(c); + //need to pre-clamp c values to avoid unsigned comparison + c128 = _mm_min_epu8(c128, c8); + maskgt = _mm_cmpgt_epi8(c8,c128); + sh = _mm_shuffle_epi8(_pM128i(b),c128); + sh = _mm_and_si128(maskgt,sh); + c8 = _mm_andnot_si128(maskgt,_pM128i(a)); + sh = _mm_or_si128(sh,c8); + return64(sh); +} + +_NEON2SSE_GLOBAL int8x8_t vtbx1_s8(int8x8_t a, int8x8_t b, int8x8_t c); // VTBX.8 d0, {d0}, d0 +#define vtbx1_s8 vtbx1_u8 + +_NEON2SSE_GLOBAL poly8x8_t vtbx1_p8(poly8x8_t a, poly8x8_t b, uint8x8_t c); // VTBX.8 d0, {d0}, d0 +#define vtbx1_p8 vtbx1_u8 + +_NEON2SSESTORAGE uint8x8_t vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c); // VTBX.8 d0, {d0, d1}, d0 +_NEON2SSE_INLINE uint8x8_t vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) +{ + uint8x8_t res64; + __m128i c16, b01, maskgt15, sh, c128; + c16 = _mm_set1_epi8(16); + c128 = _pM128i(c); + //need to pre-clamp c values to avoid unsigned comparison + c128 = _mm_min_epu8(c128, c16); + maskgt15 = _mm_cmpgt_epi8(c16,c128); + b01 = _mm_unpacklo_epi64(_pM128i(b.val[0]), _pM128i(b.val[1])); + sh = _mm_shuffle_epi8(b01, c128); + sh = _mm_and_si128(maskgt15, sh); + c16 = _mm_andnot_si128(maskgt15, _pM128i(a)); + sh = _mm_or_si128(sh,c16); + return64(sh); +} + +//int8x8_t vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c); // VTBX.8 d0, {d0, d1}, d0 +#define vtbx2_s8 vtbx2_u8 + +//poly8x8_t vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c); // VTBX.8 d0, {d0, d1}, d0 +#define vtbx2_p8 vtbx2_u8 + +_NEON2SSESTORAGE uint8x8_t vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c); // VTBX.8 d0, {d0, d1, d2}, d0 +_NEON2SSE_INLINE uint8x8_t vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c) +{ + //solution may be not optimal + uint8x8_t res64; + __m128i c15, c24, maskgt15, maskgt23, sh0, sh1, b01, c128; + c15 = _mm_set1_epi8 (15); + c24 = _mm_set1_epi8 (24); + c128 = _pM128i(c); + //need to pre-clamp c values to avoid unsigned comparison + c128 = _mm_min_epu8(c128, c24); + maskgt23 = _mm_cmpgt_epi8(c24,c128); + maskgt15 = _mm_cmpgt_epi8(c128,c15); + c24 = _mm_andnot_si128(maskgt23, _pM128i(a)); + b01 = _mm_unpacklo_epi64(_pM128i(b.val[0]),_pM128i(b.val[1])); + sh0 = _mm_shuffle_epi8(b01, c128); + sh1 = _mm_shuffle_epi8(_pM128i(b.val[2]), c128); //for bi>15 bi is wrapped (bi-=15) + sh0 = _MM_BLENDV_EPI8(sh0, sh1, maskgt15); + sh0 = _mm_and_si128(maskgt23,sh0); + sh0 = _mm_or_si128(sh0,c24); + return64(sh0); +} + +_NEON2SSE_GLOBAL int8x8_t vtbx3_s8(int8x8_t a, int8x8x3_t b, int8x8_t c); // VTBX.8 d0, {d0, d1, d2}, d0 +#define vtbx3_s8 vtbx3_u8 + +_NEON2SSE_GLOBAL poly8x8_t vtbx3_p8(poly8x8_t a, poly8x8x3_t b, uint8x8_t c); // VTBX.8 d0, {d0, d1, d2}, d0 +#define vtbx3_p8 vtbx3_u8 + +_NEON2SSESTORAGE uint8x8_t vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c); // VTBX.8 d0, {d0, d1, d2, d3}, d0 +_NEON2SSE_INLINE uint8x8_t vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c) +{ + //solution may be not optimal + uint8x8_t res64; + __m128i c15, c32, maskgt15, maskgt31, sh0, sh1, b01, b23, c128; + 
c15 = _mm_set1_epi8 (15); + c32 = _mm_set1_epi8 (32); + c128 = _pM128i(c); + //need to pre-clamp c values to avoid unsigned comparison + c128 = _mm_min_epu8(c128, c32); + maskgt15 = _mm_cmpgt_epi8(c128,c15); + maskgt31 = _mm_cmpgt_epi8(c32,c128); + c32 = _mm_andnot_si128(maskgt31, _pM128i(a)); + + b01 = _mm_unpacklo_epi64(_pM128i(b.val[0]),_pM128i(b.val[1])); + b23 = _mm_unpacklo_epi64(_pM128i(b.val[2]),_pM128i(b.val[3])); + sh0 = _mm_shuffle_epi8(b01, c128); + sh1 = _mm_shuffle_epi8(b23, c128); //for bi>15 bi is wrapped (bi-=15) + sh0 = _MM_BLENDV_EPI8(sh0, sh1, maskgt15); + sh0 = _mm_and_si128(maskgt31,sh0); + sh0 = _mm_or_si128(sh0,c32); + return64(sh0); +} + +_NEON2SSE_GLOBAL int8x8_t vtbx4_s8(int8x8_t a, int8x8x4_t b, int8x8_t c); // VTBX.8 d0, {d0, d1, d2, d3}, d0 +#define vtbx4_s8 vtbx4_u8 + +_NEON2SSE_GLOBAL poly8x8_t vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c); // VTBX.8 d0, {d0, d1, d2, d3}, d0 +#define vtbx4_p8 vtbx4_u8 + +//************************************************************************************************* +// *************************** Operations with a scalar value ********************************* +//************************************************************************************************* + +//******* Vector multiply accumulate by scalar ************************************************* +//********************************************************************************************** +_NEON2SSESTORAGE int16x4_t vmla_lane_s16(int16x4_t a, int16x4_t b, int16x4_t v, __constrange(0,3) int l); // VMLA.I16 d0, d0, d0[0] +_NEON2SSE_INLINE int16x4_t vmla_lane_s16(int16x4_t a, int16x4_t b, int16x4_t v, __constrange(0,3) int l) // VMLA.I16 d0, d0, d0[0] +{ + int16_t c; + int16x4_t scalar; + c = vget_lane_s16(v, l); + scalar = vdup_n_s16(c); + return vmla_s16(a, b, scalar); +} + +_NEON2SSESTORAGE int32x2_t vmla_lane_s32(int32x2_t a, int32x2_t b, int32x2_t v, __constrange(0,1) int l); // VMLA.I32 d0, d0, d0[0] +_NEON2SSE_INLINE int32x2_t vmla_lane_s32(int32x2_t a, int32x2_t b, int32x2_t v, __constrange(0,1) int l) // VMLA.I32 d0, d0, d0[0] +{ + int32_t c; + int32x2_t scalar; + c = vget_lane_s32(v, l); + scalar = vdup_n_s32(c); + return vmla_s32(a, b, scalar); +} + +_NEON2SSE_GLOBAL uint16x4_t vmla_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t v, __constrange(0,3) int l); // VMLA.I16 d0, d0, d0[0] +#define vmla_lane_u16 vmla_lane_s16 + + +_NEON2SSE_GLOBAL uint32x2_t vmla_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t v, __constrange(0,1) int l); // VMLA.I32 d0, d0, d0[0] +#define vmla_lane_u32 vmla_lane_s32 + +_NEON2SSESTORAGE float32x2_t vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v, __constrange(0,1) int l); // VMLA.F32 d0, d0, d0[0] +_NEON2SSE_INLINE float32x2_t vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v, __constrange(0,1) int l) +{ + float32_t vlane; + float32x2_t c; + vlane = vget_lane_f32(v, l); + c = vdup_n_f32(vlane); + return vmla_f32(a,b,c); +} + +_NEON2SSESTORAGE int16x8_t vmlaq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t v, __constrange(0,3) int l); // VMLA.I16 q0, q0, d0[0] +_NEON2SSE_INLINE int16x8_t vmlaq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t v, __constrange(0,3) int l) // VMLA.I16 q0, q0, d0[0] +{ + int16_t vlane; + int16x8_t c; + vlane = vget_lane_s16(v, l); + c = vdupq_n_s16(vlane); + return vmlaq_s16(a,b,c); +} + +_NEON2SSESTORAGE int32x4_t vmlaq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v, __constrange(0,1) int l); // VMLA.I32 q0, q0, d0[0] +_NEON2SSE_INLINE int32x4_t vmlaq_lane_s32(int32x4_t a, 
int32x4_t b, int32x2_t v, __constrange(0,1) int l) // VMLA.I32 q0, q0, d0[0] +{ + int32_t vlane; + int32x4_t c; + vlane = vget_lane_s32(v, l); + c = vdupq_n_s32(vlane); + return vmlaq_s32(a,b,c); +} + +_NEON2SSE_GLOBAL uint16x8_t vmlaq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t v, __constrange(0,3) int l); // VMLA.I16 q0, q0, d0[0] +#define vmlaq_lane_u16 vmlaq_lane_s16 + +_NEON2SSE_GLOBAL uint32x4_t vmlaq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t v, __constrange(0,1) int l); // VMLA.I32 q0, q0, d0[0] +#define vmlaq_lane_u32 vmlaq_lane_s32 + +_NEON2SSESTORAGE float32x4_t vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v, __constrange(0,1) int l); // VMLA.F32 q0, q0, d0[0] +_NEON2SSE_INLINE float32x4_t vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v, __constrange(0,1) int l) // VMLA.F32 q0, q0, d0[0] +{ + float32_t vlane; + float32x4_t c; + vlane = vget_lane_f32(v, l); + c = vdupq_n_f32(vlane); + return vmlaq_f32(a,b,c); +} + +//***************** Vector widening multiply accumulate by scalar ********************** +//*************************************************************************************** +_NEON2SSESTORAGE int32x4_t vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v, __constrange(0,3) int l); // VMLAL.S16 q0, d0, d0[0] +_NEON2SSE_INLINE int32x4_t vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v, __constrange(0,3) int l) // VMLAL.S16 q0, d0, d0[0] +{ + int16_t vlane; + int16x4_t c; + vlane = vget_lane_s16(v, l); + c = vdup_n_s16(vlane); + return vmlal_s16(a, b, c); +} + +_NEON2SSESTORAGE int64x2_t vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v, __constrange(0,1) int l); // VMLAL.S32 q0, d0, d0[0] +_NEON2SSE_INLINE int64x2_t vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v, __constrange(0,1) int l) // VMLAL.S32 q0, d0, d0[0] +{ + int32_t vlane; + int32x2_t c; + vlane = vget_lane_s32(v, l); + c = vdup_n_s32(vlane); + return vmlal_s32(a, b, c); +} + +_NEON2SSESTORAGE uint32x4_t vmlal_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t v, __constrange(0,3) int l); // VMLAL.s16 q0, d0, d0[0] +_NEON2SSE_INLINE uint32x4_t vmlal_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t v, __constrange(0,3) int l) // VMLAL.s16 q0, d0, d0[0] +{ + uint16_t vlane; + uint16x4_t c; + vlane = vget_lane_u16(v, l); + c = vdup_n_u16(vlane); + return vmlal_u16(a, b, c); +} + +_NEON2SSESTORAGE uint64x2_t vmlal_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t v, __constrange(0,1) int l); // VMLAL.U32 q0, d0, d0[0] +_NEON2SSE_INLINE uint64x2_t vmlal_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t v, __constrange(0,1) int l) // VMLAL.U32 q0, d0, d0[0] +{ + uint32_t vlane; + uint32x2_t c; + vlane = vget_lane_u32(v, l); + c = vdup_n_u32(vlane); + return vmlal_u32(a, b, c); +} + +// ******** Vector widening saturating doubling multiply accumulate by scalar ******************************* +// ************************************************************************************************ +_NEON2SSESTORAGE int32x4_t vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v, __constrange(0,3) int l); // VQDMLAL.S16 q0, d0, d0[0] +_NEON2SSE_INLINE int32x4_t vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v, __constrange(0,3) int l) +{ + int16_t vlane; + int16x4_t c; + vlane = vget_lane_s16(v, l); + c = vdup_n_s16(vlane); + return vqdmlal_s16(a, b, c); +} + +_NEON2SSESTORAGE int64x2_t vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v, __constrange(0,1) int l); // VQDMLAL.S32 q0, d0, d0[0] +_NEON2SSE_INLINE int64x2_t vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t 
v, __constrange(0,1) int l)
+{
+    int32_t vlane;
+    int32x2_t c;
+    vlane = vget_lane_s32(v, l);
+    c = vdup_n_s32(vlane);
+    return vqdmlal_s32(a, b, c);
+}
+
+// ****** Vector multiply subtract by scalar *****************
+// *************************************************************
+_NEON2SSESTORAGE int16x4_t vmls_lane_s16(int16x4_t a, int16x4_t b, int16x4_t v, __constrange(0,3) int l); // VMLS.I16 d0, d0, d0[0]
+_NEON2SSE_INLINE int16x4_t vmls_lane_s16(int16x4_t a, int16x4_t b, int16x4_t v, __constrange(0,3) int l) // VMLS.I16 d0, d0, d0[0]
+{
+    int16_t vlane;
+    int16x4_t c;
+    vlane = vget_lane_s16(v, l);
+    c = vdup_n_s16(vlane);
+    return vmls_s16(a, b, c);
+}
+
+_NEON2SSESTORAGE int32x2_t vmls_lane_s32(int32x2_t a, int32x2_t b, int32x2_t v, __constrange(0,1) int l); // VMLS.I32 d0, d0, d0[0]
+_NEON2SSE_INLINE int32x2_t vmls_lane_s32(int32x2_t a, int32x2_t b, int32x2_t v, __constrange(0,1) int l) // VMLS.I32 d0, d0, d0[0]
+{
+    int32_t vlane;
+    int32x2_t c;
+    vlane = vget_lane_s32(v, l);
+    c = vdup_n_s32(vlane);
+    return vmls_s32(a, b, c);
+}
+
+_NEON2SSESTORAGE uint16x4_t vmls_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t v, __constrange(0,3) int l); // VMLS.I16 d0, d0, d0[0]
+_NEON2SSE_INLINE uint16x4_t vmls_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t v, __constrange(0,3) int l) // VMLS.I16 d0, d0, d0[0]
+{
+    uint16_t vlane;
+    uint16x4_t c;
+    vlane = vget_lane_u16(v, l);
+    c = vdup_n_u16(vlane);
+    return vmls_u16(a, b, c);
+}
+
+_NEON2SSESTORAGE uint32x2_t vmls_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t v, __constrange(0,1) int l); // VMLS.I32 d0, d0, d0[0]
+_NEON2SSE_INLINE uint32x2_t vmls_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t v, __constrange(0,1) int l) // VMLS.I32 d0, d0, d0[0]
+{
+    uint32_t vlane;
+    uint32x2_t c;
+    vlane = vget_lane_u32(v, l);
+    c = vdup_n_u32(vlane);
+    return vmls_u32(a, b, c);
+}
+
+_NEON2SSESTORAGE float32x2_t vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v, __constrange(0,1) int l); // VMLS.F32 d0, d0, d0[0]
+_NEON2SSE_INLINE float32x2_t vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v, __constrange(0,1) int l)
+{
+    float32_t vlane;
+    float32x2_t c;
+    vlane = vget_lane_f32(v, l);
+    c = vdup_n_f32(vlane);
+    return vmls_f32(a,b,c);
+}
+
+_NEON2SSESTORAGE int16x8_t vmlsq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t v, __constrange(0,3) int l); // VMLS.I16 q0, q0, d0[0]
+_NEON2SSE_INLINE int16x8_t vmlsq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t v, __constrange(0,3) int l) // VMLS.I16 q0, q0, d0[0]
+{
+    int16_t vlane;
+    int16x8_t c;
+    vlane = vget_lane_s16(v, l);
+    c = vdupq_n_s16(vlane);
+    return vmlsq_s16(a, b, c);
+}
+
+_NEON2SSESTORAGE int32x4_t vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v, __constrange(0,1) int l); // VMLS.I32 q0, q0, d0[0]
+_NEON2SSE_INLINE int32x4_t vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v, __constrange(0,1) int l) // VMLS.I32 q0, q0, d0[0]
+{
+    int32_t vlane;
+    int32x4_t c;
+    vlane = vget_lane_s32(v, l);
+    c = vdupq_n_s32(vlane);
+    return vmlsq_s32(a,b,c);
+}
+
+_NEON2SSESTORAGE uint16x8_t vmlsq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t v, __constrange(0,3) int l); // VMLS.I16 q0, q0, d0[0]
+_NEON2SSE_INLINE uint16x8_t vmlsq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t v, __constrange(0,3) int l) // VMLS.I16 q0, q0, d0[0]
+{
+    uint16_t vlane;
+    uint16x8_t c;
+    vlane = vget_lane_u16(v, l);
+    c = vdupq_n_u16(vlane);
+    return vmlsq_u16(a,b,c);
+}
+
+_NEON2SSESTORAGE uint32x4_t vmlsq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t v, __constrange(0,1) int l); // VMLS.I32 q0, q0, d0[0]
+_NEON2SSE_INLINE uint32x4_t vmlsq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t v, __constrange(0,1) int l) // VMLS.I32 q0, q0, d0[0]
+{
+    uint32_t vlane;
+    uint32x4_t c;
+    vlane = vget_lane_u32(v, l);
+    c = vdupq_n_u32(vlane);
+    return vmlsq_u32(a,b,c);
+}
+
+_NEON2SSESTORAGE float32x4_t vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v, __constrange(0,1) int l); // VMLS.F32 q0, q0, d0[0]
+_NEON2SSE_INLINE float32x4_t vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v, __constrange(0,1) int l) // VMLS.F32 q0, q0, d0[0]
+{
+    float32_t vlane;
+    float32x4_t c;
+    vlane = vget_lane_f32(v, l);
+    c = vdupq_n_f32(vlane);
+    return vmlsq_f32(a,b,c);
+}
+
+// **** Vector widening multiply subtract by scalar ****
+// ****************************************************
+_NEON2SSESTORAGE int32x4_t vmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v, __constrange(0,3) int l); // VMLSL.S16 q0, d0, d0[0]
+_NEON2SSE_INLINE int32x4_t vmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v, __constrange(0,3) int l) // VMLSL.S16 q0, d0, d0[0]
+{
+    int16_t vlane;
+    int16x4_t c;
+    vlane = vget_lane_s16(v, l);
+    c = vdup_n_s16(vlane);
+    return vmlsl_s16(a, b, c);
+}
+
+_NEON2SSESTORAGE int64x2_t vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v, __constrange(0,1) int l); // VMLSL.S32 q0, d0, d0[0]
+_NEON2SSE_INLINE int64x2_t vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v, __constrange(0,1) int l) // VMLSL.S32 q0, d0, d0[0]
+{
+    int32_t vlane;
+    int32x2_t c;
+    vlane = vget_lane_s32(v, l);
+    c = vdup_n_s32(vlane);
+    return vmlsl_s32(a, b, c);
+}
+
+_NEON2SSESTORAGE uint32x4_t vmlsl_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t v, __constrange(0,3) int l); // VMLSL.U16 q0, d0, d0[0]
+_NEON2SSE_INLINE uint32x4_t vmlsl_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t v, __constrange(0,3) int l) // VMLSL.U16 q0, d0, d0[0]
+{
+    uint16_t vlane;
+    uint16x4_t c;
+    vlane = vget_lane_u16(v, l);
+    c = vdup_n_u16(vlane);
+    return vmlsl_u16(a, b, c);
+}
+
+_NEON2SSESTORAGE uint64x2_t vmlsl_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t v, __constrange(0,1) int l); // VMLSL.U32 q0, d0, d0[0]
+_NEON2SSE_INLINE uint64x2_t vmlsl_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t v, __constrange(0,1) int l) // VMLSL.U32 q0, d0, d0[0]
+{
+    uint32_t vlane;
+    uint32x2_t c;
+    vlane = vget_lane_u32(v, l);
+    c = vdup_n_u32(vlane);
+    return vmlsl_u32(a, b, c);
+}
+
+//********* Vector widening saturating doubling multiply subtract by scalar **************************
+//******************************************************************************************************
+_NEON2SSESTORAGE int32x4_t vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v, __constrange(0,3) int l); // VQDMLSL.S16 q0, d0, d0[0]
+_NEON2SSE_INLINE int32x4_t vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v, __constrange(0,3) int l)
+{
+    int16_t vlane;
+    int16x4_t c;
+    vlane = vget_lane_s16(v, l);
+    c = vdup_n_s16(vlane);
+    return vqdmlsl_s16(a, b, c);
+}
+
+_NEON2SSESTORAGE int64x2_t vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v, __constrange(0,1) int l); // VQDMLSL.S32 q0, d0, d0[0]
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x2_t vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v, __constrange(0,1) int l), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    int32_t vlane;
+    int32x2_t c;
+    vlane = vget_lane_s32(v, l);
+    c = vdup_n_s32(vlane);
+    return vqdmlsl_s32(a, b, c);
+}
+//********** Vector multiply with scalar *****************************
+_NEON2SSESTORAGE int16x4_t 
vmul_n_s16(int16x4_t a, int16_t b); // VMUL.I16 d0,d0,d0[0] +_NEON2SSE_INLINE int16x4_t vmul_n_s16(int16x4_t a, int16_t b) // VMUL.I16 d0,d0,d0[0] +{ + int16x4_t b16x4; + b16x4 = vdup_n_s16(b); + return vmul_s16(a, b16x4); +} + +_NEON2SSESTORAGE int32x2_t vmul_n_s32(int32x2_t a, int32_t b); // VMUL.I32 d0,d0,d0[0] +_NEON2SSE_INLINE int32x2_t vmul_n_s32(int32x2_t a, int32_t b) // VMUL.I32 d0,d0,d0[0] +{ + //serial solution looks faster + int32x2_t b32x2; + b32x2 = vdup_n_s32(b); + return vmul_s32(a, b32x2); +} + +_NEON2SSESTORAGE float32x2_t vmul_n_f32(float32x2_t a, float32_t b); // VMUL.F32 d0,d0,d0[0] +_NEON2SSE_INLINE float32x2_t vmul_n_f32(float32x2_t a, float32_t b) // VMUL.F32 d0,d0,d0[0] +{ + float32x2_t b32x2; + b32x2 = vdup_n_f32(b); + return vmul_f32(a, b32x2); +} + +_NEON2SSESTORAGE uint16x4_t vmul_n_u16(uint16x4_t a, uint16_t b); // VMUL.I16 d0,d0,d0[0] +_NEON2SSE_INLINE uint16x4_t vmul_n_u16(uint16x4_t a, uint16_t b) // VMUL.I16 d0,d0,d0[0] +{ + uint16x4_t b16x4; + b16x4 = vdup_n_s16(b); + return vmul_s16(a, b16x4); +} + +_NEON2SSESTORAGE uint32x2_t vmul_n_u32(uint32x2_t a, uint32_t b); // VMUL.I32 d0,d0,d0[0] +_NEON2SSE_INLINE uint32x2_t vmul_n_u32(uint32x2_t a, uint32_t b) // VMUL.I32 d0,d0,d0[0] +{ + //serial solution looks faster + uint32x2_t b32x2; + b32x2 = vdup_n_u32(b); + return vmul_u32(a, b32x2); +} + +_NEON2SSESTORAGE int16x8_t vmulq_n_s16(int16x8_t a, int16_t b); // VMUL.I16 q0,q0,d0[0] +_NEON2SSE_INLINE int16x8_t vmulq_n_s16(int16x8_t a, int16_t b) // VMUL.I16 q0,q0,d0[0] +{ + int16x8_t b16x8; + b16x8 = vdupq_n_s16(b); + return vmulq_s16(a, b16x8); +} + +_NEON2SSESTORAGE int32x4_t vmulq_n_s32(int32x4_t a, int32_t b); // VMUL.I32 q0,q0,d0[0] +_NEON2SSE_INLINE int32x4_t vmulq_n_s32(int32x4_t a, int32_t b) // VMUL.I32 q0,q0,d0[0] +{ + int32x4_t b32x4; + b32x4 = vdupq_n_s32(b); + return vmulq_s32(a, b32x4); +} + +_NEON2SSESTORAGE float32x4_t vmulq_n_f32(float32x4_t a, float32_t b); // VMUL.F32 q0,q0,d0[0] +_NEON2SSE_INLINE float32x4_t vmulq_n_f32(float32x4_t a, float32_t b) // VMUL.F32 q0,q0,d0[0] +{ + float32x4_t b32x4; + b32x4 = vdupq_n_f32(b); + return vmulq_f32(a, b32x4); +} + +_NEON2SSESTORAGE uint16x8_t vmulq_n_u16(uint16x8_t a, uint16_t b); // VMUL.I16 q0,q0,d0[0] +_NEON2SSE_INLINE uint16x8_t vmulq_n_u16(uint16x8_t a, uint16_t b) // VMUL.I16 q0,q0,d0[0] +{ + uint16x8_t b16x8; + b16x8 = vdupq_n_s16(b); + return vmulq_s16(a, b16x8); +} + +_NEON2SSESTORAGE uint32x4_t vmulq_n_u32(uint32x4_t a, uint32_t b); // VMUL.I32 q0,q0,d0[0] +_NEON2SSE_INLINE uint32x4_t vmulq_n_u32(uint32x4_t a, uint32_t b) // VMUL.I32 q0,q0,d0[0] +{ + uint32x4_t b32x4; + b32x4 = vdupq_n_u32(b); + return vmulq_u32(a, b32x4); +} + +//********** Vector multiply lane ***************************** +_NEON2SSESTORAGE int16x4_t vmul_lane_s16 (int16x4_t a, int16x4_t b, __constrange(0,3) int c); +_NEON2SSE_INLINE int16x4_t vmul_lane_s16 (int16x4_t a, int16x4_t b, __constrange(0,3) int c) +{ + int16x4_t b16x4; + int16_t vlane; + vlane = vget_lane_s16(b, c); + b16x4 = vdup_n_s16(vlane); + return vmul_s16(a, b16x4); +} + +_NEON2SSESTORAGE int32x2_t vmul_lane_s32 (int32x2_t a, int32x2_t b, __constrange(0,1) int c); +_NEON2SSE_INLINE int32x2_t vmul_lane_s32 (int32x2_t a, int32x2_t b, __constrange(0,1) int c) +{ + int32x2_t b32x2; + int32_t vlane; + vlane = vget_lane_s32(b, c); + b32x2 = vdup_n_s32(vlane); + return vmul_s32(a, b32x2); +} + +_NEON2SSESTORAGE float32x2_t vmul_lane_f32 (float32x2_t a, float32x2_t b, __constrange(0,1) int c); +_NEON2SSE_INLINE float32x2_t vmul_lane_f32 (float32x2_t a, 
float32x2_t b, __constrange(0,1) int c) +{ + float32x2_t b32x2; + float32_t vlane; + vlane = vget_lane_f32(b, c); + b32x2 = vdup_n_f32(vlane); + return vmul_f32(a, b32x2); +} + +_NEON2SSE_GLOBAL uint16x4_t vmul_lane_u16 (uint16x4_t a, uint16x4_t b, __constrange(0,3) int c); +#define vmul_lane_u16 vmul_lane_s16 + +_NEON2SSE_GLOBAL uint32x2_t vmul_lane_u32 (uint32x2_t a, uint32x2_t b, __constrange(0,1) int c); +#define vmul_lane_u32 vmul_lane_s32 + +_NEON2SSESTORAGE int16x8_t vmulq_lane_s16(int16x8_t a, int16x4_t b, __constrange(0,3) int c); +_NEON2SSE_INLINE int16x8_t vmulq_lane_s16 (int16x8_t a, int16x4_t b, __constrange(0,3) int c) +{ + int16x8_t b16x8; + int16_t vlane; + vlane = vget_lane_s16(b, c); + b16x8 = vdupq_n_s16(vlane); + return vmulq_s16(a, b16x8); +} + +_NEON2SSESTORAGE int32x4_t vmulq_lane_s32 (int32x4_t a, int32x2_t b, __constrange(0,1) int c); +_NEON2SSE_INLINE int32x4_t vmulq_lane_s32 (int32x4_t a, int32x2_t b, __constrange(0,1) int c) +{ + int32x4_t b32x4; + int32_t vlane; + vlane = vget_lane_s32(b, c); + b32x4 = vdupq_n_s32(vlane); + return vmulq_s32(a, b32x4); +} + +_NEON2SSESTORAGE float32x4_t vmulq_lane_f32 (float32x4_t a, float32x2_t b, __constrange(0,1) int c); +_NEON2SSE_INLINE float32x4_t vmulq_lane_f32 (float32x4_t a, float32x2_t b, __constrange(0,1) int c) +{ + float32x4_t b32x4; + float32_t vlane; + vlane = vget_lane_f32(b, c); + b32x4 = vdupq_n_f32(vlane); + return vmulq_f32(a, b32x4); +} + +_NEON2SSE_GLOBAL uint16x8_t vmulq_lane_u16 (uint16x8_t a, uint16x4_t b, __constrange(0,3) int c); +#define vmulq_lane_u16 vmulq_lane_s16 + +_NEON2SSE_GLOBAL uint32x4_t vmulq_lane_u32 (uint32x4_t a, uint32x2_t b, __constrange(0,1) int c); +#define vmulq_lane_u32 vmulq_lane_s32 + +//**** Vector long multiply with scalar ************ +_NEON2SSESTORAGE int32x4_t vmull_n_s16(int16x4_t vec1, int16_t val2); // VMULL.S16 q0,d0,d0[0] +_NEON2SSE_INLINE int32x4_t vmull_n_s16(int16x4_t vec1, int16_t val2) // VMULL.S16 q0,d0,d0[0] +{ + int16x4_t b16x4; + b16x4 = vdup_n_s16(val2); + return vmull_s16(vec1, b16x4); +} + +_NEON2SSESTORAGE int64x2_t vmull_n_s32(int32x2_t vec1, int32_t val2); // VMULL.S32 q0,d0,d0[0] +_NEON2SSE_INLINE int64x2_t vmull_n_s32(int32x2_t vec1, int32_t val2) // VMULL.S32 q0,d0,d0[0] +{ + int32x2_t b32x2; + b32x2 = vdup_n_s32(val2); + return vmull_s32(vec1, b32x2); +} + +_NEON2SSESTORAGE uint32x4_t vmull_n_u16(uint16x4_t vec1, uint16_t val2); // VMULL.s16 q0,d0,d0[0] +_NEON2SSE_INLINE uint32x4_t vmull_n_u16(uint16x4_t vec1, uint16_t val2) // VMULL.s16 q0,d0,d0[0] +{ + uint16x4_t b16x4; + b16x4 = vdup_n_s16(val2); + return vmull_u16(vec1, b16x4); +} + +_NEON2SSESTORAGE uint64x2_t vmull_n_u32(uint32x2_t vec1, uint32_t val2); // VMULL.U32 q0,d0,d0[0] +_NEON2SSE_INLINE uint64x2_t vmull_n_u32(uint32x2_t vec1, uint32_t val2) // VMULL.U32 q0,d0,d0[0] +{ + uint32x2_t b32x2; + b32x2 = vdup_n_u32(val2); + return vmull_u32(vec1, b32x2); +} + +//**** Vector long multiply by scalar **** +_NEON2SSESTORAGE int32x4_t vmull_lane_s16(int16x4_t vec1, int16x4_t val2, __constrange(0, 3) int val3); // VMULL.S16 q0,d0,d0[0] +_NEON2SSE_INLINE int32x4_t vmull_lane_s16(int16x4_t vec1, int16x4_t val2, __constrange(0, 3) int val3) // VMULL.S16 q0,d0,d0[0] +{ + int16_t vlane; + int16x4_t b; + vlane = vget_lane_s16(val2, val3); + b = vdup_n_s16(vlane); + return vmull_s16(vec1, b); +} + +_NEON2SSESTORAGE int64x2_t vmull_lane_s32(int32x2_t vec1, int32x2_t val2, __constrange(0, 1) int val3); // VMULL.S32 q0,d0,d0[0] +_NEON2SSE_INLINE int64x2_t vmull_lane_s32(int32x2_t vec1, int32x2_t val2, 
__constrange(0, 1) int val3) // VMULL.S32 q0,d0,d0[0]
+{
+    int32_t vlane;
+    int32x2_t b;
+    vlane = vget_lane_s32(val2, val3);
+    b = vdup_n_s32(vlane);
+    return vmull_s32(vec1, b);
+}
+
+_NEON2SSESTORAGE uint32x4_t vmull_lane_u16(uint16x4_t vec1, uint16x4_t val2, __constrange(0, 3) int val3); // VMULL.s16 q0,d0,d0[0]
+_NEON2SSE_INLINE uint32x4_t vmull_lane_u16(uint16x4_t vec1, uint16x4_t val2, __constrange(0, 3) int val3) // VMULL.s16 q0,d0,d0[0]
+{
+    uint16_t vlane;
+    uint16x4_t b;
+    vlane = vget_lane_u16(val2, val3);
+    b = vdup_n_u16(vlane);
+    return vmull_u16(vec1, b);
+}
+
+_NEON2SSESTORAGE uint64x2_t vmull_lane_u32(uint32x2_t vec1, uint32x2_t val2, __constrange(0, 1) int val3); // VMULL.U32 q0,d0,d0[0]
+_NEON2SSE_INLINE uint64x2_t vmull_lane_u32(uint32x2_t vec1, uint32x2_t val2, __constrange(0, 1) int val3) // VMULL.U32 q0,d0,d0[0]
+{
+    uint32_t vlane;
+    uint32x2_t b;
+    vlane = vget_lane_u32(val2, val3);
+    b = vdup_n_u32(vlane);
+    return vmull_u32(vec1, b);
+}
+
+//********* Vector saturating doubling long multiply with scalar *******************
+_NEON2SSESTORAGE int32x4_t vqdmull_n_s16(int16x4_t vec1, int16_t val2); // VQDMULL.S16 q0,d0,d0[0]
+_NEON2SSE_INLINE int32x4_t vqdmull_n_s16(int16x4_t vec1, int16_t val2)
+{
+    //the serial solution may be faster due to saturation
+    int16x4_t b;
+    b = vdup_n_s16(val2);
+    return vqdmull_s16(vec1, b);
+}
+
+_NEON2SSESTORAGE int64x2_t vqdmull_n_s32(int32x2_t vec1, int32_t val2); // VQDMULL.S32 q0,d0,d0[0]
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x2_t vqdmull_n_s32(int32x2_t vec1, int32_t val2), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    int32x2_t b;
+    b = vdup_n_s32(val2);
+    return vqdmull_s32(vec1,b); //slow serial function!!!!
+}
+
+//************* Vector saturating doubling long multiply by scalar ***********************************************
+_NEON2SSESTORAGE int32x4_t vqdmull_lane_s16(int16x4_t vec1, int16x4_t val2, __constrange(0, 3) int val3); // VQDMULL.S16 q0,d0,d0[0]
+_NEON2SSE_INLINE int32x4_t vqdmull_lane_s16(int16x4_t vec1, int16x4_t val2, __constrange(0, 3) int val3)
+{
+    int16_t c;
+    int16x4_t scalar;
+    c = vget_lane_s16(val2, val3);
+    scalar = vdup_n_s16(c);
+    return vqdmull_s16(vec1, scalar);
+}
+
+
+_NEON2SSESTORAGE int64x2_t vqdmull_lane_s32(int32x2_t vec1, int32x2_t val2, __constrange(0, 1) int val3); // VQDMULL.S32 q0,d0,d0[0]
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x2_t vqdmull_lane_s32(int32x2_t vec1, int32x2_t val2, __constrange(0, 1) int val3), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    int32_t c;
+    int32x2_t scalar;
+    c = vget_lane_s32(val2, val3);
+    scalar = vdup_n_s32(c);
+    return vqdmull_s32(vec1,scalar); //slow serial function!!!!
+} + +// *****Vector saturating doubling multiply high with scalar ***** +_NEON2SSESTORAGE int16x4_t vqdmulh_n_s16(int16x4_t vec1, int16_t val2); // VQDMULH.S16 d0,d0,d0[0] +_NEON2SSE_INLINE int16x4_t vqdmulh_n_s16(int16x4_t vec1, int16_t val2) +{ + int16x4_t res64; + return64(vqdmulhq_n_s16(_pM128i(vec1), val2)); +} + +_NEON2SSESTORAGE int32x2_t vqdmulh_n_s32(int32x2_t vec1, int32_t val2); // VQDMULH.S32 d0,d0,d0[0] +_NEON2SSE_INLINE int32x2_t vqdmulh_n_s32(int32x2_t vec1, int32_t val2) +{ + int32x2_t res64; + return64(vqdmulhq_n_s32(_pM128i(vec1), val2)); +} + +_NEON2SSESTORAGE int16x8_t vqdmulhq_n_s16(int16x8_t vec1, int16_t val2); // VQDMULH.S16 q0,q0,d0[0] +_NEON2SSE_INLINE int16x8_t vqdmulhq_n_s16(int16x8_t vec1, int16_t val2) // VQDMULH.S16 q0,q0,d0[0] +{ + //solution may be not optimal + int16x8_t scalar; + scalar = vdupq_n_s16(val2); + return vqdmulhq_s16(vec1, scalar); +} + +_NEON2SSESTORAGE int32x4_t vqdmulhq_n_s32(int32x4_t vec1, int32_t val2); // VQDMULH.S32 q0,q0,d0[0] +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int32x4_t vqdmulhq_n_s32(int32x4_t vec1, int32_t val2), _NEON2SSE_REASON_SLOW_UNEFFECTIVE) +{ + int32x4_t scalar; + scalar = vdupq_n_s32(val2); + return vqdmulhq_s32(vec1, scalar); +} + +//***** Vector saturating doubling multiply high by scalar **************** +_NEON2SSESTORAGE int16x4_t vqdmulh_lane_s16(int16x4_t vec1, int16x4_t val2, __constrange(0, 3) int val3); // VQDMULH.S16 d0,d0,d0[0] +_NEON2SSE_INLINE int16x4_t vqdmulh_lane_s16(int16x4_t vec1, int16x4_t val2, __constrange(0, 3) int val3) // VQDMULH.S16 d0,d0,d0[0] +{ + //solution may be not optimal + int16_t vlane; + int16x4_t scalar; + vlane = vget_lane_s16(val2, val3); + scalar = vdup_n_s16(vlane); + return vqdmulh_s16(vec1, scalar); +} + +_NEON2SSESTORAGE int32x2_t vqdmulh_lane_s32(int32x2_t vec1, int32x2_t val2, __constrange(0, 1) int val3); // VQDMULH.S32 d0,d0,d0[0] +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int32x2_t vqdmulh_lane_s32(int32x2_t vec1, int32x2_t val2, __constrange(0, 1) int val3), _NEON2SSE_REASON_SLOW_UNEFFECTIVE) +{ + int32_t vlane; + int32x2_t scalar; + vlane = vget_lane_s32(val2, val3); + scalar = vdup_n_s32(vlane); + return vqdmulh_s32(vec1, scalar); +} + +_NEON2SSESTORAGE int16x8_t vqdmulhq_lane_s16(int16x8_t vec1, int16x4_t val2, __constrange(0, 3) int val3); // VQDMULH.S16 q0,q0,d0[0] +_NEON2SSE_INLINE int16x8_t vqdmulhq_lane_s16(int16x8_t vec1, int16x4_t val2, __constrange(0, 3) int val3) // VQDMULH.S16 q0,q0,d0[0] +{ + //solution may be not optimal + int16_t vlane; + int16x8_t scalar; + vlane = vget_lane_s16(val2, val3); + scalar = vdupq_n_s16(vlane ); + return vqdmulhq_s16(vec1, scalar); +} + +_NEON2SSESTORAGE int32x4_t vqdmulhq_lane_s32(int32x4_t vec1, int32x2_t val2, __constrange(0, 1) int val3); // VQDMULH.S32 q0,q0,d0[0] +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int32x4_t vqdmulhq_lane_s32(int32x4_t vec1, int32x2_t val2, __constrange(0, 1) int val3), _NEON2SSE_REASON_SLOW_UNEFFECTIVE) +{ + //solution may be not optimal + int32_t vlane; + int32x4_t scalar; + vlane = vgetq_lane_s32(_pM128i(val2), val3); + scalar = vdupq_n_s32(vlane ); + return vqdmulhq_s32(vec1, scalar); +} + +//******** Vector saturating rounding doubling multiply high with scalar *** +_NEON2SSESTORAGE int16x4_t vqrdmulh_n_s16(int16x4_t vec1, int16_t val2); // VQRDMULH.S16 d0,d0,d0[0] +_NEON2SSE_INLINE int16x4_t vqrdmulh_n_s16(int16x4_t vec1, int16_t val2) // VQRDMULH.S16 d0,d0,d0[0] +{ + //solution may be not optimal + int16x4_t scalar; + scalar = vdup_n_s16(val2); + return 
vqrdmulh_s16(vec1, scalar); +} + +_NEON2SSESTORAGE int32x2_t vqrdmulh_n_s32(int32x2_t vec1, int32_t val2); // VQRDMULH.S32 d0,d0,d0[0] +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int32x2_t vqrdmulh_n_s32(int32x2_t vec1, int32_t val2), _NEON2SSE_REASON_SLOW_UNEFFECTIVE) +{ + int32x2_t scalar; + scalar = vdup_n_s32(val2); + return vqrdmulh_s32(vec1, scalar); +} + +_NEON2SSESTORAGE int16x8_t vqrdmulhq_n_s16(int16x8_t vec1, int16_t val2); // VQRDMULH.S16 q0,q0,d0[0] +_NEON2SSE_INLINE int16x8_t vqrdmulhq_n_s16(int16x8_t vec1, int16_t val2) // VQRDMULH.S16 q0,q0,d0[0] +{ + //solution may be not optimal + int16x8_t scalar; + scalar = vdupq_n_s16(val2); + return vqrdmulhq_s16(vec1, scalar); +} + +_NEON2SSESTORAGE int32x4_t vqrdmulhq_n_s32(int32x4_t vec1, int32_t val2); // VQRDMULH.S32 q0,q0,d0[0] +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int32x4_t vqrdmulhq_n_s32(int32x4_t vec1, int32_t val2), _NEON2SSE_REASON_SLOW_UNEFFECTIVE) +{ + int32x4_t scalar; + scalar = vdupq_n_s32(val2); + return vqrdmulhq_s32(vec1, scalar); +} + +//********* Vector rounding saturating doubling multiply high by scalar **** +_NEON2SSESTORAGE int16x4_t vqrdmulh_lane_s16(int16x4_t vec1, int16x4_t val2, __constrange(0, 3) int val3); // VQRDMULH.S16 d0,d0,d0[0] +_NEON2SSE_INLINE int16x4_t vqrdmulh_lane_s16(int16x4_t vec1, int16x4_t val2, __constrange(0, 3) int val3) // VQRDMULH.S16 d0,d0,d0[0] +{ + //solution may be not optimal + int16_t vlane; + int16x4_t scalar; + vlane = vget_lane_s16(val2, val3); + scalar = vdup_n_s16(vlane); + return vqrdmulh_s16(vec1, scalar); +} + +_NEON2SSESTORAGE int32x2_t vqrdmulh_lane_s32(int32x2_t vec1, int32x2_t val2, __constrange(0, 1) int val3); // VQRDMULH.S32 d0,d0,d0[0] +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int32x2_t vqrdmulh_lane_s32(int32x2_t vec1, int32x2_t val2, __constrange(0, 1) int val3), _NEON2SSE_REASON_SLOW_UNEFFECTIVE) +{ + int32_t vlane; + int32x2_t scalar; + vlane = vget_lane_s32(val2, val3); + scalar = vdup_n_s32(vlane); + return vqrdmulh_s32(vec1, scalar); +} + +_NEON2SSESTORAGE int16x8_t vqrdmulhq_lane_s16(int16x8_t vec1, int16x4_t val2, __constrange(0, 3) int val3); // VQRDMULH.S16 q0,q0,d0[0] +_NEON2SSE_INLINE int16x8_t vqrdmulhq_lane_s16(int16x8_t vec1, int16x4_t val2, __constrange(0, 3) int val3) // VQRDMULH.S16 q0,q0,d0[0] +{ + //solution may be not optimal + int16_t vlane; + int16x8_t scalar; + vlane = vget_lane_s16(val2, val3); + scalar = vdupq_n_s16(vlane); + return vqrdmulhq_s16(vec1, scalar); +} + +_NEON2SSESTORAGE int32x4_t vqrdmulhq_lane_s32(int32x4_t vec1, int32x2_t val2, __constrange(0, 1) int val3); // VQRDMULH.S32 q0,q0,d0[0] +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int32x4_t vqrdmulhq_lane_s32(int32x4_t vec1, int32x2_t val2, __constrange(0, 1) int val3), _NEON2SSE_REASON_SLOW_UNEFFECTIVE) +{ + //solution may be not optimal + int32_t vlane; + int32x4_t scalar; + vlane = vgetq_lane_s32(_pM128i(val2), val3); + scalar = vdupq_n_s32(vlane ); + return vqrdmulhq_s32(vec1, scalar); +} + +//**************Vector multiply accumulate with scalar ******************* +_NEON2SSESTORAGE int16x4_t vmla_n_s16(int16x4_t a, int16x4_t b, int16_t c); // VMLA.I16 d0, d0, d0[0] +_NEON2SSE_INLINE int16x4_t vmla_n_s16(int16x4_t a, int16x4_t b, int16_t c) // VMLA.I16 d0, d0, d0[0] +{ + int16x4_t scalar; + scalar = vdup_n_s16(c); + return vmla_s16(a, b, scalar); +} + +_NEON2SSESTORAGE int32x2_t vmla_n_s32(int32x2_t a, int32x2_t b, int32_t c); // VMLA.I32 d0, d0, d0[0] +_NEON2SSE_INLINE int32x2_t vmla_n_s32(int32x2_t a, int32x2_t b, int32_t c) // VMLA.I32 d0, 
d0, d0[0]
+{
+    int32x2_t scalar;
+    scalar = vdup_n_s32(c);
+    return vmla_s32(a, b, scalar);
+}
+
+_NEON2SSE_GLOBAL uint16x4_t vmla_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c); // VMLA.I16 d0, d0, d0[0]
+#define vmla_n_u16 vmla_n_s16
+
+
+_NEON2SSE_GLOBAL uint32x2_t vmla_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c); // VMLA.I32 d0, d0, d0[0]
+#define vmla_n_u32 vmla_n_s32
+
+
+_NEON2SSESTORAGE float32x2_t vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c); // VMLA.F32 d0, d0, d0[0]
+_NEON2SSE_INLINE float32x2_t vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c) // VMLA.F32 d0, d0, d0[0]
+{
+    float32x2_t scalar;
+    scalar = vdup_n_f32(c);
+    return vmla_f32(a, b, scalar);
+}
+
+_NEON2SSESTORAGE int16x8_t vmlaq_n_s16(int16x8_t a, int16x8_t b, int16_t c); // VMLA.I16 q0, q0, d0[0]
+_NEON2SSE_INLINE int16x8_t vmlaq_n_s16(int16x8_t a, int16x8_t b, int16_t c) // VMLA.I16 q0, q0, d0[0]
+{
+    int16x8_t scalar;
+    scalar = vdupq_n_s16(c);
+    return vmlaq_s16(a,b,scalar);
+}
+
+_NEON2SSESTORAGE int32x4_t vmlaq_n_s32(int32x4_t a, int32x4_t b, int32_t c); // VMLA.I32 q0, q0, d0[0]
+_NEON2SSE_INLINE int32x4_t vmlaq_n_s32(int32x4_t a, int32x4_t b, int32_t c) // VMLA.I32 q0, q0, d0[0]
+{
+    int32x4_t scalar;
+    scalar = vdupq_n_s32(c);
+    return vmlaq_s32(a,b,scalar);
+}
+
+_NEON2SSE_GLOBAL uint16x8_t vmlaq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c); // VMLA.I16 q0, q0, d0[0]
+#define vmlaq_n_u16 vmlaq_n_s16
+
+_NEON2SSE_GLOBAL uint32x4_t vmlaq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c); // VMLA.I32 q0, q0, d0[0]
+#define vmlaq_n_u32 vmlaq_n_s32
+
+_NEON2SSESTORAGE float32x4_t vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c); // VMLA.F32 q0, q0, d0[0]
+_NEON2SSE_INLINE float32x4_t vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) // VMLA.F32 q0, q0, d0[0]
+{
+    float32x4_t scalar;
+    scalar = vdupq_n_f32(c);
+    return vmlaq_f32(a,b,scalar);
+}
+
+//************Vector widening multiply accumulate with scalar****************************
+_NEON2SSESTORAGE int32x4_t vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c); // VMLAL.S16 q0, d0, d0[0]
+_NEON2SSE_INLINE int32x4_t vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) // VMLAL.S16 q0, d0, d0[0]
+{
+    int16x4_t vc;
+    vc = vdup_n_s16(c);
+    return vmlal_s16(a, b, vc);
+}
+
+_NEON2SSESTORAGE int64x2_t vmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c); // VMLAL.S32 q0, d0, d0[0]
+_NEON2SSE_INLINE int64x2_t vmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) // VMLAL.S32 q0, d0, d0[0]
+{
+    int32x2_t vc;
+    vc = vdup_n_s32(c);
+    return vmlal_s32(a, b, vc);
+}
+
+_NEON2SSESTORAGE uint32x4_t vmlal_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c); // VMLAL.s16 q0, d0, d0[0]
+_NEON2SSE_INLINE uint32x4_t vmlal_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) // VMLAL.s16 q0, d0, d0[0]
+{
+    uint16x4_t vc;
+    vc = vdup_n_u16(c);
+    return vmlal_u16(a, b, vc);
+}
+
+_NEON2SSESTORAGE uint64x2_t vmlal_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c); // VMLAL.U32 q0, d0, d0[0]
+_NEON2SSE_INLINE uint64x2_t vmlal_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) // VMLAL.U32 q0, d0, d0[0]
+{
+    uint32x2_t vc;
+    vc = vdup_n_u32(c);
+    return vmlal_u32(a, b, vc);
+}
+
+//************ Vector widening saturating doubling multiply accumulate with scalar **************
+_NEON2SSESTORAGE int32x4_t vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c); // VQDMLAL.S16 q0, d0, d0[0]
+_NEON2SSE_INLINE int32x4_t vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c)
+{
+    //not optimal SIMD solution, serial may be faster
+    int16x4_t vc;
+    vc = vdup_n_s16(c);
+    return vqdmlal_s16(a, b, vc);
+}
+
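+//--- Illustrative usage sketch (editorial addition, not part of the patch) ---
+//The *_n_* multiply-accumulate forms above just splat the scalar and defer to
+//the corresponding vector variant, so a scalar-weighted accumulation maps onto
+//them directly. Assuming the header is included as "NEON_2_SSE.h"; the demo
+//function below is hypothetical.
#include "NEON_2_SSE.h"

static int32x4_t weighted_sum_demo(const int16_t *x, int n, int16_t weight)
{
    int32x4_t acc = vdupq_n_s32(0);
    int i;
    for (i = 0; i + 4 <= n; i += 4) {
        //widening multiply-accumulate: acc += (int32_t)x[i..i+3] * weight
        acc = vmlal_n_s16(acc, vld1_s16(x + i), weight);
    }
    return acc; //caller handles the n % 4 tail and the horizontal add
}
+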
+_NEON2SSESTORAGE int64x2_t vqdmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c); // VQDMLAL.S32 q0, d0, d0[0] +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x2_t vqdmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c), _NEON2SSE_REASON_SLOW_SERIAL) +{ + int32x2_t vc; + vc = vdup_n_s32(c); + return vqdmlal_s32(a, b, vc); +} + +//******** Vector multiply subtract with scalar ************** +_NEON2SSESTORAGE int16x4_t vmls_n_s16(int16x4_t a, int16x4_t b, int16_t c); // VMLS.I16 d0, d0, d0[0] +_NEON2SSE_INLINE int16x4_t vmls_n_s16(int16x4_t a, int16x4_t b, int16_t c) // VMLS.I16 d0, d0, d0[0] +{ + int16x4_t vc; + vc = vdup_n_s16(c); + return vmls_s16(a, b, vc); +} + +_NEON2SSESTORAGE int32x2_t vmls_n_s32(int32x2_t a, int32x2_t b, int32_t c); // VMLS.I32 d0, d0, d0[0] +_NEON2SSE_INLINE int32x2_t vmls_n_s32(int32x2_t a, int32x2_t b, int32_t c) // VMLS.I32 d0, d0, d0[0] +{ + int32x2_t vc; + vc = vdup_n_s32(c); + return vmls_s32(a, b, vc); +} + +_NEON2SSESTORAGE uint16x4_t vmls_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c); // VMLS.I16 d0, d0, d0[0] +_NEON2SSE_INLINE uint16x4_t vmls_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) // VMLS.I16 d0, d0, d0[0] +{ + uint16x4_t vc; + vc = vdup_n_s16(c); + return vmls_s16(a, b, vc); +} + +_NEON2SSESTORAGE uint32x2_t vmls_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c); // VMLS.I32 d0, d0, d0[0] +_NEON2SSE_INLINE uint32x2_t vmls_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) // VMLS.I32 d0, d0, d0[0] +{ + uint32x2_t vc; + vc = vdup_n_u32(c); + return vmls_u32(a, b, vc); +} + +_NEON2SSESTORAGE float32x2_t vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c); // VMLS.F32 d0, d0, d0[0] +_NEON2SSE_INLINE float32x2_t vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) +{ + float32x2_t res; + res.m64_f32[0] = a.m64_f32[0] - b.m64_f32[0] * c; + res.m64_f32[1] = a.m64_f32[1] - b.m64_f32[1] * c; + return res; +} + +_NEON2SSESTORAGE int16x8_t vmlsq_n_s16(int16x8_t a, int16x8_t b, int16_t c); // VMLS.I16 q0, q0, d0[0] +_NEON2SSE_INLINE int16x8_t vmlsq_n_s16(int16x8_t a, int16x8_t b, int16_t c) // VMLS.I16 q0, q0, d0[0] +{ + int16x8_t vc; + vc = vdupq_n_s16(c); + return vmlsq_s16(a, b,vc); +} + +_NEON2SSESTORAGE int32x4_t vmlsq_n_s32(int32x4_t a, int32x4_t b, int32_t c); // VMLS.I32 q0, q0, d0[0] +_NEON2SSE_INLINE int32x4_t vmlsq_n_s32(int32x4_t a, int32x4_t b, int32_t c) // VMLS.I32 q0, q0, d0[0] +{ + int32x4_t vc; + vc = vdupq_n_s32(c); + return vmlsq_s32(a,b,vc); +} + +_NEON2SSESTORAGE uint16x8_t vmlsq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c); // VMLS.I16 q0, q0, d0[0] +_NEON2SSE_INLINE uint16x8_t vmlsq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) // VMLS.I16 q0, q0, d0[0] +{ + uint16x8_t vc; + vc = vdupq_n_u16(c); + return vmlsq_u16(a,b,vc); +} + +_NEON2SSESTORAGE uint32x4_t vmlsq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c); // VMLS.I32 q0, q0, d0[0] +_NEON2SSE_INLINE uint32x4_t vmlsq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) // VMLS.I32 q0, q0, d0[0] +{ + uint32x4_t vc; + vc = vdupq_n_u32(c); + return vmlsq_u32(a,b,vc); +} + +_NEON2SSESTORAGE float32x4_t vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c); // VMLS.F32 q0, q0, d0[0] +_NEON2SSE_INLINE float32x4_t vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c) +{ + float32x4_t vc; + vc = vdupq_n_f32(c); + return vmlsq_f32(a,b,vc); +} + +//**** Vector widening multiply subtract with scalar ****** +_NEON2SSESTORAGE int32x4_t vmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c); // VMLSL.S16 q0, d0, d0[0] +_NEON2SSE_INLINE int32x4_t vmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) // VMLSL.S16 q0, d0, 
d0[0]
+{
+    int16x4_t vc;
+    vc = vdup_n_s16(c);
+    return vmlsl_s16(a, b, vc);
+}
+
+_NEON2SSESTORAGE int64x2_t vmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c); // VMLSL.S32 q0, d0, d0[0]
+_NEON2SSE_INLINE int64x2_t vmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) // VMLSL.S32 q0, d0, d0[0]
+{
+    int32x2_t vc;
+    vc = vdup_n_s32(c);
+    return vmlsl_s32(a, b, vc);
+}
+
+_NEON2SSESTORAGE uint32x4_t vmlsl_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c); // VMLSL.U16 q0, d0, d0[0]
+_NEON2SSE_INLINE uint32x4_t vmlsl_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) // VMLSL.U16 q0, d0, d0[0]
+{
+    uint16x4_t vc;
+    vc = vdup_n_u16(c);
+    return vmlsl_u16(a, b, vc);
+}
+
+_NEON2SSESTORAGE uint64x2_t vmlsl_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c); // VMLSL.U32 q0, d0, d0[0]
+_NEON2SSE_INLINE uint64x2_t vmlsl_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) // VMLSL.U32 q0, d0, d0[0]
+{
+    uint32x2_t vc;
+    vc = vdup_n_u32(c);
+    return vmlsl_u32(a, b, vc);
+}
+
+//***** Vector widening saturating doubling multiply subtract with scalar *********
+//**********************************************************************************
+_NEON2SSESTORAGE int32x4_t vqdmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c); // VQDMLSL.S16 q0, d0, d0[0]
+_NEON2SSE_INLINE int32x4_t vqdmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c)
+{
+    int16x4_t vc;
+    vc = vdup_n_s16(c);
+    return vqdmlsl_s16(a, b, vc);
+}
+
+_NEON2SSESTORAGE int64x2_t vqdmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c); // VQDMLSL.S32 q0, d0, d0[0]
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x2_t vqdmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    int32x2_t vc;
+    vc = vdup_n_s32(c);
+    return vqdmlsl_s32(a, b, vc);
+}
+
+//******************* Vector extract ***********************************************
+//*************************************************************************************
+//VEXT (Vector Extract) extracts elements from the bottom end of the second operand
+//vector and the top end of the first, concatenates them, and places the result in the destination vector,
+//i.e. c elements from the bottom end of the second operand and (8-c) from the top end of the first
+_NEON2SSESTORAGE int8x8_t vext_s8(int8x8_t a, int8x8_t b, __constrange(0,7) int c); // VEXT.8 d0,d0,d0,#0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int8x8_t vext_s8(int8x8_t a, int8x8_t b, __constrange(0,7) int c),_NEON2SSE_REASON_SLOW_SERIAL)
+{
+    int8x8_t res;
+    int i;
+    for (i = 0; i<8 - c; i++) {
+        res.m64_i8[i] = a.m64_i8[i + c];
+    }
+    for(i = 0; i<c; i++) {
+        res.m64_i8[8 - c + i] = b.m64_i8[i];
+    }
+    return res;
+}
+
+//**************** Count leading zeros ***********************************
+//*************************************************************************
+_NEON2SSESTORAGE int32x4_t vclzq_s32(int32x4_t a); // VCLZ.I32 q0,q0
+_NEON2SSE_INLINE int32x4_t vclzq_s32(int32x4_t a)
+{
+    //compute the count of leading zero bits using the floating-point conversion trick:
+    //for input integer a and result r, float f = (float)(a & ~(a >> 8));
+    //r = 158 - (*(int32_t *)&f >> 23);
+    __m128i c158 = _mm_set1_epi32(158);
+    __m128i c32 = _mm_set_epi16(0, 32, 0, 32, 0, 32, 0, 32);
+    __m128i lsr = _mm_srai_epi32(a, 8);
+    __m128i atrunc = _mm_andnot_si128(lsr, a); //truncation
+    __m128 atruncf = _mm_cvtepi32_ps(atrunc);
+    __m128i res = _mm_castps_si128(atruncf);
+    res = _mm_srli_epi32(res, 23);
+    res = _mm_sub_epi32(c158, res);
+    return _mm_min_epi16(res, c32);
+}
+
+_NEON2SSE_GLOBAL uint8x16_t vclzq_u8(uint8x16_t a); // VCLZ.I8 q0,q0
+#define vclzq_u8 vclzq_s8
+
+_NEON2SSE_GLOBAL uint16x8_t vclzq_u16(uint16x8_t a); // VCLZ.I16 q0,q0
+#define vclzq_u16 vclzq_s16
+
+_NEON2SSESTORAGE uint32x4_t vclzq_u32(uint32x4_t a); // VCLZ.I32 q0,q0
+_NEON2SSE_INLINE uint32x4_t vclzq_u32(uint32x4_t a)
+{
+    //compute the count of leading zero bits using the floating-point conversion trick,
+    //same as for signed ints, but to emulate an unsigned conversion we divide a by 2 before the conversion, then double and increment after it:
+    //for input integer a and result r, float f = (float)(int)((a >> 1) & ~(a >> 2)); f = 2*f + 1.0;
+    //r = 158 - (*(uint32_t *)&f >> 23);
+    __m128i zero = _mm_setzero_si128();
+    __m128 fp1 = _mm_set_ps1(1.0f);
+    __m128i c158 = _mm_set1_epi32(158);
+    __m128i mask = _mm_cmpeq_epi32(a, zero);
+    __m128i lsr1 = _mm_srli_epi32(a, 1);
+    __m128i lsr2 = _mm_srli_epi32(a, 2);
+    __m128i atrunc = _mm_andnot_si128(lsr2, lsr1);
+    __m128 atruncf = _mm_cvtepi32_ps(atrunc);
+    __m128 atruncf2 = _mm_add_ps(atruncf, atruncf);
+    __m128 conv = _mm_add_ps(atruncf2, fp1);
+    __m128i res = _mm_castps_si128(conv);
+    res = _mm_srli_epi32(res, 23);
+    res = _mm_sub_epi32(c158, res);
+    return _mm_sub_epi32(res, mask); //mask is all ones for a == 0, so this adds 1 there and yields 32
+}
+
+//************** Count leading sign bits **************************
+//********************************************************************
+//VCLS (Vector Count Leading Sign bits) counts the number of consecutive bits following
+// the topmost bit, that are the same as the topmost bit, in each element in a vector
+//No corresponding vector intrinsics in IA32, need to implement it.
+//While the implementation is effective for 8 bits, it may not be for 16 and 32 bits
+_NEON2SSESTORAGE int8x8_t vcls_s8(int8x8_t a); // VCLS.S8 d0,d0
+_NEON2SSE_INLINE int8x8_t vcls_s8(int8x8_t a)
+{
+    int8x8_t res64;
+    __m128i res;
+    res = vclsq_s8(_pM128i(a));
+    return64(res);
+}
+
+_NEON2SSESTORAGE int16x4_t vcls_s16(int16x4_t a); // VCLS.S16 d0,d0
+_NEON2SSE_INLINE int16x4_t vcls_s16(int16x4_t a)
+{
+    int16x4_t res64;
+    __m128i res;
+    res = vclsq_s16(_pM128i(a));
+    return64(res);
+}
+
+_NEON2SSESTORAGE int32x2_t vcls_s32(int32x2_t a); // VCLS.S32 d0,d0
+_NEON2SSE_INLINE int32x2_t vcls_s32(int32x2_t a)
+{
+    int32x2_t res64;
+    __m128i res;
+    res = vclsq_s32(_pM128i(a));
+    return64(res);
+}
+
+_NEON2SSESTORAGE int8x16_t vclsq_s8(int8x16_t a); // VCLS.S8 q0,q0
+_NEON2SSE_INLINE int8x16_t vclsq_s8(int8x16_t a)
+{
+    __m128i cff, c80, c1, a_mask, a_neg, a_pos, a_comb;
+    cff = _mm_cmpeq_epi8 (a,a); //0xff
+    c80 = _mm_set1_epi8(-128); //(int8_t)0x80
+    c1 = _mm_set1_epi8(1);
+    a_mask = _mm_and_si128(a, c80);
+    a_mask = _mm_cmpeq_epi8(a_mask, c80); //0xff if negative input and 0 if positive
+    a_neg = _mm_xor_si128(a, cff);
+    a_neg = _mm_and_si128(a_mask, a_neg);
+    a_pos = _mm_andnot_si128(a_mask, a);
+    a_comb = _mm_or_si128(a_pos, a_neg);
+    a_comb = vclzq_s8(a_comb);
+    return _mm_sub_epi8(a_comb, c1);
+}
+
+_NEON2SSESTORAGE int16x8_t vclsq_s16(int16x8_t a); // VCLS.S16 q0,q0
+_NEON2SSE_INLINE int16x8_t vclsq_s16(int16x8_t a)
+{
+    __m128i cffff, c8000, c1, a_mask, a_neg, a_pos, a_comb;
+    cffff = _mm_cmpeq_epi16(a,a);
+    c8000 = _mm_slli_epi16(cffff, 15); //0x8000
+    c1 = _mm_srli_epi16(cffff,15); //0x1
+    a_mask = _mm_and_si128(a, c8000);
+    a_mask = _mm_cmpeq_epi16(a_mask, c8000); //0xffff if negative input and 0 if positive
+    a_neg = _mm_xor_si128(a, cffff);
+    a_neg = _mm_and_si128(a_mask, a_neg);
+    a_pos = _mm_andnot_si128(a_mask, a);
+    a_comb = _mm_or_si128(a_pos, a_neg);
+    a_comb = vclzq_s16(a_comb);
+    return _mm_sub_epi16(a_comb, c1);
+}
+
+_NEON2SSESTORAGE int32x4_t vclsq_s32(int32x4_t a); // VCLS.S32 q0,q0
+_NEON2SSE_INLINE int32x4_t vclsq_s32(int32x4_t a)
+{
+    __m128i cffffffff, c80000000, c1, a_mask, a_neg, a_pos, a_comb;
+    cffffffff = _mm_cmpeq_epi32(a,a);
+    c80000000 = _mm_slli_epi32(cffffffff, 31); //0x80000000
+    c1 = _mm_srli_epi32(cffffffff,31); //0x1
+    a_mask = _mm_and_si128(a, c80000000);
+    a_mask = _mm_cmpeq_epi32(a_mask, c80000000); //0xffffffff if negative input and 0 if positive
+    a_neg = _mm_xor_si128(a, cffffffff);
+    a_neg = _mm_and_si128(a_mask, a_neg);
+    a_pos = _mm_andnot_si128(a_mask, a);
+    a_comb = _mm_or_si128(a_pos, a_neg);
+    a_comb = vclzq_s32(a_comb);
+    return _mm_sub_epi32(a_comb, c1);
+}
+
+//************************* Count number of set bits ********************************
+//*************************************************************************************
+//No corresponding SIMD solution. One option is to take each element of a, convert it to 32 bits and then use the SSE4.2 _mm_popcnt_u32 (unsigned int v) on it;
+//another option is the following algorithm:
+
+_NEON2SSESTORAGE uint8x8_t vcnt_u8(uint8x8_t a); // VCNT.8 d0,d0
+_NEON2SSE_INLINE uint8x8_t vcnt_u8(uint8x8_t a)
+{
+    uint8x8_t res64;
+    __m128i res;
+    res = vcntq_u8(_pM128i(a));
+    return64(res);
+}
+
+_NEON2SSE_GLOBAL int8x8_t vcnt_s8(int8x8_t a); // VCNT.8 d0,d0
+#define vcnt_s8 vcnt_u8
+
+_NEON2SSE_GLOBAL poly8x8_t vcnt_p8(poly8x8_t a); // VCNT.8 d0,d0
+#define vcnt_p8 vcnt_u8
+
+_NEON2SSESTORAGE uint8x16_t vcntq_u8(uint8x16_t a); // VCNT.8 q0,q0
+_NEON2SSE_INLINE uint8x16_t vcntq_u8(uint8x16_t a)
+{
+    _NEON2SSE_ALIGN_16 static const int8_t mask_POPCOUNT[16] = { /* 0 */ 0,/* 1 */ 1,/* 2 */ 1,/* 3 */ 2,
+                                                                /* 4 */ 1,/* 5 */ 2,/* 6 */ 2,/* 7 */ 3,
+                                                                /* 8 */ 1,/* 9 */ 2,/* a */ 2,/* b */ 3,
+                                                                /* c */ 2,/* d */ 3,/* e */ 3,/* f */ 4};
+    __m128i maskLOW, mask, lowpopcnt, hipopcnt;
+    maskLOW = _mm_set1_epi8(0x0f); //low 4 bits, need masking to avoid zero if MSB is set
+    mask = _mm_and_si128(a, maskLOW);
+    lowpopcnt = _mm_shuffle_epi8( *(__m128i*)mask_POPCOUNT, mask); //uses low 4 bits anyway
+    mask = _mm_srli_epi16(a, 4); //get high 4 bits as low bits
+    mask = _mm_and_si128(mask, maskLOW); //low 4 bits, need masking to avoid zero if MSB is set
+    hipopcnt = _mm_shuffle_epi8( *(__m128i*) mask_POPCOUNT, mask); //uses low 4 bits anyway
+    return _mm_add_epi8(lowpopcnt, hipopcnt);
+}
+
+_NEON2SSE_GLOBAL int8x16_t vcntq_s8(int8x16_t a); // VCNT.8 q0,q0
+#define vcntq_s8 vcntq_u8
+
+_NEON2SSE_GLOBAL poly8x16_t vcntq_p8(poly8x16_t a); // VCNT.8 q0,q0
+#define vcntq_p8 vcntq_u8
+
+//**************************************************************************************
+//*********************** Logical operations ****************************************
+//**************************************************************************************
+//************************** Bitwise not ***********************************
+//several bitwise NOT implementations are possible for SIMD.
Eg "xor" with all ones, but the following one gives good performance +_NEON2SSESTORAGE int8x8_t vmvn_s8(int8x8_t a); // VMVN d0,d0 +_NEON2SSE_INLINE int8x8_t vmvn_s8(int8x8_t a) +{ + int8x8_t res64; + __m128i res; + res = vmvnq_s8(_pM128i(a)); + return64(res); +} + +_NEON2SSESTORAGE int16x4_t vmvn_s16(int16x4_t a); // VMVN d0,d0 +_NEON2SSE_INLINE int16x4_t vmvn_s16(int16x4_t a) +{ + int16x4_t res64; + __m128i res; + res = vmvnq_s16(_pM128i(a)); + return64(res); +} + +_NEON2SSESTORAGE int32x2_t vmvn_s32(int32x2_t a); // VMVN d0,d0 +_NEON2SSE_INLINE int32x2_t vmvn_s32(int32x2_t a) +{ + int32x2_t res64; + __m128i res; + res = vmvnq_s32(_pM128i(a)); + return64(res); +} + +_NEON2SSE_GLOBAL uint8x8_t vmvn_u8(uint8x8_t a); // VMVN d0,d0 +#define vmvn_u8 vmvn_s8 + +_NEON2SSE_GLOBAL uint16x4_t vmvn_u16(uint16x4_t a); // VMVN d0,d0 +#define vmvn_u16 vmvn_s16 + +_NEON2SSE_GLOBAL uint32x2_t vmvn_u32(uint32x2_t a); // VMVN d0,d0 +#define vmvn_u32 vmvn_s32 + +_NEON2SSE_GLOBAL poly8x8_t vmvn_p8(poly8x8_t a); // VMVN d0,d0 +#define vmvn_p8 vmvn_u8 + +_NEON2SSESTORAGE int8x16_t vmvnq_s8(int8x16_t a); // VMVN q0,q0 +_NEON2SSE_INLINE int8x16_t vmvnq_s8(int8x16_t a) // VMVN q0,q0 +{ + __m128i c1; + c1 = _mm_cmpeq_epi8 (a,a); //0xff + return _mm_andnot_si128 (a, c1); +} + +_NEON2SSESTORAGE int16x8_t vmvnq_s16(int16x8_t a); // VMVN q0,q0 +_NEON2SSE_INLINE int16x8_t vmvnq_s16(int16x8_t a) // VMVN q0,q0 +{ + __m128i c1; + c1 = _mm_cmpeq_epi16 (a,a); //0xffff + return _mm_andnot_si128 (a, c1); +} + +_NEON2SSESTORAGE int32x4_t vmvnq_s32(int32x4_t a); // VMVN q0,q0 +_NEON2SSE_INLINE int32x4_t vmvnq_s32(int32x4_t a) // VMVN q0,q0 +{ + __m128i c1; + c1 = _mm_cmpeq_epi32 (a,a); //0xffffffff + return _mm_andnot_si128 (a, c1); +} + +_NEON2SSE_GLOBAL uint8x16_t vmvnq_u8(uint8x16_t a); // VMVN q0,q0 +#define vmvnq_u8 vmvnq_s8 + +_NEON2SSE_GLOBAL uint16x8_t vmvnq_u16(uint16x8_t a); // VMVN q0,q0 +#define vmvnq_u16 vmvnq_s16 + +_NEON2SSE_GLOBAL uint32x4_t vmvnq_u32(uint32x4_t a); // VMVN q0,q0 +#define vmvnq_u32 vmvnq_s32 + +_NEON2SSE_GLOBAL poly8x16_t vmvnq_p8(poly8x16_t a); // VMVN q0,q0 +#define vmvnq_p8 vmvnq_u8 + +//****************** Bitwise and *********************** +//****************************************************** +_NEON2SSESTORAGE int8x8_t vand_s8(int8x8_t a, int8x8_t b); // VAND d0,d0,d0 +_NEON2SSE_INLINE int8x8_t vand_s8(int8x8_t a, int8x8_t b) +{ + int8x8_t res64; + return64(_mm_and_si128(_pM128i(a),_pM128i(b))); +} + +_NEON2SSESTORAGE int16x4_t vand_s16(int16x4_t a, int16x4_t b); // VAND d0,d0,d0 +_NEON2SSE_INLINE int16x4_t vand_s16(int16x4_t a, int16x4_t b) +{ + int16x4_t res64; + return64(_mm_and_si128(_pM128i(a),_pM128i(b))); +} + +_NEON2SSESTORAGE int32x2_t vand_s32(int32x2_t a, int32x2_t b); // VAND d0,d0,d0 +_NEON2SSE_INLINE int32x2_t vand_s32(int32x2_t a, int32x2_t b) +{ + int32x2_t res64; + return64(_mm_and_si128(_pM128i(a),_pM128i(b))); +} + + +_NEON2SSESTORAGE int64x1_t vand_s64(int64x1_t a, int64x1_t b); // VAND d0,d0,d0 +_NEON2SSE_INLINE int64x1_t vand_s64(int64x1_t a, int64x1_t b) +{ + int64x1_t res; + res.m64_i64[0] = a.m64_i64[0] & b.m64_i64[0]; + return res; +} + +_NEON2SSE_GLOBAL uint8x8_t vand_u8(uint8x8_t a, uint8x8_t b); // VAND d0,d0,d0 +#define vand_u8 vand_s8 + +_NEON2SSE_GLOBAL uint16x4_t vand_u16(uint16x4_t a, uint16x4_t b); // VAND d0,d0,d0 +#define vand_u16 vand_s16 + +_NEON2SSE_GLOBAL uint32x2_t vand_u32(uint32x2_t a, uint32x2_t b); // VAND d0,d0,d0 +#define vand_u32 vand_s32 + +_NEON2SSE_GLOBAL uint64x1_t vand_u64(uint64x1_t a, uint64x1_t b); // VAND d0,d0,d0 +#define 
vand_u64 vand_s64 + + +_NEON2SSE_GLOBAL int8x16_t vandq_s8(int8x16_t a, int8x16_t b); // VAND q0,q0,q0 +#define vandq_s8 _mm_and_si128 + +_NEON2SSE_GLOBAL int16x8_t vandq_s16(int16x8_t a, int16x8_t b); // VAND q0,q0,q0 +#define vandq_s16 _mm_and_si128 + +_NEON2SSE_GLOBAL int32x4_t vandq_s32(int32x4_t a, int32x4_t b); // VAND q0,q0,q0 +#define vandq_s32 _mm_and_si128 + +_NEON2SSE_GLOBAL int64x2_t vandq_s64(int64x2_t a, int64x2_t b); // VAND q0,q0,q0 +#define vandq_s64 _mm_and_si128 + +_NEON2SSE_GLOBAL uint8x16_t vandq_u8(uint8x16_t a, uint8x16_t b); // VAND q0,q0,q0 +#define vandq_u8 _mm_and_si128 + +_NEON2SSE_GLOBAL uint16x8_t vandq_u16(uint16x8_t a, uint16x8_t b); // VAND q0,q0,q0 +#define vandq_u16 _mm_and_si128 + +_NEON2SSE_GLOBAL uint32x4_t vandq_u32(uint32x4_t a, uint32x4_t b); // VAND q0,q0,q0 +#define vandq_u32 _mm_and_si128 + +_NEON2SSE_GLOBAL uint64x2_t vandq_u64(uint64x2_t a, uint64x2_t b); // VAND q0,q0,q0 +#define vandq_u64 _mm_and_si128 + +//******************** Bitwise or ********************************* +//****************************************************************** +_NEON2SSESTORAGE int8x8_t vorr_s8(int8x8_t a, int8x8_t b); // VORR d0,d0,d0 +_NEON2SSE_INLINE int8x8_t vorr_s8(int8x8_t a, int8x8_t b) +{ + int8x8_t res64; + return64(_mm_or_si128(_pM128i(a),_pM128i(b))); +} + + +_NEON2SSESTORAGE int16x4_t vorr_s16(int16x4_t a, int16x4_t b); // VORR d0,d0,d0 +_NEON2SSE_INLINE int16x4_t vorr_s16(int16x4_t a, int16x4_t b) +{ + int16x4_t res64; + return64(_mm_or_si128(_pM128i(a),_pM128i(b))); +} + + +_NEON2SSESTORAGE int32x2_t vorr_s32(int32x2_t a, int32x2_t b); // VORR d0,d0,d0 +_NEON2SSE_INLINE int32x2_t vorr_s32(int32x2_t a, int32x2_t b) +{ + int32x2_t res64; + return64(_mm_or_si128(_pM128i(a),_pM128i(b))); +} + + +_NEON2SSESTORAGE int64x1_t vorr_s64(int64x1_t a, int64x1_t b); // VORR d0,d0,d0 +_NEON2SSE_INLINE int64x1_t vorr_s64(int64x1_t a, int64x1_t b) +{ + int64x1_t res; + res.m64_i64[0] = a.m64_i64[0] | b.m64_i64[0]; + return res; +} + +_NEON2SSE_GLOBAL uint8x8_t vorr_u8(uint8x8_t a, uint8x8_t b); // VORR d0,d0,d0 +#define vorr_u8 vorr_s8 + +_NEON2SSE_GLOBAL uint16x4_t vorr_u16(uint16x4_t a, uint16x4_t b); // VORR d0,d0,d0 +#define vorr_u16 vorr_s16 + +_NEON2SSE_GLOBAL uint32x2_t vorr_u32(uint32x2_t a, uint32x2_t b); // VORR d0,d0,d0 +#define vorr_u32 vorr_s32 + +_NEON2SSE_GLOBAL uint64x1_t vorr_u64(uint64x1_t a, uint64x1_t b); // VORR d0,d0,d0 +#define vorr_u64 vorr_s64 + +_NEON2SSE_GLOBAL int8x16_t vorrq_s8(int8x16_t a, int8x16_t b); // VORR q0,q0,q0 +#define vorrq_s8 _mm_or_si128 + +_NEON2SSE_GLOBAL int16x8_t vorrq_s16(int16x8_t a, int16x8_t b); // VORR q0,q0,q0 +#define vorrq_s16 _mm_or_si128 + +_NEON2SSE_GLOBAL int32x4_t vorrq_s32(int32x4_t a, int32x4_t b); // VORR q0,q0,q0 +#define vorrq_s32 _mm_or_si128 + +_NEON2SSE_GLOBAL int64x2_t vorrq_s64(int64x2_t a, int64x2_t b); // VORR q0,q0,q0 +#define vorrq_s64 _mm_or_si128 + +_NEON2SSE_GLOBAL uint8x16_t vorrq_u8(uint8x16_t a, uint8x16_t b); // VORR q0,q0,q0 +#define vorrq_u8 _mm_or_si128 + +_NEON2SSE_GLOBAL uint16x8_t vorrq_u16(uint16x8_t a, uint16x8_t b); // VORR q0,q0,q0 +#define vorrq_u16 _mm_or_si128 + +_NEON2SSE_GLOBAL uint32x4_t vorrq_u32(uint32x4_t a, uint32x4_t b); // VORR q0,q0,q0 +#define vorrq_u32 _mm_or_si128 + +_NEON2SSE_GLOBAL uint64x2_t vorrq_u64(uint64x2_t a, uint64x2_t b); // VORR q0,q0,q0 +#define vorrq_u64 _mm_or_si128 + +//************* Bitwise exclusive or (EOR or XOR) ****************** +//******************************************************************* +_NEON2SSESTORAGE int8x8_t 
veor_s8(int8x8_t a, int8x8_t b); // VEOR d0,d0,d0 +_NEON2SSE_INLINE int8x8_t veor_s8(int8x8_t a, int8x8_t b) +{ + int8x8_t res64; + return64(_mm_xor_si128(_pM128i(a),_pM128i(b))); +} + +_NEON2SSE_GLOBAL int16x4_t veor_s16(int16x4_t a, int16x4_t b); // VEOR d0,d0,d0 +#define veor_s16 veor_s8 + +_NEON2SSE_GLOBAL int32x2_t veor_s32(int32x2_t a, int32x2_t b); // VEOR d0,d0,d0 +#define veor_s32 veor_s8 + +_NEON2SSESTORAGE int64x1_t veor_s64(int64x1_t a, int64x1_t b); // VEOR d0,d0,d0 +_NEON2SSE_INLINE int64x1_t veor_s64(int64x1_t a, int64x1_t b) +{ + int64x1_t res; + res.m64_i64[0] = a.m64_i64[0] ^ b.m64_i64[0]; + return res; +} + +_NEON2SSE_GLOBAL uint8x8_t veor_u8(uint8x8_t a, uint8x8_t b); // VEOR d0,d0,d0 +#define veor_u8 veor_s8 + +_NEON2SSE_GLOBAL uint16x4_t veor_u16(uint16x4_t a, uint16x4_t b); // VEOR d0,d0,d0 +#define veor_u16 veor_s16 + +_NEON2SSE_GLOBAL uint32x2_t veor_u32(uint32x2_t a, uint32x2_t b); // VEOR d0,d0,d0 +#define veor_u32 veor_s32 + +_NEON2SSE_GLOBAL uint64x1_t veor_u64(uint64x1_t a, uint64x1_t b); // VEOR d0,d0,d0 +#define veor_u64 veor_s64 + +_NEON2SSE_GLOBAL int8x16_t veorq_s8(int8x16_t a, int8x16_t b); // VEOR q0,q0,q0 +#define veorq_s8 _mm_xor_si128 + +_NEON2SSE_GLOBAL int16x8_t veorq_s16(int16x8_t a, int16x8_t b); // VEOR q0,q0,q0 +#define veorq_s16 _mm_xor_si128 + +_NEON2SSE_GLOBAL int32x4_t veorq_s32(int32x4_t a, int32x4_t b); // VEOR q0,q0,q0 +#define veorq_s32 _mm_xor_si128 + +_NEON2SSE_GLOBAL int64x2_t veorq_s64(int64x2_t a, int64x2_t b); // VEOR q0,q0,q0 +#define veorq_s64 _mm_xor_si128 + +_NEON2SSE_GLOBAL uint8x16_t veorq_u8(uint8x16_t a, uint8x16_t b); // VEOR q0,q0,q0 +#define veorq_u8 _mm_xor_si128 + +_NEON2SSE_GLOBAL uint16x8_t veorq_u16(uint16x8_t a, uint16x8_t b); // VEOR q0,q0,q0 +#define veorq_u16 _mm_xor_si128 + +_NEON2SSE_GLOBAL uint32x4_t veorq_u32(uint32x4_t a, uint32x4_t b); // VEOR q0,q0,q0 +#define veorq_u32 _mm_xor_si128 + +_NEON2SSE_GLOBAL uint64x2_t veorq_u64(uint64x2_t a, uint64x2_t b); // VEOR q0,q0,q0 +#define veorq_u64 _mm_xor_si128 + +//********************** Bit Clear ********************************** +//******************************************************************* +//Logical AND complement (AND negation or AND NOT) +_NEON2SSESTORAGE int8x8_t vbic_s8(int8x8_t a, int8x8_t b); // VBIC d0,d0,d0 +_NEON2SSE_INLINE int8x8_t vbic_s8(int8x8_t a, int8x8_t b) +{ + int8x8_t res64; + return64(_mm_andnot_si128(_pM128i(b),_pM128i(a))); //notice the arguments "swap" +} + +_NEON2SSE_GLOBAL int16x4_t vbic_s16(int16x4_t a, int16x4_t b); // VBIC d0,d0,d0 +#define vbic_s16 vbic_s8 + +_NEON2SSE_GLOBAL int32x2_t vbic_s32(int32x2_t a, int32x2_t b); // VBIC d0,d0,d0 +#define vbic_s32 vbic_s8 + +_NEON2SSESTORAGE int64x1_t vbic_s64(int64x1_t a, int64x1_t b); // VBIC d0,d0,d0 +_NEON2SSE_INLINE int64x1_t vbic_s64(int64x1_t a, int64x1_t b) +{ + int64x1_t res; + res.m64_i64[0] = a.m64_i64[0] & (~b.m64_i64[0]); + return res; +} + +_NEON2SSE_GLOBAL uint8x8_t vbic_u8(uint8x8_t a, uint8x8_t b); // VBIC d0,d0,d0 +#define vbic_u8 vbic_s8 + +_NEON2SSE_GLOBAL uint16x4_t vbic_u16(uint16x4_t a, uint16x4_t b); // VBIC d0,d0,d0 +#define vbic_u16 vbic_s16 + +_NEON2SSE_GLOBAL uint32x2_t vbic_u32(uint32x2_t a, uint32x2_t b); // VBIC d0,d0,d0 +#define vbic_u32 vbic_s32 + +_NEON2SSE_GLOBAL uint64x1_t vbic_u64(uint64x1_t a, uint64x1_t b); // VBIC d0,d0,d0 +#define vbic_u64 vbic_s64 + +_NEON2SSE_GLOBAL int8x16_t vbicq_s8(int8x16_t a, int8x16_t b); // VBIC q0,q0,q0 +#define vbicq_s8(a,b) _mm_andnot_si128 (b,a) //notice arguments "swap" + +_NEON2SSE_GLOBAL int16x8_t 
vbicq_s16(int16x8_t a, int16x8_t b); // VBIC q0,q0,q0 +#define vbicq_s16(a,b) _mm_andnot_si128 (b,a) //notice arguments "swap" + +_NEON2SSE_GLOBAL int32x4_t vbicq_s32(int32x4_t a, int32x4_t b); // VBIC q0,q0,q0 +#define vbicq_s32(a,b) _mm_andnot_si128 (b,a) //notice arguments "swap" + +_NEON2SSE_GLOBAL int64x2_t vbicq_s64(int64x2_t a, int64x2_t b); // VBIC q0,q0,q0 +#define vbicq_s64(a,b) _mm_andnot_si128 (b,a) //notice arguments "swap" + +_NEON2SSE_GLOBAL uint8x16_t vbicq_u8(uint8x16_t a, uint8x16_t b); // VBIC q0,q0,q0 +#define vbicq_u8(a,b) _mm_andnot_si128 (b,a) //notice arguments "swap" + +_NEON2SSE_GLOBAL uint16x8_t vbicq_u16(uint16x8_t a, uint16x8_t b); // VBIC q0,q0,q0 +#define vbicq_u16(a,b) _mm_andnot_si128 (b,a) //notice arguments "swap" + +_NEON2SSE_GLOBAL uint32x4_t vbicq_u32(uint32x4_t a, uint32x4_t b); // VBIC q0,q0,q0 +#define vbicq_u32(a,b) _mm_andnot_si128 (b,a) //notice arguments "swap" + +_NEON2SSE_GLOBAL uint64x2_t vbicq_u64(uint64x2_t a, uint64x2_t b); // VBIC q0,q0,q0 +#define vbicq_u64(a,b) _mm_andnot_si128 (b,a) //notice arguments "swap" + +//**************** Bitwise OR complement ******************************** +//**************************************** ******************************** +//no exact IA 32 match, need to implement it as following +_NEON2SSESTORAGE int8x8_t vorn_s8(int8x8_t a, int8x8_t b); // VORN d0,d0,d0 +_NEON2SSE_INLINE int8x8_t vorn_s8(int8x8_t a, int8x8_t b) +{ + int8x8_t res64; + return64(vornq_s8(_pM128i(a), _pM128i(b))); +} + + +_NEON2SSESTORAGE int16x4_t vorn_s16(int16x4_t a, int16x4_t b); // VORN d0,d0,d0 +_NEON2SSE_INLINE int16x4_t vorn_s16(int16x4_t a, int16x4_t b) +{ + int16x4_t res64; + return64(vornq_s16(_pM128i(a), _pM128i(b))); +} + + +_NEON2SSESTORAGE int32x2_t vorn_s32(int32x2_t a, int32x2_t b); // VORN d0,d0,d0 +_NEON2SSE_INLINE int32x2_t vorn_s32(int32x2_t a, int32x2_t b) +{ + int32x2_t res64; + return64(vornq_s32(_pM128i(a), _pM128i(b))); +} + + +_NEON2SSESTORAGE int64x1_t vorn_s64(int64x1_t a, int64x1_t b); // VORN d0,d0,d0 +_NEON2SSE_INLINE int64x1_t vorn_s64(int64x1_t a, int64x1_t b) +{ + int64x1_t res; + res.m64_i64[0] = a.m64_i64[0] | (~b.m64_i64[0]); + return res; +} + +_NEON2SSE_GLOBAL uint8x8_t vorn_u8(uint8x8_t a, uint8x8_t b); // VORN d0,d0,d0 +#define vorn_u8 vorn_s8 + + +_NEON2SSE_GLOBAL uint16x4_t vorn_u16(uint16x4_t a, uint16x4_t b); // VORN d0,d0,d0 +#define vorn_u16 vorn_s16 + +_NEON2SSE_GLOBAL uint32x2_t vorn_u32(uint32x2_t a, uint32x2_t b); // VORN d0,d0,d0 +#define vorn_u32 vorn_s32 + +_NEON2SSE_GLOBAL uint64x1_t vorn_u64(uint64x1_t a, uint64x1_t b); // VORN d0,d0,d0 +#define vorn_u64 vorn_s64 + + +_NEON2SSESTORAGE int8x16_t vornq_s8(int8x16_t a, int8x16_t b); // VORN q0,q0,q0 +_NEON2SSE_INLINE int8x16_t vornq_s8(int8x16_t a, int8x16_t b) // VORN q0,q0,q0 +{ + __m128i b1; + b1 = vmvnq_s8( b); //bitwise not for b + return _mm_or_si128 (a, b1); +} + +_NEON2SSESTORAGE int16x8_t vornq_s16(int16x8_t a, int16x8_t b); // VORN q0,q0,q0 +_NEON2SSE_INLINE int16x8_t vornq_s16(int16x8_t a, int16x8_t b) // VORN q0,q0,q0 +{ + __m128i b1; + b1 = vmvnq_s16( b); //bitwise not for b + return _mm_or_si128 (a, b1); +} + +_NEON2SSESTORAGE int32x4_t vornq_s32(int32x4_t a, int32x4_t b); // VORN q0,q0,q0 +_NEON2SSE_INLINE int32x4_t vornq_s32(int32x4_t a, int32x4_t b) // VORN q0,q0,q0 +{ + __m128i b1; + b1 = vmvnq_s32( b); //bitwise not for b + return _mm_or_si128 (a, b1); +} + +_NEON2SSESTORAGE int64x2_t vornq_s64(int64x2_t a, int64x2_t b); // VORN q0,q0,q0 +_NEON2SSE_INLINE int64x2_t vornq_s64(int64x2_t a, int64x2_t b) +{ + 
__m128i c1, b1;
+    c1 = _mm_cmpeq_epi8 (a, a); //all ones 0xfffffff...fffff
+    b1 = _mm_andnot_si128 (b, c1);
+    return _mm_or_si128 (a, b1);
+}
+
+_NEON2SSESTORAGE uint8x16_t vornq_u8(uint8x16_t a, uint8x16_t b); // VORN q0,q0,q0
+_NEON2SSE_INLINE uint8x16_t vornq_u8(uint8x16_t a, uint8x16_t b) // VORN q0,q0,q0
+{
+    __m128i b1;
+    b1 = vmvnq_u8( b); //bitwise not for b
+    return _mm_or_si128 (a, b1);
+}
+
+_NEON2SSESTORAGE uint16x8_t vornq_u16(uint16x8_t a, uint16x8_t b); // VORN q0,q0,q0
+_NEON2SSE_INLINE uint16x8_t vornq_u16(uint16x8_t a, uint16x8_t b) // VORN q0,q0,q0
+{
+    __m128i b1;
+    b1 = vmvnq_u16( b); //bitwise not for b
+    return _mm_or_si128 (a, b1);
+}
+
+_NEON2SSESTORAGE uint32x4_t vornq_u32(uint32x4_t a, uint32x4_t b); // VORN q0,q0,q0
+_NEON2SSE_INLINE uint32x4_t vornq_u32(uint32x4_t a, uint32x4_t b) // VORN q0,q0,q0
+{
+    __m128i b1;
+    b1 = vmvnq_u32( b); //bitwise not for b
+    return _mm_or_si128 (a, b1);
+}
+
+_NEON2SSE_GLOBAL uint64x2_t vornq_u64(uint64x2_t a, uint64x2_t b); // VORN q0,q0,q0
+#define vornq_u64 vornq_s64
+
+//********************* Bitwise Select *****************************
+//******************************************************************
+//Note: this intrinsic may compile to any of VBSL/VBIF/VBIT depending on register allocation.
+
+//VBSL (Bitwise Select) selects each bit for the destination from the first operand if the
+//corresponding bit of the destination is 1, or from the second operand if the corresponding bit of the destination is 0.
+
+//VBIF (Bitwise Insert if False) inserts each bit from the first operand into the destination
+//if the corresponding bit of the second operand is 0, otherwise leaves the destination bit unchanged.
+
+//VBIT (Bitwise Insert if True) inserts each bit from the first operand into the destination
+//if the corresponding bit of the second operand is 1, otherwise leaves the destination bit unchanged.
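+
+//For example, for 4-bit lanes with mask a = 0b1100, b = 0b1010, c = 0b0101:
+//the select result is (a & b) | (~a & c) = 0b1000 | 0b0001 = 0b1001,
+//i.e. the top two bits come from b and the bottom two bits come from c.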
+ +//VBSL only is implemented for SIMD +_NEON2SSESTORAGE int8x8_t vbsl_s8(uint8x8_t a, int8x8_t b, int8x8_t c); // VBSL d0,d0,d0 +_NEON2SSE_INLINE int8x8_t vbsl_s8(uint8x8_t a, int8x8_t b, int8x8_t c) +{ + int8x8_t res64; + __m128i res; + res = vbslq_s8(_pM128i(a), _pM128i(b), _pM128i(c)); + return64(res); +} + +_NEON2SSE_GLOBAL int16x4_t vbsl_s16(uint16x4_t a, int16x4_t b, int16x4_t c); // VBSL d0,d0,d0 +#define vbsl_s16 vbsl_s8 + +_NEON2SSE_GLOBAL int32x2_t vbsl_s32(uint32x2_t a, int32x2_t b, int32x2_t c); // VBSL d0,d0,d0 +#define vbsl_s32 vbsl_s8 + +_NEON2SSESTORAGE int64x1_t vbsl_s64(uint64x1_t a, int64x1_t b, int64x1_t c); // VBSL d0,d0,d0 +_NEON2SSE_INLINE int64x1_t vbsl_s64(uint64x1_t a, int64x1_t b, int64x1_t c) +{ + int64x1_t res; + res.m64_i64[0] = (a.m64_i64[0] & b.m64_i64[0]) | ( (~a.m64_i64[0]) & c.m64_i64[0]); + return res; +} + +_NEON2SSE_GLOBAL uint8x8_t vbsl_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c); // VBSL d0,d0,d0 +#define vbsl_u8 vbsl_s8 + +_NEON2SSE_GLOBAL uint16x4_t vbsl_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c); // VBSL d0,d0,d0 +#define vbsl_u16 vbsl_s8 + +_NEON2SSE_GLOBAL uint32x2_t vbsl_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c); // VBSL d0,d0,d0 +#define vbsl_u32 vbsl_s8 + +_NEON2SSE_GLOBAL uint64x1_t vbsl_u64(uint64x1_t a, uint64x1_t b, uint64x1_t c); // VBSL d0,d0,d0 +#define vbsl_u64 vbsl_s64 + +_NEON2SSESTORAGE float32x2_t vbsl_f32(uint32x2_t a, float32x2_t b, float32x2_t c); // VBSL d0,d0,d0 +_NEON2SSE_INLINE float32x2_t vbsl_f32(uint32x2_t a, float32x2_t b, float32x2_t c) +{ + __m128 sel1, sel2; + __m64_128 res64; + sel1 = _mm_and_ps (_pM128(a), _pM128(b)); + sel2 = _mm_andnot_ps (_pM128(a), _pM128(c)); + sel1 = _mm_or_ps (sel1, sel2); + _M64f(res64, sel1); + return res64; +} + +_NEON2SSE_GLOBAL poly8x8_t vbsl_p8(uint8x8_t a, poly8x8_t b, poly8x8_t c); // VBSL d0,d0,d0 +#define vbsl_p8 vbsl_s8 + +_NEON2SSE_GLOBAL poly16x4_t vbsl_p16(uint16x4_t a, poly16x4_t b, poly16x4_t c); // VBSL d0,d0,d0 +#define vbsl_p16 vbsl_s8 + +_NEON2SSESTORAGE int8x16_t vbslq_s8(uint8x16_t a, int8x16_t b, int8x16_t c); // VBSL q0,q0,q0 +_NEON2SSE_INLINE int8x16_t vbslq_s8(uint8x16_t a, int8x16_t b, int8x16_t c) // VBSL q0,q0,q0 +{ + __m128i sel1, sel2; + sel1 = _mm_and_si128 (a, b); + sel2 = _mm_andnot_si128 (a, c); + return _mm_or_si128 (sel1, sel2); +} + +_NEON2SSE_GLOBAL int16x8_t vbslq_s16(uint16x8_t a, int16x8_t b, int16x8_t c); // VBSL q0,q0,q0 +#define vbslq_s16 vbslq_s8 + +_NEON2SSE_GLOBAL int32x4_t vbslq_s32(uint32x4_t a, int32x4_t b, int32x4_t c); // VBSL q0,q0,q0 +#define vbslq_s32 vbslq_s8 + +_NEON2SSE_GLOBAL int64x2_t vbslq_s64(uint64x2_t a, int64x2_t b, int64x2_t c); // VBSL q0,q0,q0 +#define vbslq_s64 vbslq_s8 + +_NEON2SSE_GLOBAL uint8x16_t vbslq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c); // VBSL q0,q0,q0 +#define vbslq_u8 vbslq_s8 + +_NEON2SSE_GLOBAL uint16x8_t vbslq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c); // VBSL q0,q0,q0 +#define vbslq_u16 vbslq_s8 + +_NEON2SSE_GLOBAL uint32x4_t vbslq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c); // VBSL q0,q0,q0 +#define vbslq_u32 vbslq_s8 + +_NEON2SSE_GLOBAL uint64x2_t vbslq_u64(uint64x2_t a, uint64x2_t b, uint64x2_t c); // VBSL q0,q0,q0 +#define vbslq_u64 vbslq_s8 + +_NEON2SSESTORAGE float32x4_t vbslq_f32(uint32x4_t a, float32x4_t b, float32x4_t c); // VBSL q0,q0,q0 +_NEON2SSE_INLINE float32x4_t vbslq_f32(uint32x4_t a, float32x4_t b, float32x4_t c) // VBSL q0,q0,q0 +{ + __m128 sel1, sel2; + sel1 = _mm_and_ps (*(__m128*)&a, b); + sel2 = _mm_andnot_ps (*(__m128*)&a, c); + return _mm_or_ps (sel1, sel2); +} + 
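+//A minimal usage sketch of the select above: branchless per-lane max(v, 0.0f).
+//Illustrative only; the helper name example_vbslq_max0 is hypothetical, everything
+//else is defined earlier in this file.
+static float32x4_t example_vbslq_max0(float32x4_t v)
+{
+    float32x4_t zero = vdupq_n_f32(0.0f);
+    uint32x4_t mask = vcgeq_f32(v, zero); //lanes of all ones where v >= 0.0f
+    return vbslq_f32(mask, v, zero); //(mask & v) | (~mask & zero), i.e. max(v, 0)
+}
+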
+_NEON2SSE_GLOBAL poly8x16_t vbslq_p8(uint8x16_t a, poly8x16_t b, poly8x16_t c); // VBSL q0,q0,q0 +#define vbslq_p8 vbslq_u8 + +_NEON2SSE_GLOBAL poly16x8_t vbslq_p16(uint16x8_t a, poly16x8_t b, poly16x8_t c); // VBSL q0,q0,q0 +#define vbslq_p16 vbslq_s8 + +//************************************************************************************ +//**************** Transposition operations **************************************** +//************************************************************************************ +//***************** Vector Transpose ************************************************ +//************************************************************************************ +//VTRN (Vector Transpose) treats the elements of its operand vectors as elements of 2 x 2 matrices, and transposes the matrices. +// making the result look as (a0, b0, a2, b2, a4, b4,....) (a1, b1, a3, b3, a5, b5,.....) +_NEON2SSESTORAGE int8x8x2_t vtrn_s8(int8x8_t a, int8x8_t b); // VTRN.8 d0,d0 +_NEON2SSE_INLINE int8x8x2_t vtrn_s8(int8x8_t a, int8x8_t b) // VTRN.8 d0,d0 +{ + int8x8x2_t val; + __m128i tmp, val0; + tmp = _mm_unpacklo_epi8(_pM128i(a), _pM128i(b)); //a0,b0,a1,b1,a2,b2,a3,b3,...,a7,b7 + val0 = _mm_shuffle_epi8 (tmp, *(__m128i*)mask8_32_even_odd); //(a0, b0, a2, b2, a4, b4, a6, b6), (a1,b1, a3,b3, a5,b5, a7,b7) + vst1q_s8 (val.val, val0); // _mm_shuffle_epi32 (val.val[0], _SWAP_HI_LOW32); //(a1,b1, a3,b3, a5,b5, a7,b7),(a0, b0, a2, b2, a4, b4, a6, b6), + return val; +} + +_NEON2SSESTORAGE int16x4x2_t vtrn_s16(int16x4_t a, int16x4_t b); // VTRN.16 d0,d0 +_NEON2SSE_INLINE int16x4x2_t vtrn_s16(int16x4_t a, int16x4_t b) // VTRN.16 d0,d0 +{ + int16x4x2_t val; + __m128i tmp, val0; + _NEON2SSE_ALIGN_16 static const int8_t maskdlv16[16] = {0,1, 2,3, 8,9, 10,11, 4,5, 6,7, 12,13, 14, 15}; + tmp = _mm_unpacklo_epi16(_pM128i(a), _pM128i(b)); //a0,b0,a1,b1,a2,b2,a3,b3 + val0 = _mm_shuffle_epi8 (tmp, *(__m128i*)maskdlv16); //a0, b0, a2, b2, a1,b1, a3, b3 + vst1q_s16(val.val, val0); // _mm_shuffle_epi32 (val.val[0], _SWAP_HI_LOW32); //(a1,b1, a3,b3),(a0, b0, a2, b2), + return val; +} + +_NEON2SSESTORAGE int32x2x2_t vtrn_s32(int32x2_t a, int32x2_t b); // VTRN.32 d0,d0 +_NEON2SSE_INLINE int32x2x2_t vtrn_s32(int32x2_t a, int32x2_t b) +{ + int32x2x2_t val; + __m128i val0; + val0 = _mm_unpacklo_epi32(_pM128i(a), _pM128i(b)); //a0,b0,a1,b1 + vst1q_s32(val.val, val0); // _mm_shuffle_epi32(val.val[0], _SWAP_HI_LOW32); //a1,b1, a0,b0, + return val; +} + +_NEON2SSE_GLOBAL uint8x8x2_t vtrn_u8(uint8x8_t a, uint8x8_t b); // VTRN.8 d0,d0 +#define vtrn_u8 vtrn_s8 + +_NEON2SSE_GLOBAL uint16x4x2_t vtrn_u16(uint16x4_t a, uint16x4_t b); // VTRN.16 d0,d0 +#define vtrn_u16 vtrn_s16 + +_NEON2SSE_GLOBAL uint32x2x2_t vtrn_u32(uint32x2_t a, uint32x2_t b); // VTRN.32 d0,d0 +#define vtrn_u32 vtrn_s32 + +_NEON2SSESTORAGE float32x2x2_t vtrn_f32(float32x2_t a, float32x2_t b); // VTRN.32 d0,d0 +_NEON2SSE_INLINE float32x2x2_t vtrn_f32(float32x2_t a, float32x2_t b) +{ + float32x2x2_t val; + val.val[0].m64_f32[0] = a.m64_f32[0]; + val.val[0].m64_f32[1] = b.m64_f32[0]; + val.val[1].m64_f32[0] = a.m64_f32[1]; + val.val[1].m64_f32[1] = b.m64_f32[1]; + return val; //a0,b0,a1,b1 +} + +_NEON2SSE_GLOBAL poly8x8x2_t vtrn_p8(poly8x8_t a, poly8x8_t b); // VTRN.8 d0,d0 +#define vtrn_p8 vtrn_u8 + +_NEON2SSE_GLOBAL poly16x4x2_t vtrn_p16(poly16x4_t a, poly16x4_t b); // VTRN.16 d0,d0 +#define vtrn_p16 vtrn_s16 + +//int8x16x2_t vtrnq_s8(int8x16_t a, int8x16_t b); // VTRN.8 q0,q0 +_NEON2SSE_INLINE int8x16x2_t vtrnq_s8(int8x16_t a, int8x16_t b) // VTRN.8 q0,q0 
+{
+    int8x16x2_t r8x16;
+    __m128i a_sh, b_sh;
+    a_sh = _mm_shuffle_epi8 (a, *(__m128i*)mask8_16_even_odd); //a0, a2, a4, a6, a8, a10, a12, a14, a1, a3, a5, a7, a9, a11, a13, a15
+    b_sh = _mm_shuffle_epi8 (b, *(__m128i*)mask8_16_even_odd); //b0, b2, b4, b6, b8, b10, b12, b14, b1, b3, b5, b7, b9, b11, b13, b15
+
+    r8x16.val[0] = _mm_unpacklo_epi8(a_sh, b_sh); //(a0, b0, a2, b2, a4, b4, a6, b6, a8,b8, a10,b10, a12,b12, a14,b14)
+    r8x16.val[1] = _mm_unpackhi_epi8(a_sh, b_sh); // (a1, b1, a3, b3, a5, b5, a7, b7, a9,b9, a11,b11, a13,b13, a15,b15)
+    return r8x16;
+}
+
+_NEON2SSESTORAGE int16x8x2_t vtrnq_s16(int16x8_t a, int16x8_t b); // VTRN.16 q0,q0
+_NEON2SSE_INLINE int16x8x2_t vtrnq_s16(int16x8_t a, int16x8_t b) // VTRN.16 q0,q0
+{
+    int16x8x2_t v16x8;
+    __m128i a_sh, b_sh;
+    a_sh = _mm_shuffle_epi8 (a, *(__m128i*) mask8_32_even_odd); //a0, a2, a4, a6, a1, a3, a5, a7
+    b_sh = _mm_shuffle_epi8 (b, *(__m128i*) mask8_32_even_odd); //b0, b2, b4, b6, b1, b3, b5, b7
+    v16x8.val[0] = _mm_unpacklo_epi16(a_sh, b_sh); //a0, b0, a2, b2, a4, b4, a6, b6
+    v16x8.val[1] = _mm_unpackhi_epi16(a_sh, b_sh); //a1, b1, a3, b3, a5, b5, a7, b7
+    return v16x8;
+}
+
+_NEON2SSESTORAGE int32x4x2_t vtrnq_s32(int32x4_t a, int32x4_t b); // VTRN.32 q0,q0
+_NEON2SSE_INLINE int32x4x2_t vtrnq_s32(int32x4_t a, int32x4_t b) // VTRN.32 q0,q0
+{
+    //may not be optimal compared with a serial solution
+    int32x4x2_t v32x4;
+    __m128i a_sh, b_sh;
+    a_sh = _mm_shuffle_epi32 (a, 216); //a0, a2, a1, a3
+    b_sh = _mm_shuffle_epi32 (b, 216); //b0, b2, b1, b3
+
+    v32x4.val[0] = _mm_unpacklo_epi32(a_sh, b_sh); //a0, b0, a2, b2
+    v32x4.val[1] = _mm_unpackhi_epi32(a_sh, b_sh); //a1, b1, a3, b3
+    return v32x4;
+}
+
+_NEON2SSE_GLOBAL uint8x16x2_t vtrnq_u8(uint8x16_t a, uint8x16_t b); // VTRN.8 q0,q0
+#define vtrnq_u8 vtrnq_s8
+
+_NEON2SSE_GLOBAL uint16x8x2_t vtrnq_u16(uint16x8_t a, uint16x8_t b); // VTRN.16 q0,q0
+#define vtrnq_u16 vtrnq_s16
+
+_NEON2SSE_GLOBAL uint32x4x2_t vtrnq_u32(uint32x4_t a, uint32x4_t b); // VTRN.32 q0,q0
+#define vtrnq_u32 vtrnq_s32
+
+_NEON2SSESTORAGE float32x4x2_t vtrnq_f32(float32x4_t a, float32x4_t b); // VTRN.32 q0,q0
+_NEON2SSE_INLINE float32x4x2_t vtrnq_f32(float32x4_t a, float32x4_t b) // VTRN.32 q0,q0
+{
+    //may not be optimal compared with a serial solution
+    float32x4x2_t f32x4;
+    __m128 a_sh, b_sh;
+    a_sh = _mm_shuffle_ps (a, a, _MM_SHUFFLE(3,1, 2, 0)); //a0, a2, a1, a3, need to check endianness
+    b_sh = _mm_shuffle_ps (b, b, _MM_SHUFFLE(3,1, 2, 0)); //b0, b2, b1, b3, need to check endianness
+
+    f32x4.val[0] = _mm_unpacklo_ps(a_sh, b_sh); //a0, b0, a2, b2
+    f32x4.val[1] = _mm_unpackhi_ps(a_sh, b_sh); //a1, b1, a3, b3
+    return f32x4;
+}
+
+_NEON2SSE_GLOBAL poly8x16x2_t vtrnq_p8(poly8x16_t a, poly8x16_t b); // VTRN.8 q0,q0
+#define vtrnq_p8 vtrnq_s8
+
+_NEON2SSE_GLOBAL poly16x8x2_t vtrnq_p16(poly16x8_t a, poly16x8_t b); // VTRN.16 q0,q0
+#define vtrnq_p16 vtrnq_s16
+
+//***************** Interleave elements ***************************
+//*****************************************************************
+//output has (a0,b0,a1,b1, a2,b2,.....)
+_NEON2SSESTORAGE int8x8x2_t vzip_s8(int8x8_t a, int8x8_t b); // VZIP.8 d0,d0 +_NEON2SSE_INLINE int8x8x2_t vzip_s8(int8x8_t a, int8x8_t b) // VZIP.8 d0,d0 +{ + int8x8x2_t val; + __m128i val0; + val0 = _mm_unpacklo_epi8(_pM128i(a), _pM128i(b)); + vst1q_s8(val.val, val0); //_mm_shuffle_epi32(val.val[0], _SWAP_HI_LOW32); + return val; +} + +_NEON2SSESTORAGE int16x4x2_t vzip_s16(int16x4_t a, int16x4_t b); // VZIP.16 d0,d0 +_NEON2SSE_INLINE int16x4x2_t vzip_s16(int16x4_t a, int16x4_t b) // VZIP.16 d0,d0 +{ + int16x4x2_t val; + __m128i val0; + val0 = _mm_unpacklo_epi16(_pM128i(a), _pM128i(b)); + vst1q_s16(val.val, val0); // _mm_shuffle_epi32(val.val[0], _SWAP_HI_LOW32); + return val; +} + +_NEON2SSE_GLOBAL int32x2x2_t vzip_s32(int32x2_t a, int32x2_t b); // VZIP.32 d0,d0 +#define vzip_s32 vtrn_s32 + +_NEON2SSE_GLOBAL uint8x8x2_t vzip_u8(uint8x8_t a, uint8x8_t b); // VZIP.8 d0,d0 +#define vzip_u8 vzip_s8 + +_NEON2SSE_GLOBAL uint16x4x2_t vzip_u16(uint16x4_t a, uint16x4_t b); // VZIP.16 d0,d0 +#define vzip_u16 vzip_s16 + +_NEON2SSE_GLOBAL uint32x2x2_t vzip_u32(uint32x2_t a, uint32x2_t b); // VZIP.32 d0,d0 +#define vzip_u32 vzip_s32 + +_NEON2SSE_GLOBAL float32x2x2_t vzip_f32(float32x2_t a, float32x2_t b); // VZIP.32 d0,d0 +#define vzip_f32 vtrn_f32 + +_NEON2SSE_GLOBAL poly8x8x2_t vzip_p8(poly8x8_t a, poly8x8_t b); // VZIP.8 d0,d0 +#define vzip_p8 vzip_u8 + +_NEON2SSE_GLOBAL poly16x4x2_t vzip_p16(poly16x4_t a, poly16x4_t b); // VZIP.16 d0,d0 +#define vzip_p16 vzip_u16 + +_NEON2SSESTORAGE int8x16x2_t vzipq_s8(int8x16_t a, int8x16_t b); // VZIP.8 q0,q0 +_NEON2SSE_INLINE int8x16x2_t vzipq_s8(int8x16_t a, int8x16_t b) // VZIP.8 q0,q0 +{ + int8x16x2_t r8x16; + r8x16.val[0] = _mm_unpacklo_epi8(a, b); + r8x16.val[1] = _mm_unpackhi_epi8(a, b); + return r8x16; +} + +_NEON2SSESTORAGE int16x8x2_t vzipq_s16(int16x8_t a, int16x8_t b); // VZIP.16 q0,q0 +_NEON2SSE_INLINE int16x8x2_t vzipq_s16(int16x8_t a, int16x8_t b) // VZIP.16 q0,q0 +{ + int16x8x2_t r16x8; + r16x8.val[0] = _mm_unpacklo_epi16(a, b); + r16x8.val[1] = _mm_unpackhi_epi16(a, b); + return r16x8; +} + +_NEON2SSESTORAGE int32x4x2_t vzipq_s32(int32x4_t a, int32x4_t b); // VZIP.32 q0,q0 +_NEON2SSE_INLINE int32x4x2_t vzipq_s32(int32x4_t a, int32x4_t b) // VZIP.32 q0,q0 +{ + int32x4x2_t r32x4; + r32x4.val[0] = _mm_unpacklo_epi32(a, b); + r32x4.val[1] = _mm_unpackhi_epi32(a, b); + return r32x4; +} + +_NEON2SSE_GLOBAL uint8x16x2_t vzipq_u8(uint8x16_t a, uint8x16_t b); // VZIP.8 q0,q0 +#define vzipq_u8 vzipq_s8 + +_NEON2SSE_GLOBAL uint16x8x2_t vzipq_u16(uint16x8_t a, uint16x8_t b); // VZIP.16 q0,q0 +#define vzipq_u16 vzipq_s16 + +_NEON2SSE_GLOBAL uint32x4x2_t vzipq_u32(uint32x4_t a, uint32x4_t b); // VZIP.32 q0,q0 +#define vzipq_u32 vzipq_s32 + +_NEON2SSESTORAGE float32x4x2_t vzipq_f32(float32x4_t a, float32x4_t b); // VZIP.32 q0,q0 +_NEON2SSE_INLINE float32x4x2_t vzipq_f32(float32x4_t a, float32x4_t b) // VZIP.32 q0,q0 +{ + float32x4x2_t f32x4; + f32x4.val[0] = _mm_unpacklo_ps ( a, b); + f32x4.val[1] = _mm_unpackhi_ps ( a, b); + return f32x4; +} + +_NEON2SSE_GLOBAL poly8x16x2_t vzipq_p8(poly8x16_t a, poly8x16_t b); // VZIP.8 q0,q0 +#define vzipq_p8 vzipq_u8 + +_NEON2SSE_GLOBAL poly16x8x2_t vzipq_p16(poly16x8_t a, poly16x8_t b); // VZIP.16 q0,q0 +#define vzipq_p16 vzipq_u16 + +//*********************** De-Interleave elements ************************* +//************************************************************************* +//As the result of these functions first val contains (a0,a2,a4,....,b0,b2, b4,...) and the second val (a1,a3,a5,....b1,b3,b5...) 
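+//For example, vuzp_u8 with a = (0,1,2,3,4,5,6,7) and b = (8,9,10,11,12,13,14,15)
+//returns val[0] = (0,2,4,6,8,10,12,14) and val[1] = (1,3,5,7,9,11,13,15).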
+//no such functions in IA32 SIMD, shuffle is required +_NEON2SSESTORAGE int8x8x2_t vuzp_s8(int8x8_t a, int8x8_t b); // VUZP.8 d0,d0 +_NEON2SSE_INLINE int8x8x2_t vuzp_s8(int8x8_t a, int8x8_t b) // VUZP.8 d0,d0 +{ + int8x8x2_t val; + __m128i tmp, val0; + _NEON2SSE_ALIGN_16 static const int8_t maskdlv8[16] = { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11,15}; + tmp = _mm_unpacklo_epi8(_pM128i(a), _pM128i(b)); //a0,b0,a1,b1,a2,b2,a3,b3,...,a7,b7 + val0 = _mm_shuffle_epi8 (tmp, *(__m128i*)maskdlv8); //(a0, a2, a4, a6, b0, b2, b4, b6), (a1, a3, a5, a7, b1,b3, b5, b7) + vst1q_s8(val.val, val0); // _mm_shuffle_epi32(val.val[0], _SWAP_HI_LOW32); + return val; +} + +_NEON2SSESTORAGE int16x4x2_t vuzp_s16(int16x4_t a, int16x4_t b); // VUZP.16 d0,d0 +_NEON2SSE_INLINE int16x4x2_t vuzp_s16(int16x4_t a, int16x4_t b) // VUZP.16 d0,d0 +{ + int16x4x2_t val; + __m128i tmp, val0; + _NEON2SSE_ALIGN_16 static const int8_t maskdlv16[16] = {0,1, 8,9, 2,3, 10,11, 4,5, 12,13, 6,7, 14,15}; + tmp = _mm_unpacklo_epi16(_pM128i(a), _pM128i(b)); //a0,b0,a1,b1,a2,b2,a3,b3 + val0 = _mm_shuffle_epi8 (tmp, *(__m128i*)maskdlv16); //a0,a2, b0, b2, a1,a3, b1,b3 + vst1q_s16(val.val, val0); // _mm_shuffle_epi32(val.val[0], _SWAP_HI_LOW32); + return val; +} + +_NEON2SSESTORAGE int32x2x2_t vuzp_s32(int32x2_t a, int32x2_t b); // VUZP.32 d0,d0 +_NEON2SSE_INLINE int32x2x2_t vuzp_s32(int32x2_t a, int32x2_t b) // VUZP.32 d0,d0 +{ + int32x2x2_t val; + __m128i val0; + val0 = _mm_unpacklo_epi32(_pM128i(a), _pM128i(b)); //a0,b0, a1,b1 + vst1q_s32(val.val, val0); // _mm_shuffle_epi32(val.val[0], _SWAP_HI_LOW32); + return val; +} + +_NEON2SSE_GLOBAL uint8x8x2_t vuzp_u8(uint8x8_t a, uint8x8_t b); // VUZP.8 d0,d0 +#define vuzp_u8 vuzp_s8 + +_NEON2SSE_GLOBAL uint16x4x2_t vuzp_u16(uint16x4_t a, uint16x4_t b); // VUZP.16 d0,d0 +#define vuzp_u16 vuzp_s16 + +_NEON2SSE_GLOBAL uint32x2x2_t vuzp_u32(uint32x2_t a, uint32x2_t b); // VUZP.32 d0,d0 +#define vuzp_u32 vuzp_s32 + +_NEON2SSE_GLOBAL float32x2x2_t vuzp_f32(float32x2_t a, float32x2_t b); // VUZP.32 d0,d0 +#define vuzp_f32 vzip_f32 + +_NEON2SSE_GLOBAL poly8x8x2_t vuzp_p8(poly8x8_t a, poly8x8_t b); // VUZP.8 d0,d0 +#define vuzp_p8 vuzp_u8 + +_NEON2SSE_GLOBAL poly16x4x2_t vuzp_p16(poly16x4_t a, poly16x4_t b); // VUZP.16 d0,d0 +#define vuzp_p16 vuzp_u16 + +_NEON2SSESTORAGE int8x16x2_t vuzpq_s8(int8x16_t a, int8x16_t b); // VUZP.8 q0,q0 +_NEON2SSE_INLINE int8x16x2_t vuzpq_s8(int8x16_t a, int8x16_t b) // VUZP.8 q0,q0 +{ + int8x16x2_t v8x16; + __m128i a_sh, b_sh; + a_sh = _mm_shuffle_epi8 (a, *(__m128i*)mask8_16_even_odd); //a0, a2, a4, a6, a8, a10, a12, a14, a1, a3, a5, a7, a9, a11, a13, a15 + b_sh = _mm_shuffle_epi8 (b, *(__m128i*)mask8_16_even_odd); //b0, b2, b4, b6, b8, b10, b12, b14, b1, b3, b5, b7, b9, b11, b13, b15 + //we need unpack64 to combine lower (upper) 64 bits from a with lower (upper) 64 bits from b + v8x16.val[0] = _mm_unpacklo_epi64(a_sh, b_sh); ///a0, a2, a4, a6, a8, a10, a12, a14, b0, b2, b4, b6, b8, b10, b12, b14, + v8x16.val[1] = _mm_unpackhi_epi64(a_sh, b_sh); //a1, a3, a5, a7, a9, a11, a13, a15, b1, b3, b5, b7, b9, b11, b13, b15 + return v8x16; +} + +_NEON2SSESTORAGE int16x8x2_t vuzpq_s16(int16x8_t a, int16x8_t b); // VUZP.16 q0,q0 +_NEON2SSE_INLINE int16x8x2_t vuzpq_s16(int16x8_t a, int16x8_t b) // VUZP.16 q0,q0 +{ + int16x8x2_t v16x8; + __m128i a_sh, b_sh; + a_sh = _mm_shuffle_epi8 (a, *(__m128i*)mask8_32_even_odd); //a0, a2, a4, a6, a1, a3, a5, a7 + b_sh = _mm_shuffle_epi8 (b, *(__m128i*)mask8_32_even_odd); //b0, b2, b4, b6, b1, b3, b5, b7 + v16x8.val[0] = 
_mm_unpacklo_epi64(a_sh, b_sh); //a0, a2, a4, a6, b0, b2, b4, b6
+    v16x8.val[1] = _mm_unpackhi_epi64(a_sh, b_sh); //a1, a3, a5, a7, b1, b3, b5, b7
+    return v16x8;
+}
+
+_NEON2SSESTORAGE int32x4x2_t vuzpq_s32(int32x4_t a, int32x4_t b); // VUZP.32 q0,q0
+_NEON2SSE_INLINE int32x4x2_t vuzpq_s32(int32x4_t a, int32x4_t b) // VUZP.32 q0,q0
+{
+    //may not be optimal compared with a serial solution
+    int32x4x2_t v32x4;
+    __m128i a_sh, b_sh;
+    a_sh = _mm_shuffle_epi32 (a, 216); //a0, a2, a1, a3
+    b_sh = _mm_shuffle_epi32 (b, 216); //b0, b2, b1, b3
+
+    v32x4.val[0] = _mm_unpacklo_epi64(a_sh, b_sh); //a0, a2, b0, b2
+    v32x4.val[1] = _mm_unpackhi_epi64(a_sh, b_sh); //a1, a3, b1, b3
+    return v32x4;
+}
+
+_NEON2SSE_GLOBAL uint8x16x2_t vuzpq_u8(uint8x16_t a, uint8x16_t b); // VUZP.8 q0,q0
+#define vuzpq_u8 vuzpq_s8
+
+_NEON2SSE_GLOBAL uint16x8x2_t vuzpq_u16(uint16x8_t a, uint16x8_t b); // VUZP.16 q0,q0
+#define vuzpq_u16 vuzpq_s16
+
+_NEON2SSE_GLOBAL uint32x4x2_t vuzpq_u32(uint32x4_t a, uint32x4_t b); // VUZP.32 q0,q0
+#define vuzpq_u32 vuzpq_s32
+
+_NEON2SSESTORAGE float32x4x2_t vuzpq_f32(float32x4_t a, float32x4_t b); // VUZP.32 q0,q0
+_NEON2SSE_INLINE float32x4x2_t vuzpq_f32(float32x4_t a, float32x4_t b) // VUZP.32 q0,q0
+{
+    float32x4x2_t v32x4;
+    v32x4.val[0] = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2,0, 2, 0)); //a0, a2, b0, b2, need to check endianness however
+    v32x4.val[1] = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3,1, 3, 1)); //a1, a3, b1, b3, need to check endianness however
+    return v32x4;
+}
+
+_NEON2SSE_GLOBAL poly8x16x2_t vuzpq_p8(poly8x16_t a, poly8x16_t b); // VUZP.8 q0,q0
+#define vuzpq_p8 vuzpq_u8
+
+_NEON2SSE_GLOBAL poly16x8x2_t vuzpq_p16(poly16x8_t a, poly16x8_t b); // VUZP.16 q0,q0
+#define vuzpq_p16 vuzpq_u16
+
+//##############################################################################################
+//*********************** Reinterpret cast intrinsics.******************************************
+//##############################################################################################
+// Not part of the official NEON instruction set, but available in the gcc compiler *************
+_NEON2SSE_GLOBAL poly8x8_t vreinterpret_p8_u32 (uint32x2_t t);
+#define vreinterpret_p8_u32
+
+_NEON2SSE_GLOBAL poly8x8_t vreinterpret_p8_u16 (uint16x4_t t);
+#define vreinterpret_p8_u16
+
+_NEON2SSE_GLOBAL poly8x8_t vreinterpret_p8_u8 (uint8x8_t t);
+#define vreinterpret_p8_u8
+
+_NEON2SSE_GLOBAL poly8x8_t vreinterpret_p8_s32 (int32x2_t t);
+#define vreinterpret_p8_s32
+
+_NEON2SSE_GLOBAL poly8x8_t vreinterpret_p8_s16 (int16x4_t t);
+#define vreinterpret_p8_s16
+
+_NEON2SSE_GLOBAL poly8x8_t vreinterpret_p8_s8 (int8x8_t t);
+#define vreinterpret_p8_s8
+
+_NEON2SSE_GLOBAL poly8x8_t vreinterpret_p8_u64 (uint64x1_t t);
+#define vreinterpret_p8_u64
+
+_NEON2SSE_GLOBAL poly8x8_t vreinterpret_p8_s64 (int64x1_t t);
+#define vreinterpret_p8_s64
+
+_NEON2SSE_GLOBAL poly8x8_t vreinterpret_p8_f32 (float32x2_t t);
+#define vreinterpret_p8_f32
+
+_NEON2SSE_GLOBAL poly8x8_t vreinterpret_p8_p16 (poly16x4_t t);
+#define vreinterpret_p8_p16
+
+_NEON2SSE_GLOBAL poly8x16_t vreinterpretq_p8_u32 (uint32x4_t t);
+#define vreinterpretq_p8_u32
+
+_NEON2SSE_GLOBAL poly8x16_t vreinterpretq_p8_u16 (uint16x8_t t);
+#define vreinterpretq_p8_u16
+
+_NEON2SSE_GLOBAL poly8x16_t vreinterpretq_p8_u8 (uint8x16_t t);
+#define vreinterpretq_p8_u8
+
+_NEON2SSE_GLOBAL poly8x16_t vreinterpretq_p8_s32 (int32x4_t t);
+#define vreinterpretq_p8_s32
+
+_NEON2SSE_GLOBAL poly8x16_t vreinterpretq_p8_s16 (int16x8_t t);
+#define vreinterpretq_p8_s16
+
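+//For example (illustrative): these casts only relabel the type, the bits are unchanged:
+//  float32x4_t f = vdupq_n_f32(-1.0f);
+//  uint32x4_t  u = vreinterpretq_u32_f32(f); //every lane holds 0xBF800000,
+//the IEEE-754 bit pattern of -1.0f; no conversion instruction is emitted.
+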
+_NEON2SSE_GLOBAL poly8x16_t vreinterpretq_p8_s8 (int8x16_t t); +#define vreinterpretq_p8_s8 + +_NEON2SSE_GLOBAL poly8x16_t vreinterpretq_p8_u64 (uint64x2_t t); +#define vreinterpretq_p8_u64 + +_NEON2SSE_GLOBAL poly8x16_t vreinterpretq_p8_s64 (int64x2_t t); +#define vreinterpretq_p8_s64 + +_NEON2SSE_GLOBAL poly8x16_t vreinterpretq_p8_f32 (float32x4_t t); +#define vreinterpretq_p8_f32(t) _M128i(t) + +_NEON2SSE_GLOBAL poly8x16_t vreinterpretq_p8_p16 (poly16x8_t t); +#define vreinterpretq_p8_p16 + +_NEON2SSE_GLOBAL poly16x4_t vreinterpret_p16_u32 (uint32x2_t t); +#define vreinterpret_p16_u32 + +_NEON2SSE_GLOBAL poly16x4_t vreinterpret_p16_u16 (uint16x4_t t); +#define vreinterpret_p16_u16 + +_NEON2SSE_GLOBAL poly16x4_t vreinterpret_p16_u8 (uint8x8_t t); +#define vreinterpret_p16_u8 + +_NEON2SSE_GLOBAL poly16x4_t vreinterpret_p16_s32 (int32x2_t t); +#define vreinterpret_p16_s32 + +_NEON2SSE_GLOBAL poly16x4_t vreinterpret_p16_s16 (int16x4_t t); +#define vreinterpret_p16_s16 + +_NEON2SSE_GLOBAL poly16x4_t vreinterpret_p16_s8 (int8x8_t t); +#define vreinterpret_p16_s8 + +_NEON2SSE_GLOBAL poly16x4_t vreinterpret_p16_u64 (uint64x1_t t); +#define vreinterpret_p16_u64 + +_NEON2SSE_GLOBAL poly16x4_t vreinterpret_p16_s64 (int64x1_t t); +#define vreinterpret_p16_s64 + +_NEON2SSE_GLOBAL poly16x4_t vreinterpret_p16_f32 (float32x2_t t); +#define vreinterpret_p16_f32 + +_NEON2SSE_GLOBAL poly16x4_t vreinterpret_p16_p8 (poly8x8_t t); +#define vreinterpret_p16_p8 + +_NEON2SSE_GLOBAL poly16x8_t vreinterpretq_p16_u32 (uint32x4_t t); +#define vreinterpretq_p16_u32 + +_NEON2SSE_GLOBAL poly16x8_t vreinterpretq_p16_u16 (uint16x8_t t); +#define vreinterpretq_p16_u16 + +_NEON2SSE_GLOBAL poly16x8_t vreinterpretq_p16_s32 (int32x4_t t); +#define vreinterpretq_p16_s32 + +_NEON2SSE_GLOBAL poly16x8_t vreinterpretq_p16_s16 (int16x8_t t); +#define vreinterpretq_p16_s16 + +_NEON2SSE_GLOBAL poly16x8_t vreinterpretq_p16_s8 (int8x16_t t); +#define vreinterpretq_p16_s8 + +_NEON2SSE_GLOBAL poly16x8_t vreinterpretq_p16_u64 (uint64x2_t t); +#define vreinterpretq_p16_u64 + +_NEON2SSE_GLOBAL poly16x8_t vreinterpretq_p16_s64 (int64x2_t t); +#define vreinterpretq_p16_s64 + +_NEON2SSE_GLOBAL poly16x8_t vreinterpretq_p16_f32 (float32x4_t t); +#define vreinterpretq_p16_f32(t) _M128i(t) + +_NEON2SSE_GLOBAL poly16x8_t vreinterpretq_p16_p8 (poly8x16_t t); +#define vreinterpretq_p16_p8 vreinterpretq_s16_p8 + +//**** Integer to float ****** +_NEON2SSESTORAGE float32x2_t vreinterpret_f32_u32 (uint32x2_t t); +_NEON2SSE_INLINE float32x2_t vreinterpret_f32_u32 (uint32x2_t t) +{ + return (*(__m64_128*)&(t)); +} + +_NEON2SSE_GLOBAL float32x2_t vreinterpret_f32_u16 (uint16x4_t t); +#define vreinterpret_f32_u16 vreinterpret_f32_u32 + + +_NEON2SSE_GLOBAL float32x2_t vreinterpret_f32_u8 (uint8x8_t t); +#define vreinterpret_f32_u8 vreinterpret_f32_u32 + + +_NEON2SSE_GLOBAL float32x2_t vreinterpret_f32_s32 (int32x2_t t); +#define vreinterpret_f32_s32 vreinterpret_f32_u32 + + +_NEON2SSE_GLOBAL float32x2_t vreinterpret_f32_s16 (int16x4_t t); +#define vreinterpret_f32_s16 vreinterpret_f32_u32 + +_NEON2SSE_GLOBAL float32x2_t vreinterpret_f32_s8 (int8x8_t t); +#define vreinterpret_f32_s8 vreinterpret_f32_u32 + + +_NEON2SSE_GLOBAL float32x2_t vreinterpret_f32_u64(uint64x1_t t); +#define vreinterpret_f32_u64 vreinterpret_f32_u32 + + +_NEON2SSE_GLOBAL float32x2_t vreinterpret_f32_s64 (int64x1_t t); +#define vreinterpret_f32_s64 vreinterpret_f32_u32 + + +_NEON2SSE_GLOBAL float32x2_t vreinterpret_f32_p16 (poly16x4_t t); +#define vreinterpret_f32_p16 
vreinterpret_f32_u32
+
+_NEON2SSE_GLOBAL float32x2_t vreinterpret_f32_p8 (poly8x8_t t);
+#define vreinterpret_f32_p8 vreinterpret_f32_u32
+
+_NEON2SSE_GLOBAL float32x4_t vreinterpretq_f32_u32 (uint32x4_t t);
+#define vreinterpretq_f32_u32(t) _M128(t)
+
+_NEON2SSE_GLOBAL float32x4_t vreinterpretq_f32_u16 (uint16x8_t t);
+#define vreinterpretq_f32_u16 vreinterpretq_f32_u32
+
+_NEON2SSE_GLOBAL float32x4_t vreinterpretq_f32_u8 (uint8x16_t t);
+#define vreinterpretq_f32_u8 vreinterpretq_f32_u32
+
+_NEON2SSE_GLOBAL float32x4_t vreinterpretq_f32_s32 (int32x4_t t);
+#define vreinterpretq_f32_s32 vreinterpretq_f32_u32
+
+_NEON2SSE_GLOBAL float32x4_t vreinterpretq_f32_s16 (int16x8_t t);
+#define vreinterpretq_f32_s16 vreinterpretq_f32_u32
+
+_NEON2SSE_GLOBAL float32x4_t vreinterpretq_f32_s8 (int8x16_t t);
+#define vreinterpretq_f32_s8 vreinterpretq_f32_u32
+
+_NEON2SSE_GLOBAL float32x4_t vreinterpretq_f32_u64 (uint64x2_t t);
+#define vreinterpretq_f32_u64 vreinterpretq_f32_u32
+
+_NEON2SSE_GLOBAL float32x4_t vreinterpretq_f32_s64 (int64x2_t t);
+#define vreinterpretq_f32_s64 vreinterpretq_f32_u32
+
+_NEON2SSE_GLOBAL float32x4_t vreinterpretq_f32_p16 (poly16x8_t t);
+#define vreinterpretq_f32_p16 vreinterpretq_f32_u32
+
+_NEON2SSE_GLOBAL float32x4_t vreinterpretq_f32_p8 (poly8x16_t t);
+#define vreinterpretq_f32_p8 vreinterpretq_f32_u32
+
+//*** Integer type conversions ******************
+//no conversion necessary for the following functions because it is the same data type
+_NEON2SSE_GLOBAL int64x1_t vreinterpret_s64_u32 (uint32x2_t t);
+#define vreinterpret_s64_u32
+
+_NEON2SSE_GLOBAL int64x1_t vreinterpret_s64_u16 (uint16x4_t t);
+#define vreinterpret_s64_u16
+
+_NEON2SSE_GLOBAL int64x1_t vreinterpret_s64_u8 (uint8x8_t t);
+#define vreinterpret_s64_u8
+
+_NEON2SSE_GLOBAL int64x1_t vreinterpret_s64_s32 (int32x2_t t);
+#define vreinterpret_s64_s32
+
+_NEON2SSE_GLOBAL int64x1_t vreinterpret_s64_s16 (int16x4_t t);
+#define vreinterpret_s64_s16
+
+_NEON2SSE_GLOBAL int64x1_t vreinterpret_s64_s8 (int8x8_t t);
+#define vreinterpret_s64_s8
+
+_NEON2SSE_GLOBAL int64x1_t vreinterpret_s64_u64 (uint64x1_t t);
+#define vreinterpret_s64_u64
+
+_NEON2SSE_GLOBAL int64x1_t vreinterpret_s64_f32 (float32x2_t t);
+#define vreinterpret_s64_f32
+
+_NEON2SSE_GLOBAL int64x1_t vreinterpret_s64_p16 (poly16x4_t t);
+#define vreinterpret_s64_p16
+
+_NEON2SSE_GLOBAL int64x1_t vreinterpret_s64_p8 (poly8x8_t t);
+#define vreinterpret_s64_p8
+
+_NEON2SSE_GLOBAL int64x2_t vreinterpretq_s64_u32 (uint32x4_t t);
+#define vreinterpretq_s64_u32
+
+_NEON2SSE_GLOBAL int64x2_t vreinterpretq_s64_u16 (uint16x8_t t);
+#define vreinterpretq_s64_u16
+
+_NEON2SSE_GLOBAL int64x2_t vreinterpretq_s64_u8 (uint8x16_t t);
+#define vreinterpretq_s64_u8
+
+_NEON2SSE_GLOBAL int64x2_t vreinterpretq_s64_s32 (int32x4_t t);
+#define vreinterpretq_s64_s32
+
+_NEON2SSE_GLOBAL int64x2_t vreinterpretq_s64_s16 (int16x8_t t);
+#define vreinterpretq_s64_s16
+
+_NEON2SSE_GLOBAL int64x2_t vreinterpretq_s64_s8 (int8x16_t t);
+#define vreinterpretq_s64_s8
+
+_NEON2SSE_GLOBAL int64x2_t vreinterpretq_s64_u64 (uint64x2_t t);
+#define vreinterpretq_s64_u64
+
+_NEON2SSE_GLOBAL int64x2_t vreinterpretq_s64_f32 (float32x4_t t);
+#define vreinterpretq_s64_f32(t) _M128i(t)
+
+_NEON2SSE_GLOBAL int64x2_t vreinterpretq_s64_p16 (poly16x8_t t);
+#define vreinterpretq_s64_p16
+
+_NEON2SSE_GLOBAL int64x2_t vreinterpretq_s64_p8 (poly8x16_t t);
+#define vreinterpretq_s64_p8
+
+_NEON2SSE_GLOBAL uint64x1_t vreinterpret_u64_u32 (uint32x2_t t);
+#define vreinterpret_u64_u32
+
+_NEON2SSE_GLOBAL
uint64x1_t vreinterpret_u64_u16 (uint16x4_t t); +#define vreinterpret_u64_u16 + +_NEON2SSE_GLOBAL uint64x1_t vreinterpret_u64_u8 (uint8x8_t t); +#define vreinterpret_u64_u8 + +_NEON2SSE_GLOBAL uint64x1_t vreinterpret_u64_s32 (int32x2_t t); +#define vreinterpret_u64_s32 + +_NEON2SSE_GLOBAL uint64x1_t vreinterpret_u64_s16 (int16x4_t t); +#define vreinterpret_u64_s16 + +_NEON2SSE_GLOBAL uint64x1_t vreinterpret_u64_s8 (int8x8_t t); +#define vreinterpret_u64_s8 + +_NEON2SSE_GLOBAL uint64x1_t vreinterpret_u64_s64 (int64x1_t t); +#define vreinterpret_u64_s64 + +_NEON2SSE_GLOBAL uint64x1_t vreinterpret_u64_f32 (float32x2_t t); +#define vreinterpret_u64_f32 + +_NEON2SSE_GLOBAL uint64x1_t vreinterpret_u64_p16 (poly16x4_t t); +#define vreinterpret_u64_p16 + +_NEON2SSE_GLOBAL uint64x1_t vreinterpret_u64_p8 (poly8x8_t t); +#define vreinterpret_u64_p8 + +_NEON2SSE_GLOBAL uint64x2_t vreinterpretq_u64_u32 (uint32x4_t t); +#define vreinterpretq_u64_u32 + +_NEON2SSE_GLOBAL uint64x2_t vreinterpretq_u64_u16 (uint16x8_t t); +#define vreinterpretq_u64_u16 + +_NEON2SSE_GLOBAL uint64x2_t vreinterpretq_u64_u8 (uint8x16_t t); +#define vreinterpretq_u64_u8 + +_NEON2SSE_GLOBAL uint64x2_t vreinterpretq_u64_s32 (int32x4_t t); +#define vreinterpretq_u64_s32 + +_NEON2SSE_GLOBAL uint64x2_t vreinterpretq_u64_s16 (int16x8_t t); +#define vreinterpretq_u64_s16 + +_NEON2SSE_GLOBAL uint64x2_t vreinterpretq_u64_s8 (int8x16_t t); +#define vreinterpretq_u64_s8 + +_NEON2SSE_GLOBAL uint64x2_t vreinterpretq_u64_s64 (int64x2_t t); +#define vreinterpretq_u64_s64 + +_NEON2SSE_GLOBAL uint64x2_t vreinterpretq_u64_f32 (float32x4_t t); +#define vreinterpretq_u64_f32(t) _M128i(t) + +_NEON2SSE_GLOBAL uint64x2_t vreinterpretq_u64_p16 (poly16x8_t t); +#define vreinterpretq_u64_p16 + +_NEON2SSE_GLOBAL uint64x2_t vreinterpretq_u64_p8 (poly8x16_t t); +#define vreinterpretq_u64_p8 + +_NEON2SSE_GLOBAL int8x8_t vreinterpret_s8_u32 (uint32x2_t t); +#define vreinterpret_s8_u32 + +_NEON2SSE_GLOBAL int8x8_t vreinterpret_s8_u16 (uint16x4_t t); +#define vreinterpret_s8_u16 + +_NEON2SSE_GLOBAL int8x8_t vreinterpret_s8_u8 (uint8x8_t t); +#define vreinterpret_s8_u8 + +_NEON2SSE_GLOBAL int8x8_t vreinterpret_s8_s32 (int32x2_t t); +#define vreinterpret_s8_s32 + +_NEON2SSE_GLOBAL int8x8_t vreinterpret_s8_s16 (int16x4_t t); +#define vreinterpret_s8_s16 + +_NEON2SSE_GLOBAL int8x8_t vreinterpret_s8_u64 (uint64x1_t t); +#define vreinterpret_s8_u64 + +_NEON2SSE_GLOBAL int8x8_t vreinterpret_s8_s64 (int64x1_t t); +#define vreinterpret_s8_s64 + +_NEON2SSE_GLOBAL int8x8_t vreinterpret_s8_f32 (float32x2_t t); +#define vreinterpret_s8_f32 + +_NEON2SSE_GLOBAL int8x8_t vreinterpret_s8_p16 (poly16x4_t t); +#define vreinterpret_s8_p16 + +_NEON2SSE_GLOBAL int8x8_t vreinterpret_s8_p8 (poly8x8_t t); +#define vreinterpret_s8_p8 + +_NEON2SSE_GLOBAL int8x16_t vreinterpretq_s8_u32 (uint32x4_t t); +#define vreinterpretq_s8_u32 + +_NEON2SSE_GLOBAL int8x16_t vreinterpretq_s8_u16 (uint16x8_t t); +#define vreinterpretq_s8_u16 + +_NEON2SSE_GLOBAL int8x16_t vreinterpretq_s8_u8 (uint8x16_t t); +#define vreinterpretq_s8_u8 + +_NEON2SSE_GLOBAL int8x16_t vreinterpretq_s8_s32 (int32x4_t t); +#define vreinterpretq_s8_s32 + +_NEON2SSE_GLOBAL int8x16_t vreinterpretq_s8_s16 (int16x8_t t); +#define vreinterpretq_s8_s16 + +_NEON2SSE_GLOBAL int8x16_t vreinterpretq_s8_u64 (uint64x2_t t); +#define vreinterpretq_s8_u64 + +_NEON2SSE_GLOBAL int8x16_t vreinterpretq_s8_s64 (int64x2_t t); +#define vreinterpretq_s8_s64 + +_NEON2SSE_GLOBAL int8x16_t vreinterpretq_s8_f32 (float32x4_t t); +#define 
vreinterpretq_s8_f32(t) _M128i(t) + +_NEON2SSE_GLOBAL int8x16_t vreinterpretq_s8_p16 (poly16x8_t t); +#define vreinterpretq_s8_p16 + +_NEON2SSE_GLOBAL int8x16_t vreinterpretq_s8_p8 (poly8x16_t t); +#define vreinterpretq_s8_p8 + +_NEON2SSE_GLOBAL int16x4_t vreinterpret_s16_u32 (uint32x2_t t); +#define vreinterpret_s16_u32 + +_NEON2SSE_GLOBAL int16x4_t vreinterpret_s16_u16 (uint16x4_t t); +#define vreinterpret_s16_u16 + +_NEON2SSE_GLOBAL int16x4_t vreinterpret_s16_u8 (uint8x8_t t); +#define vreinterpret_s16_u8 + +_NEON2SSE_GLOBAL int16x4_t vreinterpret_s16_s32 (int32x2_t t); +#define vreinterpret_s16_s32 + +_NEON2SSE_GLOBAL int16x4_t vreinterpret_s16_s8 (int8x8_t t); +#define vreinterpret_s16_s8 + +_NEON2SSE_GLOBAL int16x4_t vreinterpret_s16_u64 (uint64x1_t t); +#define vreinterpret_s16_u64 + +_NEON2SSE_GLOBAL int16x4_t vreinterpret_s16_s64 (int64x1_t t); +#define vreinterpret_s16_s64 + +_NEON2SSE_GLOBAL int16x4_t vreinterpret_s16_f32 (float32x2_t t); +#define vreinterpret_s16_f32 + + +_NEON2SSE_GLOBAL int16x4_t vreinterpret_s16_p16 (poly16x4_t t); +#define vreinterpret_s16_p16 + +_NEON2SSE_GLOBAL int16x4_t vreinterpret_s16_p8 (poly8x8_t t); +#define vreinterpret_s16_p8 + +_NEON2SSE_GLOBAL int16x8_t vreinterpretq_s16_u32 (uint32x4_t t); +#define vreinterpretq_s16_u32 + +_NEON2SSE_GLOBAL int16x8_t vreinterpretq_s16_u16 (uint16x8_t t); +#define vreinterpretq_s16_u16 + +_NEON2SSE_GLOBAL int16x8_t vreinterpretq_s16_u8 (uint8x16_t t); +#define vreinterpretq_s16_u8 + +_NEON2SSE_GLOBAL int16x8_t vreinterpretq_s16_s32 (int32x4_t t); +#define vreinterpretq_s16_s32 + +_NEON2SSE_GLOBAL int16x8_t vreinterpretq_s16_s8 (int8x16_t t); +#define vreinterpretq_s16_s8 + +_NEON2SSE_GLOBAL int16x8_t vreinterpretq_s16_u64 (uint64x2_t t); +#define vreinterpretq_s16_u64 + +_NEON2SSE_GLOBAL int16x8_t vreinterpretq_s16_s64 (int64x2_t t); +#define vreinterpretq_s16_s64 + +_NEON2SSE_GLOBAL int16x8_t vreinterpretq_s16_f32 (float32x4_t t); +#define vreinterpretq_s16_f32(t) _M128i(t) + +_NEON2SSE_GLOBAL int16x8_t vreinterpretq_s16_p16 (poly16x8_t t); +#define vreinterpretq_s16_p16 + +_NEON2SSE_GLOBAL int16x8_t vreinterpretq_s16_p8 (poly8x16_t t); +#define vreinterpretq_s16_p8 + +_NEON2SSE_GLOBAL int32x2_t vreinterpret_s32_u32 (uint32x2_t t); +#define vreinterpret_s32_u32 + +_NEON2SSE_GLOBAL int32x2_t vreinterpret_s32_u16 (uint16x4_t t); +#define vreinterpret_s32_u16 + +_NEON2SSE_GLOBAL int32x2_t vreinterpret_s32_u8 (uint8x8_t t); +#define vreinterpret_s32_u8 + +_NEON2SSE_GLOBAL int32x2_t vreinterpret_s32_s16 (int16x4_t t); +#define vreinterpret_s32_s16 + +_NEON2SSE_GLOBAL int32x2_t vreinterpret_s32_s8 (int8x8_t t); +#define vreinterpret_s32_s8 + +_NEON2SSE_GLOBAL int32x2_t vreinterpret_s32_u64 (uint64x1_t t); +#define vreinterpret_s32_u64 + +_NEON2SSE_GLOBAL int32x2_t vreinterpret_s32_s64 (int64x1_t t); +#define vreinterpret_s32_s64 + +_NEON2SSE_GLOBAL int32x2_t vreinterpret_s32_f32 (float32x2_t t); +#define vreinterpret_s32_f32 + +_NEON2SSE_GLOBAL int32x2_t vreinterpret_s32_p16 (poly16x4_t t); +#define vreinterpret_s32_p16 + +_NEON2SSE_GLOBAL int32x2_t vreinterpret_s32_p8 (poly8x8_t t); +#define vreinterpret_s32_p8 + +_NEON2SSE_GLOBAL int32x4_t vreinterpretq_s32_u32 (uint32x4_t t); +#define vreinterpretq_s32_u32 + +_NEON2SSE_GLOBAL int32x4_t vreinterpretq_s32_u16 (uint16x8_t t); +#define vreinterpretq_s32_u16 + +_NEON2SSE_GLOBAL int32x4_t vreinterpretq_s32_u8 (uint8x16_t t); +#define vreinterpretq_s32_u8 + +_NEON2SSE_GLOBAL int32x4_t vreinterpretq_s32_s16 (int16x8_t t); +#define vreinterpretq_s32_s16 + +_NEON2SSE_GLOBAL 
int32x4_t vreinterpretq_s32_s8 (int8x16_t t); +#define vreinterpretq_s32_s8 + +_NEON2SSE_GLOBAL int32x4_t vreinterpretq_s32_u64 (uint64x2_t t); +#define vreinterpretq_s32_u64 + +_NEON2SSE_GLOBAL int32x4_t vreinterpretq_s32_s64 (int64x2_t t); +#define vreinterpretq_s32_s64 + +_NEON2SSE_GLOBAL int32x4_t vreinterpretq_s32_f32 (float32x4_t t); +#define vreinterpretq_s32_f32(t) _M128i(t) + +_NEON2SSE_GLOBAL int32x4_t vreinterpretq_s32_p16 (poly16x8_t t); +#define vreinterpretq_s32_p16 + +_NEON2SSE_GLOBAL int32x4_t vreinterpretq_s32_p8 (poly8x16_t t); +#define vreinterpretq_s32_p8 + +_NEON2SSE_GLOBAL uint8x8_t vreinterpret_u8_u32 (uint32x2_t t); +#define vreinterpret_u8_u32 + +_NEON2SSE_GLOBAL uint8x8_t vreinterpret_u8_u16 (uint16x4_t t); +#define vreinterpret_u8_u16 + +_NEON2SSE_GLOBAL uint8x8_t vreinterpret_u8_s32 (int32x2_t t); +#define vreinterpret_u8_s32 + +_NEON2SSE_GLOBAL uint8x8_t vreinterpret_u8_s16 (int16x4_t t); +#define vreinterpret_u8_s16 + +_NEON2SSE_GLOBAL uint8x8_t vreinterpret_u8_s8 (int8x8_t t); +#define vreinterpret_u8_s8 + +_NEON2SSE_GLOBAL uint8x8_t vreinterpret_u8_u64 (uint64x1_t t); +#define vreinterpret_u8_u64 + +_NEON2SSE_GLOBAL uint8x8_t vreinterpret_u8_s64 (int64x1_t t); +#define vreinterpret_u8_s64 + +_NEON2SSE_GLOBAL uint8x8_t vreinterpret_u8_f32 (float32x2_t t); +#define vreinterpret_u8_f32 + +_NEON2SSE_GLOBAL uint8x8_t vreinterpret_u8_p16 (poly16x4_t t); +#define vreinterpret_u8_p16 + +_NEON2SSE_GLOBAL uint8x8_t vreinterpret_u8_p8 (poly8x8_t t); +#define vreinterpret_u8_p8 + +_NEON2SSE_GLOBAL uint8x16_t vreinterpretq_u8_u32 (uint32x4_t t); +#define vreinterpretq_u8_u32 + +_NEON2SSE_GLOBAL uint8x16_t vreinterpretq_u8_u16 (uint16x8_t t); +#define vreinterpretq_u8_u16 + +_NEON2SSE_GLOBAL uint8x16_t vreinterpretq_u8_s32 (int32x4_t t); +#define vreinterpretq_u8_s32 + +_NEON2SSE_GLOBAL uint8x16_t vreinterpretq_u8_s16 (int16x8_t t); +#define vreinterpretq_u8_s16 + +_NEON2SSE_GLOBAL uint8x16_t vreinterpretq_u8_s8 (int8x16_t t); +#define vreinterpretq_u8_s8 + +_NEON2SSE_GLOBAL uint8x16_t vreinterpretq_u8_u64 (uint64x2_t t); +#define vreinterpretq_u8_u64 + +_NEON2SSE_GLOBAL uint8x16_t vreinterpretq_u8_s64 (int64x2_t t); +#define vreinterpretq_u8_s64 + +_NEON2SSE_GLOBAL uint8x16_t vreinterpretq_u8_f32 (float32x4_t t); +#define vreinterpretq_u8_f32(t) _M128i(t) + + +_NEON2SSE_GLOBAL uint8x16_t vreinterpretq_u8_p16 (poly16x8_t t); +#define vreinterpretq_u8_p16 + +_NEON2SSE_GLOBAL uint8x16_t vreinterpretq_u8_p8 (poly8x16_t t); +#define vreinterpretq_u8_p8 + +_NEON2SSE_GLOBAL uint16x4_t vreinterpret_u16_u32 (uint32x2_t t); +#define vreinterpret_u16_u32 + +_NEON2SSE_GLOBAL uint16x4_t vreinterpret_u16_u8 (uint8x8_t t); +#define vreinterpret_u16_u8 + +_NEON2SSE_GLOBAL uint16x4_t vreinterpret_u16_s32 (int32x2_t t); +#define vreinterpret_u16_s32 + +_NEON2SSE_GLOBAL uint16x4_t vreinterpret_u16_s16 (int16x4_t t); +#define vreinterpret_u16_s16 + +_NEON2SSE_GLOBAL uint16x4_t vreinterpret_u16_s8 (int8x8_t t); +#define vreinterpret_u16_s8 + +_NEON2SSE_GLOBAL uint16x4_t vreinterpret_u16_u64 (uint64x1_t t); +#define vreinterpret_u16_u64 + +_NEON2SSE_GLOBAL uint16x4_t vreinterpret_u16_s64 (int64x1_t t); +#define vreinterpret_u16_s64 + +_NEON2SSE_GLOBAL uint16x4_t vreinterpret_u16_f32 (float32x2_t t); +#define vreinterpret_u16_f32 + +_NEON2SSE_GLOBAL uint16x4_t vreinterpret_u16_p16 (poly16x4_t t); +#define vreinterpret_u16_p16 + +_NEON2SSE_GLOBAL uint16x4_t vreinterpret_u16_p8 (poly8x8_t t); +#define vreinterpret_u16_p8 + +_NEON2SSE_GLOBAL uint16x8_t vreinterpretq_u16_u32 (uint32x4_t t); 
+#define vreinterpretq_u16_u32 + +_NEON2SSE_GLOBAL uint16x8_t vreinterpretq_u16_u8 (uint8x16_t t); +#define vreinterpretq_u16_u8 + +_NEON2SSE_GLOBAL uint16x8_t vreinterpretq_u16_s32 (int32x4_t t); +#define vreinterpretq_u16_s32 + +_NEON2SSE_GLOBAL uint16x8_t vreinterpretq_u16_s16 (int16x8_t t); +#define vreinterpretq_u16_s16 + +_NEON2SSE_GLOBAL uint16x8_t vreinterpretq_u16_s8 (int8x16_t t); +#define vreinterpretq_u16_s8 + +_NEON2SSE_GLOBAL uint16x8_t vreinterpretq_u16_u64 (uint64x2_t t); +#define vreinterpretq_u16_u64 + +_NEON2SSE_GLOBAL uint16x8_t vreinterpretq_u16_s64 (int64x2_t t); +#define vreinterpretq_u16_s64 + +_NEON2SSE_GLOBAL uint16x8_t vreinterpretq_u16_f32 (float32x4_t t); +#define vreinterpretq_u16_f32(t) _M128i(t) + +_NEON2SSE_GLOBAL uint16x8_t vreinterpretq_u16_p16 (poly16x8_t t); +#define vreinterpretq_u16_p16 + +_NEON2SSE_GLOBAL uint16x8_t vreinterpretq_u16_p8 (poly8x16_t t); +#define vreinterpretq_u16_p8 + +_NEON2SSE_GLOBAL uint32x2_t vreinterpret_u32_u16 (uint16x4_t t); +#define vreinterpret_u32_u16 + +_NEON2SSE_GLOBAL uint32x2_t vreinterpret_u32_u8 (uint8x8_t t); +#define vreinterpret_u32_u8 + +_NEON2SSE_GLOBAL uint32x2_t vreinterpret_u32_s32 (int32x2_t t); +#define vreinterpret_u32_s32 + +_NEON2SSE_GLOBAL uint32x2_t vreinterpret_u32_s16 (int16x4_t t); +#define vreinterpret_u32_s16 + +_NEON2SSE_GLOBAL uint32x2_t vreinterpret_u32_s8 (int8x8_t t); +#define vreinterpret_u32_s8 + +_NEON2SSE_GLOBAL uint32x2_t vreinterpret_u32_u64 (uint64x1_t t); +#define vreinterpret_u32_u64 + +_NEON2SSE_GLOBAL uint32x2_t vreinterpret_u32_s64 (int64x1_t t); +#define vreinterpret_u32_s64 + +_NEON2SSE_GLOBAL uint32x2_t vreinterpret_u32_f32 (float32x2_t t); +#define vreinterpret_u32_f32 + +_NEON2SSE_GLOBAL uint32x2_t vreinterpret_u32_p16 (poly16x4_t t); +#define vreinterpret_u32_p16 + +_NEON2SSE_GLOBAL uint32x2_t vreinterpret_u32_p8 (poly8x8_t t); +#define vreinterpret_u32_p8 + +_NEON2SSE_GLOBAL uint32x4_t vreinterpretq_u32_u16 (uint16x8_t t); +#define vreinterpretq_u32_u16 + +_NEON2SSE_GLOBAL uint32x4_t vreinterpretq_u32_u8 (uint8x16_t t); +#define vreinterpretq_u32_u8 + +_NEON2SSE_GLOBAL uint32x4_t vreinterpretq_u32_s32 (int32x4_t t); +#define vreinterpretq_u32_s32 + +_NEON2SSE_GLOBAL uint32x4_t vreinterpretq_u32_s16 (int16x8_t t); +#define vreinterpretq_u32_s16 + +_NEON2SSE_GLOBAL uint32x4_t vreinterpretq_u32_s8 (int8x16_t t); +#define vreinterpretq_u32_s8 + +_NEON2SSE_GLOBAL uint32x4_t vreinterpretq_u32_u64 (uint64x2_t t); +#define vreinterpretq_u32_u64 + +_NEON2SSE_GLOBAL uint32x4_t vreinterpretq_u32_s64 (int64x2_t t); +#define vreinterpretq_u32_s64 + +_NEON2SSE_GLOBAL uint32x4_t vreinterpretq_u32_f32 (float32x4_t t); +#define vreinterpretq_u32_f32(t) _M128i(t) + +_NEON2SSE_GLOBAL uint32x4_t vreinterpretq_u32_p16 (poly16x8_t t); +#define vreinterpretq_u32_p16 + +_NEON2SSE_GLOBAL uint32x4_t vreinterpretq_u32_p8 (poly8x16_t t); +#define vreinterpretq_u32_p8 + +//************* Round ****************** +_NEON2SSESTORAGE float32x4_t vrndnq_f32(float32x4_t a); +#ifdef USE_SSE4 +_NEON2SSE_INLINE float32x4_t vrndnq_f32(float32x4_t a) +{ + return _mm_round_ps(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); +} +#else +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING( float32x4_t vrndnq_f32(float32x4_t a), _NEON2SSE_REASON_SLOW_SERIAL) +{ + int i; + _NEON2SSE_ALIGN_16 float32_t res[4]; + _mm_store_ps(res, a); + for(i = 0; i<4; i++) { + res[i] = nearbyintf(res[i]); + } + return _mm_load_ps(res); +} +#endif + + +_NEON2SSESTORAGE float64x2_t vrndnq_f64(float64x2_t a); +#ifdef USE_SSE4 +_NEON2SSE_INLINE 
float64x2_t vrndnq_f64(float64x2_t a)
+{
+    return _mm_round_pd(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+#else
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(float64x2_t vrndnq_f64(float64x2_t a), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    _NEON2SSE_ALIGN_16 float64_t res[2];
+    _mm_store_pd(res, a);
+    res[0] = nearbyint(res[0]);
+    res[1] = nearbyint(res[1]);
+    return _mm_load_pd(res);
+}
+#endif
+
+
+
+//************* Sqrt ******************
+_NEON2SSE_GLOBAL float32x4_t vsqrtq_f32(float32x4_t a);
+#define vsqrtq_f32 _mm_sqrt_ps
+
+_NEON2SSE_GLOBAL float64x2_t vsqrtq_f64(float64x2_t a);
+#define vsqrtq_f64 _mm_sqrt_pd
+
+
+#endif /* NEON2SSE_H */
diff --git a/tordnscrypt/src/main/jni/invizible/session.c b/tordnscrypt/src/main/jni/invizible/session.c
index 87a1bf8c6..5cbbcbc20 100644
--- a/tordnscrypt/src/main/jni/invizible/session.c
+++ b/tordnscrypt/src/main/jni/invizible/session.c
@@ -16,7 +16,7 @@
 You should have received a copy of the GNU General Public License
 along with InviZible Pro. If not, see .
 
-    Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com
+    Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com
 */
 
 #include "invizible.h"
 
@@ -250,14 +250,21 @@ void *handle_events(void *a) {
                     check_icmp_socket(args, &ev[i]);
                 else if (session->protocol == IPPROTO_UDP) {
                     int count = 0;
-                    while (count < UDP_YIELD && !args->ctx->stopping &&
-                           !(ev[i].events & EPOLLERR) && (ev[i].events & EPOLLIN) &&
-                           is_readable(session->socket)) {
+                    do {
                         count++;
                         check_udp_socket(args, &ev[i]);
-                    }
-                } else if (session->protocol == IPPROTO_TCP)
-                    check_tcp_socket(args, &ev[i], epoll_fd);
+                    } while (count < UDP_YIELD && !args->ctx->stopping &&
+                             !(ev[i].events & EPOLLERR) && (ev[i].events & EPOLLIN) &&
+                             is_readable(session->socket));
+                } else if (session->protocol == IPPROTO_TCP) {
+                    int count = 0;
+                    do {
+                        count++;
+                        check_tcp_socket(args, &ev[i], epoll_fd);
+                    } while (count < TCP_YIELD && !args->ctx->stopping &&
+                             !(ev[i].events & EPOLLERR) && (ev[i].events & EPOLLIN) &&
+                             is_readable(session->socket));
+                }
             }
 
         if (error)
diff --git a/tordnscrypt/src/main/jni/invizible/tcp.c b/tordnscrypt/src/main/jni/invizible/tcp.c
index ce9818c7e..40a4ec8de 100644
--- a/tordnscrypt/src/main/jni/invizible/tcp.c
+++ b/tordnscrypt/src/main/jni/invizible/tcp.c
@@ -18,7 +18,7 @@
 You should have received a copy of the GNU General Public License
 along with InviZible Pro. If not, see .
 
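The serial fallbacks above assume that nearbyint()/nearbyintf() reproduce the round-to-nearest, ties-to-even behaviour of ARM's FRINTN, which is also what _MM_FROUND_TO_NEAREST_INT requests on the SSE4 path; this only holds in the default FE_TONEAREST floating-point environment. A minimal standalone check of that assumption (hypothetical test program, not part of this patch):

    #include <assert.h>
    #include <fenv.h>
    #include <math.h>

    int main(void) {
        /* C programs start in FE_TONEAREST, the IEEE ties-to-even mode
           that ARM FRINTN and _MM_FROUND_TO_NEAREST_INT also use. */
        assert(fegetround() == FE_TONEAREST);
        assert(nearbyintf(0.5f) == 0.0f);  /* tie rounds to even: 0 */
        assert(nearbyintf(1.5f) == 2.0f);  /* tie rounds to even: 2 */
        assert(nearbyint(-2.5) == -2.0);   /* tie rounds to even: -2 */
        return 0;
    }

If other code changes the mode with fesetround(), the serial path follows the new mode while _mm_round_ps/_mm_round_pd with an explicit rounding flag ignore it, so the two paths can diverge.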
-    Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com
+    Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com
 */
 
 #include "invizible.h"
 
@@ -300,14 +300,32 @@ void check_tcp_socket(const struct arguments *args,
     // https://tools.ietf.org/html/rfc1929
     // https://en.wikipedia.org/wiki/SOCKS#SOCKS5
 
+    bool not_own_uid = s->tcp.uid != own_uid;
+    bool not_dns_request = ntohs(s->tcp.dest) != 53;
+    bool not_to_i2pd = !str_equal(I2PD_REDIRECT_ADDRESS, dest);
+    bool not_to_localhost = !(str_equal(LOOPBACK_ADDRESS, dest) ||
+                              str_equal(LOOPBACK_ADDRESS_IPv6, dest));
+
     bool redirect_to_tor = false;
-    if (*tor_socks5_addr && tor_socks5_port) {
-        redirect_to_tor = is_redirect_to_tor(args, s->tcp.uid, dest, s->tcp.dest);
+    if (not_own_uid && not_dns_request && not_to_i2pd && not_to_localhost
+        && *tor_socks5_addr && tor_socks5_port) {
+        redirect_to_tor = is_redirect_to_tor(
+                args,
+                s->tcp.uid,
+                dest,
+                ntohs(s->tcp.dest)
+        );
     }
 
     bool redirect_to_proxy = false;
-    if (*proxy_socks5_addr && proxy_socks5_port) {
-        redirect_to_proxy = is_redirect_to_proxy(args, s->tcp.uid, dest, s->tcp.dest);
+    if (not_own_uid && not_dns_request && not_to_i2pd && not_to_localhost
+        && *proxy_socks5_addr && proxy_socks5_port) {
+        redirect_to_proxy = is_redirect_to_proxy(
+                args,
+                s->tcp.uid,
+                dest,
+                ntohs(s->tcp.dest)
+        );
     }
 
     if (redirect_to_tor || redirect_to_proxy) {
@@ -357,7 +375,9 @@ void check_tcp_socket(const struct arguments *args,
                 }
             } else if (s->tcp.socks5 == SOCKS5_CONNECT &&
-                       bytes == 6 + (s->tcp.version == 4 ? 4 : 16) &&
+                       (bytes == 6 + (s->tcp.version == 4 ? 4 : 16) ||
+                        //for IPv4-mapped IPv6 address proxy
+                        s->tcp.version == 6 && bytes == 6 + 4) &&
                        buffer[0] == 5) {
                 if (buffer[1] == 0) {
                     s->tcp.socks5 = SOCKS5_CONNECTED;
@@ -409,10 +429,21 @@ void check_tcp_socket(const struct arguments *args,
             char socks5_username[127 + 1];
             char socks5_password[127 + 1];
 
-            bool redirect_to_tor = false;
+            bool not_own_uid = s->tcp.uid != own_uid;
+            bool not_dns_request = ntohs(s->tcp.dest) != 53;
+            bool not_to_i2pd = !str_equal(I2PD_REDIRECT_ADDRESS, dest);
+            bool not_to_localhost = !(str_equal(LOOPBACK_ADDRESS, dest) ||
+                                      str_equal(LOOPBACK_ADDRESS_IPv6, dest));
 
-            if (*tor_socks5_addr && tor_socks5_port) {
-                redirect_to_tor = is_redirect_to_tor(args, s->tcp.uid, dest, s->tcp.dest);
+            bool redirect_to_tor = false;
+            if (not_own_uid && not_dns_request && not_to_i2pd && not_to_localhost
+                && *tor_socks5_addr && tor_socks5_port) {
+                redirect_to_tor = is_redirect_to_tor(
+                        args,
+                        s->tcp.uid,
+                        dest,
+                        ntohs(s->tcp.dest)
+                );
             }
 
             if (*proxy_socks5_addr && proxy_socks5_port && !redirect_to_tor) {
@@ -1115,14 +1146,22 @@ int open_tcp_socket(const struct arguments *args,
     struct sockaddr_in6 addr6;
 
     if (redirect == NULL) {
+        bool not_own_uid = cur->uid != own_uid;
+        bool not_dns_request = ntohs(cur->dest) != 53;
+        bool not_to_i2pd = !str_equal(I2PD_REDIRECT_ADDRESS, dest);
+        bool not_to_localhost = !(str_equal(LOOPBACK_ADDRESS, dest) ||
+                                  str_equal(LOOPBACK_ADDRESS_IPv6, dest));
+
         bool redirect_to_tor = false;
-        if (*tor_socks5_addr && tor_socks5_port) {
-            redirect_to_tor = is_redirect_to_tor(args, cur->uid, dest, cur->dest);
+        if (not_own_uid && not_dns_request && not_to_i2pd && not_to_localhost
+            && *tor_socks5_addr && tor_socks5_port) {
+            redirect_to_tor = is_redirect_to_tor(args, cur->uid, dest, ntohs(cur->dest));
         }
 
         bool redirect_to_proxy = false;
-        if (*proxy_socks5_addr && proxy_socks5_port) {
-            redirect_to_proxy = is_redirect_to_proxy(args, cur->uid, dest, cur->dest);
+        if (not_own_uid &&
not_dns_request && not_to_i2pd && not_to_localhost
+            && *proxy_socks5_addr && proxy_socks5_port) {
+            redirect_to_proxy = is_redirect_to_proxy(args, cur->uid, dest, ntohs(cur->dest));
         }
 
         if (redirect_to_tor) {
@@ -1150,6 +1189,10 @@ int open_tcp_socket(const struct arguments *args,
             addr4.sin_family = AF_INET;
             inet_pton(AF_INET, proxy_socks5_addr, &addr4.sin_addr);
             addr4.sin_port = htons(proxy_socks5_port);
+        } else if (strcmp(proxy_socks5_addr, LOOPBACK_ADDRESS) == 0) {
+            addr6.sin6_family = AF_INET6;
+            inet_pton(AF_INET6, LOOPBACK_ADDRESS_MAPPED_IPv6, &addr6.sin6_addr);
+            addr6.sin6_port = htons(proxy_socks5_port);
         } else {
             addr6.sin6_family = AF_INET6;
             inet_pton(AF_INET6, proxy_socks5_addr, &addr6.sin6_addr);
@@ -1189,7 +1232,7 @@ int open_tcp_socket(const struct arguments *args,
                              ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6)));
     if (err < 0 && errno != EINPROGRESS) {
-        log_android(ANDROID_LOG_ERROR, "connect error %d: %s", errno, strerror(errno));
+        log_android(ANDROID_LOG_ERROR, "TCP connect error %d: %s", errno, strerror(errno));
         return -1;
     }
diff --git a/tordnscrypt/src/main/jni/invizible/udp.c b/tordnscrypt/src/main/jni/invizible/udp.c
index 0ab271b7b..351c85a75 100644
--- a/tordnscrypt/src/main/jni/invizible/udp.c
+++ b/tordnscrypt/src/main/jni/invizible/udp.c
@@ -18,13 +18,14 @@
 You should have received a copy of the GNU General Public License
 along with InviZible Pro. If not, see .
 
-    Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com
+    Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com
 */
 
 #include "invizible.h"
 
 extern int own_uid;
 extern int tor_dns_port;
+extern bool bypass_lan;
 
 int get_udp_timeout(const struct udp_session *u, int sessions, int maxsessions) {
     int timeout = (ntohs(u->dest) == 53 ? UDP_TIMEOUT_53 : UDP_TIMEOUT_ANY);
@@ -166,14 +167,6 @@ int has_udp_session(const struct arguments *args, const uint8_t *pkt, const uint
     if (ntohs(udphdr->dest) == 53)
         return !args->fwd53;
 
-    /*char dest[INET6_ADDRSTRLEN + 1];
-    if (version == 4) {
-        inet_ntop(AF_INET, &ip4->daddr, dest, sizeof(dest));
-        if (strcmp(dest, "10.191.0.1") == 0) {
-            return false;
-        }
-    }*/
-
     // Search session
     struct ng_session *cur = args->ctx->ng_session;
     while (cur != NULL &&
@@ -398,14 +391,26 @@ jboolean handle_udp(const struct arguments *args,
             //handle onion websites
             if (tor_dns_port > 0) {
-                char *suffix = strrchr(qname, '.');
-                if (redirect != NULL && suffix != NULL && strcmp(suffix, ".onion") == 0) {
+                if (redirect != NULL && str_ends_with(qname, ".onion")) {
                     redirect->rport = tor_dns_port;
                 }
             }
 
-            //https://datatracker.ietf.org/doc/html/rfc7050
-            if (redirect != NULL && strcmp(qname, "ipv4only.arpa") == 0) {
-                redirect = NULL;
+
+            if (redirect != NULL) {
+                //https://datatracker.ietf.org/doc/html/rfc7050
+                if (str_equal(qname, "ipv4only.arpa")) {
+                    redirect = NULL;
+                } else if (bypass_lan) {
+                    //https://datatracker.ietf.org/doc/html/rfc6762
+                    if (str_ends_with(qname, ".local")
+                        || str_ends_with(qname, ".254.169.in-addr.arpa")
+                        || str_ends_with(qname, ".8.e.f.ip6.arpa")
+                        || str_ends_with(qname, ".9.e.f.ip6.arpa")
+                        || str_ends_with(qname, ".a.e.f.ip6.arpa")
+                        || str_ends_with(qname, ".b.e.f.ip6.arpa")) {
+                        redirect = NULL;
+                    }
+                }
             }
         }
     }
@@ -520,6 +525,14 @@ int open_udp_socket(const struct arguments *args,
         }
     }
 
+    // Set non-blocking
+    int flags = fcntl(sock, F_GETFL, 0);
+    if (flags < 0 || fcntl(sock, F_SETFL, flags | O_NONBLOCK) < 0) {
+        log_android(ANDROID_LOG_ERROR, "fcntl socket O_NONBLOCK error %d: %s",
+                    errno, strerror(errno));
+        return -1;
+    }
+
     return
sock; }
diff --git a/tordnscrypt/src/main/jni/invizible/util.c b/tordnscrypt/src/main/jni/invizible/util.c
index 056152563..a9967dfa9 100644
--- a/tordnscrypt/src/main/jni/invizible/util.c
+++ b/tordnscrypt/src/main/jni/invizible/util.c
@@ -18,22 +18,81 @@
 You should have received a copy of the GNU General Public License
 along with InviZible Pro. If not, see .
 
-    Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com
+    Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com
 */
 
+#if defined(__x86_64__)
+//https://github.com/intel/ARM_NEON_2_x86_SSE
+//https://github.com/android/ndk-samples/tree/main/hello-neon
+#include "neon2sse.h"
+#else
 #include <arm_neon.h>
+#endif
 
 #include "invizible.h"
 
-#define CSUM_NEON_THRESHOLD 1
+#define CSUM_NEON_THRESHOLD 16
 
 uint16_t calc_checksum(uint16_t start, const uint8_t *buffer, size_t length) {
+    uint32_t sum = start;
+
+    // Process small inputs with scalar loop
+    if (length <= CSUM_NEON_THRESHOLD) {
+        const uint8_t *byte_buf = buffer;
+        while (length > 1) {
+            sum += *(uint16_t *) byte_buf;
+            byte_buf += 2;
+            length -= 2;
+        }
+        if (length > 0) {
+            sum += *byte_buf;
+        }
+        while (sum >> 16) {
+            sum = (sum & 0xFFFF) + (sum >> 16);
+        }
+        return (uint16_t) sum;
+    }
+
+    // Align buffer to a 2-byte boundary before the NEON loop
+    const uint8_t *byte_buf = buffer;
+    while (((uintptr_t) byte_buf & 1) && length > 0) {
+        sum += *byte_buf++;
+        length--;
+    }
+
+    // Process with NEON
+    const uint16_t *buf = (const uint16_t *) byte_buf;
+    size_t vec_len = length / 8; // Number of 8-byte (4 x 16-bit) chunks
+    size_t remainder = length % 8;
+
+    uint32x4_t acc = vdupq_n_u32(0); // NEON accumulator
+    while (vec_len--) {
+        uint16x4_t data = vld1_u16(buf); // Load 4x16-bit values
+        acc = vaddw_u16(acc, data); // Accumulate into 32-bit vector
+        buf += 4; // Advance pointer
+    }
+
+    // Reduce NEON accumulator into scalar sum
+    uint32_t temp[4];
+    vst1q_u32(temp, acc);
+    sum += temp[0] + temp[1] + temp[2] + temp[3];
+
+    // Process remaining bytes
+    byte_buf = (const uint8_t *) buf;
+    while (remainder > 1) {
+        sum += *(uint16_t *) byte_buf;
+        byte_buf += 2;
+        remainder -= 2;
+    }
+    if (remainder > 0) {
+        sum += *byte_buf; // Add last byte if odd length
+    }
-    if (length > CSUM_NEON_THRESHOLD) {
-        log_android(ANDROID_LOG_DEBUG, "Checksum buffer length %d", length);
-        return do_csum_neon(start, buffer, length);
+    // Fold 32-bit sum into 16 bits
+    while (sum >> 16) {
+        sum = (sum & 0xFFFF) + (sum >> 16);
     }
 
-    return (uint16_t) do_csum_generic(start, buffer, length);
+    return (uint16_t) sum;
 }
 
 uint16_t do_csum_generic(uint16_t start, const uint8_t *buffer, size_t length) {
@@ -296,4 +355,20 @@ long long get_ms() {
     return ts.tv_sec * 1000LL + ts.tv_nsec / 1e6;
 }
 
-#pragma clang diagnostic pop
\ No newline at end of file
+int str_equal(const char *s, const char *f) {
+    if (!s || !f)
+        return 0;
+    size_t slen = strlen(s);
+    size_t flen = strlen(f);
+    return slen == flen && !memcmp(s, f, flen);
+}
+
+int str_ends_with(const char *s, const char *suff) {
+    if (!s || !suff)
+        return 0;
+    size_t slen = strlen(s);
+    size_t sufflen = strlen(suff);
+    return slen >= sufflen && !memcmp(s + slen - sufflen, suff, sufflen);
+}
+
+#pragma clang diagnostic pop
diff --git a/tordnscrypt/src/main/res/drawable/ic_firewall_active_menu.xml b/tordnscrypt/src/main/res/drawable/ic_firewall_active_menu.xml
index 581ef09b3..f999f5a0f 100644
--- a/tordnscrypt/src/main/res/drawable/ic_firewall_active_menu.xml
+++ b/tordnscrypt/src/main/res/drawable/ic_firewall_active_menu.xml
@@ -14,7 +14,7 @@
 ~ You should have received a copy of the GNU
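A small self-contained sanity check for the checksum and string helpers added to util.c above (hypothetical test, assuming a little-endian host and linking against util.c; the prototypes simply mirror the definitions above):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    uint16_t calc_checksum(uint16_t start, const uint8_t *buffer, size_t length);
    int str_equal(const char *s, const char *f);
    int str_ends_with(const char *s, const char *suff);

    int main(void) {
        /* Four bytes stay below CSUM_NEON_THRESHOLD, so the scalar path runs.
           On a little-endian host the 16-bit words are 0x0045 and 0x1c00,
           and 0x0045 + 0x1c00 = 0x1c45 with no carry left to fold. */
        const uint8_t pkt[] = {0x45, 0x00, 0x00, 0x1c};
        assert(calc_checksum(0, pkt, sizeof(pkt)) == 0x1c45);

        /* These helpers back the exact-match and suffix checks used for
           DNS names in udp.c and destination addresses in tcp.c. */
        assert(str_equal("ipv4only.arpa", "ipv4only.arpa"));
        assert(!str_equal("ipv4only.arpa", "ipv4only.arpa."));
        assert(str_ends_with("printer.local", ".local"));
        assert(!str_ends_with("mylocal", ".local"));
        return 0;
    }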
General Public License ~ along with InviZible Pro. If not, see . ~ - ~ Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + ~ Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com --> . ~ - ~ Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + ~ Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com --> . ~ - ~ Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + ~ Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com --> diff --git a/tordnscrypt/src/main/res/drawable/ic_ttl.xml b/tordnscrypt/src/main/res/drawable/ic_ttl.xml index 539e0b22e..f2d86ed90 100644 --- a/tordnscrypt/src/main/res/drawable/ic_ttl.xml +++ b/tordnscrypt/src/main/res/drawable/ic_ttl.xml @@ -14,7 +14,7 @@ ~ You should have received a copy of the GNU General Public License ~ along with InviZible Pro. If not, see . ~ - ~ Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + ~ Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com --> . ~ - ~ Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + ~ Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com --> . ~ - ~ Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + ~ Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com --> . ~ - ~ Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + ~ Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com --> + android:inputType="number" /> + android:inputType="textPersonName" + android:maxLength="127" /> + android:inputType="textPersonName" + android:maxLength="127" /> + + @@ -131,11 +142,25 @@ android:text="@string/proxy_save" android:textColor="@color/buttonTextColor" /> + + + android:layout_marginTop="4dp" + android:singleLine="false" + android:textAlignment="center" + tools:text="Success" /> diff --git a/tordnscrypt/src/main/res/layout/item_button.xml b/tordnscrypt/src/main/res/layout/item_button.xml index 3197c0b25..99c63337e 100644 --- a/tordnscrypt/src/main/res/layout/item_button.xml +++ b/tordnscrypt/src/main/res/layout/item_button.xml @@ -14,7 +14,7 @@ ~ You should have received a copy of the GNU General Public License ~ along with InviZible Pro. If not, see . ~ - ~ Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + ~ Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com --> . ~ - ~ Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + ~ Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com --> + tools:text="12.09.2025" /> . ~ - ~ Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + ~ Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com --> . ~ - ~ Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + ~ Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com --> . ~ - ~ Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com + ~ Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com --> "Ausgehender HTTP Proxy" - "Ausgehender HTTP Proxy" + Adresse des ausgehenden HTTP Proxy "Ausgehender SOCKS Proxy" "Ausgehende SOCKS Proxy Adresse" "Ausgehender SOCKS Proxy Port" @@ -403,7 +403,7 @@ Lokale IP-Adresse Aktiviere Fix TTL und konfiguriere das LAN-Gerät, InviZible zu nutzen. Benutze den Standard-Gateway: %1$s, DNS Server: %2$s. 
Unterstützung für mehrere Nutzer - Unterstützung für Dual-Apps, MIUI, Island, Shelter und Apps mit Arbeitsprofilen + Unterstützung für Dual-Apps, MIUI, Island, Shelter und Apps mit Arbeitsprofilen. Im VPN Modus möglicherweise ineffektiv SOCKS-Ausgabe-Proxy Tor wird alle OR-Verbindungen durch den SOCKS5-Proxy leiten. "Laufende Dienste" @@ -548,4 +548,7 @@ Lokale Liste hinzufügen Regeln werden nach der in Stunden angegebenen Verzögerung aktualisiert. Remote Liste hinzufügen + Aktiviere durchgehend aktives VPN sowie die Blockierung aller Verbindungen ohne VPN, um den Internetzugriff bei geschlossenem InviZible Pro zu unterbinden + Ersetze die Remote-Liste + Ersetze die lokale Liste diff --git a/tordnscrypt/src/main/res/values-el/strings.xml b/tordnscrypt/src/main/res/values-el/strings.xml index f84604b1d..5b4b6e621 100644 --- a/tordnscrypt/src/main/res/values-el/strings.xml +++ b/tordnscrypt/src/main/res/values-el/strings.xml @@ -362,7 +362,7 @@ Should it be named instead "Βασικές Ρυθμίσεις" (originating from \n\n\tΤο InviZible Pro μπορεί να χρησιμοποιήσει δικαιώματα root, εάν η συσκευή σας έχει δικαιώματα root, ή χρησιμοποιεί ένα τοπικό VPN για να παρέχει κίνηση Διαδικτύου σε δίκτυα μέσω Tor, DNSCrypt και I2P. \n\n\tΠολιτική απορρήτου: \n\tΤο InviZible Pro δεν συλλέγει ούτε κοινοποιεί προσωπικά ή ευαίσθητα δεδομένα χρήστη. - \n\n\tCopyright © 2019-2024 + \n\n\tCopyright © 2019-2025 \n\tGarmatin Oleksandr \n\tinvizible.soft@gmail.com \n\tinvizible.net/en/privacy diff --git a/tordnscrypt/src/main/res/values-es/strings.xml b/tordnscrypt/src/main/res/values-es/strings.xml index 1be6d8179..37ebfaff8 100644 --- a/tordnscrypt/src/main/res/values-es/strings.xml +++ b/tordnscrypt/src/main/res/values-es/strings.xml @@ -348,7 +348,7 @@ \n\n\tInviZible Pro puede usar root si tu dispositivo está rooteado, o usa una VPN local para entregar el tráfico de internet directamente a las redes Tor, DNSCrypt e I2P. \n\n\tPolítica de privacidad: \n\tInviZible Pro no recopila ni comparte ningún dato personal o sensible del usuario. - \n\n\tDerechos de autor © 2019-2024 + \n\n\tDerechos de autor © 2019-2025 \n\tGarmatin Oleksandr \n\tinvizible.soft@gmail.com \n\tinvizible.net/en/privacy @@ -554,4 +554,6 @@ Reemplazar lista remota Reemplazar lista local Habilite VPN siempre activa y bloquee conexiones sin VPN para que Invisible Pro bloquee Internet cuando la aplicación no se esté ejecutando + Ha configurado para utilizar el proxy Socks5, pero el proxy no parece estar funcionando. ¡Por favor, compruebe su configuración! + Utilice un proxy para aplicaciones que evitan Tor diff --git a/tordnscrypt/src/main/res/values-fa/strings.xml b/tordnscrypt/src/main/res/values-fa/strings.xml index c260f8bce..2d41e5169 100644 --- a/tordnscrypt/src/main/res/values-fa/strings.xml +++ b/tordnscrypt/src/main/res/values-fa/strings.xml @@ -16,7 +16,7 @@ بررسی روت بودن دستگاه… لطفاً صبر کنید… - نشان نده + مجدداً نشان داده نشود "DNSCrypt متوقف شد" "DNSCrypt نصب شد" @@ -69,11 +69,11 @@ فعال‌سازی DNSCrypt هنگام راه‌اندازی - شروع تور هنگام راه‌اندازی مجدد + راه اندازی خودکار TOR شروع خودکار I2P "تأخیر" - شروع با تأخیر (چند ثانیه) تنها اگر شروع خودکار به درستی کار نکرد + تنها درصورتی که شروع خودکار به درستی کار نکرد، با تاخیر (ثانیه) شروع به کار کند "سرورهای DNSCrypt" "انتخاب سرورهای DNSCrypt" "تنظیمات TOR" @@ -83,15 +83,15 @@ انتخاب وب‌سایت‌ها این لیست وب سایتها با TOR باز می شوند. 
این ویژگی برای سایت‌های پشت CDN کار نمی‌کند - برنامه‌ها را انتخاب کنید + انتخاب برنامه‌ها "برنامه‌هایی که با TOR می‌توان استفاده کرد" "برنامه‌هایی که با InviZible می‌توان استفاده کرد" - محدودسازی سایت‌ها + محدودسازی سایت‌ ها لیست این وب سایت‌ها به طور مستقیم باز می‌شوند. این ویژگی برای سایت‌های پشت CDN کار نمی‌کند - محدود‌سازی برنامه‌ها + محدود‌سازی برنامه‌ ها "برنامه‌هایی که مستقیم اجرا شوند" بازه بازخوانی - تعداد دفعات بازخوانی IPهای وب‌سایت‌ها در یک ساعت (فقط اندورید ۵.۱ به بالا). برای توقف بازخوانی عدد صفر را وارد کنید. + تعداد دفعات بازخوانی آیپی های وب‌سایت‌ ها در یک ساعت (فقط اندروید ۵.۱ به بالا). برای توقف بازخوانی عدد صفر را وارد کنید. "پل‌ها" "از این مورد تنها اگر نمی‌توانید به TOR متصل شوید استفاده کنید" "عدم استفاده از پل" @@ -109,14 +109,14 @@ "ذخیره" "بستن" "نام HOST اشتباه است!" - انتخاب قالب - انتخاب زبان + انتخاب قالب ( تم ) + زبان را انتخاب کنید "مسدودسازی HTTP" "مسدودسازی ارتباط با HTTP Port 80" "بروزرسانی" - بروزرسانی خودکار - بروزرسانی‌های InviZible Pro و ماژول‌ها را روزانه بررسی کنید - بررسی بروزرسانی + به روزرسانی خودکار + به روزرسانی‌های InviZible Pro و ماژول‌ها را روزانه بررسی کنید + بررسی به روزرسانی "بررسی کنید نسخه‌های جدید اکنون موجود هستند یا خیر" "مؤکداً انجام بروزرسانی‌ها از طریق TOR" "فقط از طریق TOR و هنگام فعال بودن TOR، بروزرسانی‌ها را بررسی کنید" @@ -131,26 +131,24 @@ "بروزرسانی قوانین برای هر تغییر اتصال" "نمایش اعلان‌ها" "جلوگیری از به خواب رفتن دستگاه" - حفاظت بیش‌تر بدون حالت روت برای جلوگیری از توقف برنامه توسط اندروید. ممکن است باعث تخلیه زودتر باتری شود + حفاظت بیش‌تر بدون حالت روت برای جلوگیری از توقف برنامه توسط اندروید. ممکن است باعث تخلیه زودتر باتری شود! "راهنمایی‌ها" - همیشه پیغام‌های راهنما را نشان بده - لطفا InviZible Pro را در بهینه‌سازی باتری دستگاه قرار ندهید‌. در غیر این صورت، ممکن است اندروید DNSCrypt و TOR و یا I2P را در هر زمان خاموش کند. در بعضی سیستم عامل‌های استثناء همانند سیستم عامل گوشی‌های شیائومی باید چند گام اضافه. + همیشه پیغام‌ های راهنما را نشان بده + لطفاً InviZible Pro را از بهینه‌سازی باتری اندروید استثناء کنید تا سیستم در هر زمانی DNSCrypt، تور، یا I2P را خاتمه ندهد. برخی از سیستم های خاص، مانند MIUI، ممکن است به مراحل اضافی نیاز داشته باشند. "انتخاب BusyBox" "هات اسپات-آزمایشی" - شروع Tethering هنگام راه‌اندازی مجدد - تور مجاز به Tethering است + شروع اشتراک گذاری هنگام بوت + اجازه به اشتراک گذاری تور "تعیین مسیر تمام فعالیت‌ها از طریق TOR" "تعیین مسیر تمام ترافیک ورودی از طریق TOR" - انتخاب وب‌سایت‌ها - لیست وب‌سایت‌ها به‌منظور باز شدن همراه با TOR برای هات اسپات - محدود‌سازی وب‌سایت‌ها - لیست وب‌سایت‌ها به‌منظور باز شدن خودکار برای هات اسپات - اجازه دادن به Tethering I2p + انتخاب سایت‌ها + لیست سایت‌ها به‌منظور باز شدن همراه با تور برای هات اسپات + محدود‌سازی سایت‌ها + لیست سایت‌ها به‌منظور باز شدن خودکار برای هات اسپات + اجازه دادن به اشتراک‌گذاری اینترنت مخفی "رفع مشکل TTL" - TTL از طریق VPN محلی بر روی 64 ثابت خواهد شد. به پشتیبانی از Kernel نیاز نیست. -\nاین مورد تنها در \"حالت روت\" همراه با غیرفعال بودن گزینه \"اجزای ماژول‌ها همراه با روت\" می‌تواند استفاده شود. -\nبرای رفع مشکل TTL لطفاً DNSCrypt و یا TOR را راه‌اندازی نمایید. - باز کردن تنظیمات Tethering + TTL با استفاده از VPN محلی به 64 ثابت خواهد شد. پشتیبانی از کرنل مورد نیاز نیست. برای رفع TTL لطفا DNSCrypt یا/و Tor را راه اندازی کنید. + باز کردن تنظیمات اشتراک‌گذاری "لطفاً DNSCrypt را مجدداً راه‌اندازی کنید" "لطفا TOR را مجدداً راه‌اندازی نمایید" "لطفا l2P را مجدداً راه‌اندازی نمایید" @@ -160,60 +158,56 @@ درحال اجرای فرمانهای روت… لطفا صبر کنید… "تنظیمات ذخیره شد" - "تنظیمات جامع" + تنظیمات کلی "فقط برای کاربران حرفه‌ای!" "حداقل یک سرور انتخاب کنید!" 
"پورت محلی به منظور شنود." - نیازمند به سرورها (از منابع استاتیک + ریموت) به منظور بر آورده‌سازی ویژگی‌های خاص. + نیازمند به سرورها (از منابع استاتیک + ریموت) به منظور بر آورده‌سازی ویژگی‌های خاص "استفاده از سرورهایی که پروتکل DNSCrypt را پیاده‌سازی کرده‌اند." "استفاده از سرورهایی که پروتکل DNS-over-HTTPS را پیاده‌سازی کرده‌اند." - نیازمند به سرورهایی که از طریق منابع ریموت به‌منظور بر آورده‌سازی ویژگی‌های خاص تعریف‌شده‌اند. + نیازمند به سرورهایی که از طریق منابع ریموت به‌منظور بر آورده‌سازی ویژگی‌های خاص تعریف‌شده‌اند "سرور می‌بایست از پسوندهای محافظتی DNS به نام DNSSEC پشتیبانی نماید." "سرور نمی‌بایست Queryهای کاربر را در وقایع‌نگار ثبت نماید (جهت اطلاع)." "سرور نمی‌بایست بلک ‌لیست خود را برای مواردی همچون کنترل خانوادگی، بلوکه کردن تبلیغات و غیره اجرا نماید." - "استفاده همیشه از TCP برای اتصال به سرورهای بالادستی." - اگر نیازمند به تعیین مسیر تمام فعالیت‌ها از طریق TOR هستید، این مورد می‌توانند مفید واقع شود. -\nدر غیر این صورت، آن را در حالت False قرار دهید، چرا که حفاظت را بهبود نمی‌بخشد (پروکسی Dnscrypt همیشه تمام موارد را رمزگذاری می‌نماید حتی در زمان استفاده از UDP)، و تنها می‌تواند تأخیر را افزایش دهد. - پروکسی ساکس. - "فعال کردن پروکسی" - "تعیین مسیر تمام اتصالات TCP به یک گره TOR محلی. -بدانید که TOR از UDP پشتیبانی نمی‌کند، لذا force-tcp را همچنین در حالت True قرار دهید." - "پورت پروکسی" - تنظیمات دیگر. - رفع اشکال کنندۀ مجدد پشتیبان. این یک رفع اشکال کنندۀ مجدد عادی و DNS-رمزنگاری نشده است، که تنها برای Queryهای تک‌شات در زمان بازیابی لیست ابتدایی رفع اشکال‌کنندۀ مجدد، و تنها اگر تنظیمات DNS سیستم کار نکند استفاده خواهد شد. این مورد هرگز زمانی که لیست‌ها تماماً ذخیره شده‌ باشند استفاده نخواهد شد. - "هرگز به پروکسی Dnscrypt اجازه ندهید تا از تنظیمات DNS سیستم استفاده کند. بی قید و شرط، از رفع اشکال‌کنندۀ مجدد پشتیبان استفاده کنید." - "فیلترها" + استفاده همیشه از TCP برای اتصال به سرورهای بالادستی + برای اتصال به سرورهای DNSCrypt به جای UDP از TCP استفاده کنید. اگر از DNSCrypt روی تور استفاده می کنید، این گزینه باید فعال شود. + پروکسی SOCKS + پراکسی خروجی + همه اتصالات TCP را به یک پراکسی ساکس 5 محلی داخل تور مسیریابی کنید. تور از UDP پشتیبانی نمی کند، بنابراین TCP اجباری را نیز روی فعال تنظیم کنید. + پورت پراکسی + تنظیمات دیگر + این یک حل‌کننده DNS معمولی و غیررمزگذاری‌شده است، که تنها برای پرس‌و‌جوهای تکی هنگام بازیابی فهرست حل‌کننده‌های اولیه، و تنها در صورتی که پیکربندی DNS سیستم کار نمی‌کند، استفاده می‌شود. اگر لیست‌ها قبلاً ذخیره شده باشند، هرگز استفاده نمی‌شود. + اجازه ندهید DNSCrypt سعی کند از تنظیمات DNS سیستم استفاده کند. بی قید و شرط از حل کننده های بوت‌استرپ استفاده کنید. + فیلتر ها قوانین هدایت "جست‌و‌جوهای مربوط به دامنه های خاص را به یک مجموعه اختصاصی از سرورها هدایت کنید." قوانین پنهان‌کاری - Cloaking یک آدرس از پیش تعریف شده برای یک نام خاص را برمی‌گرداند. -\n علاوه بر اینکه به عنوان یک فایل HOSTS عمل می‌کند، می‌تواند آدرس IP یک نام متفاوت را نیز برگرداند. همچنین مسطح‌سازی CNAME را نیز انجام خواهد داد. - "واقعه‌نگاری Query." + پنهان کردن یک آدرس از پیش تعریف شده برای یک نام خاص برمی گرداند. علاوه بر اینکه به عنوان یک فایل هاست عمل می کند، می تواند آدرس IP با نام دیگری را نیز برگرداند. همچنین رکورد نام متعارف را انجام می دهد. + واقعه‌نگاری درخواست "ثبت شدن Queryهای کلاینت در یک فایل." - "فعال کردن واقعه‌نگاری Query" + فعال کردن واقعه‌نگاری درخواست "به منظور کاهش حشو، این نوع Queryها را ثبت ننمایید. خالی نگه دارید تا تمام موارد را ثبت کنید." "باز کردن وقایع Query" - "وقایع مشکوک Query." - وقایع Query برای مناطقی که وجود ندارد. -\n این Queryها می‌توانند بیانگر وجود بدافزارها، برنامه‌های خراب/منسوخ، و دستگاه‌های نشان‌دهندۀ حضور شخص ثالث خود باشد.. 
- "واقعه‌نگاری مشکوک را فعال نمایید" + وقایع مشکوک درخواست + جستارهای گزارش برای مناطق وجود ندارد. این پرس‌وجوها می‌توانند وجود بدافزار، برنامه‌های خراب/منسوخ، و دستگاه‌هایی را که حضور آنها را به اشخاص ثالث نشان می‌دهند، آشکار کنند. + واقعه‌نگاری مشکوک "باز کردن وقایع مشکوک" - "مسدودسازی بر اساس الگو (بلک ‌لیست)." + مسدودسازی بر اساس الگو (لیست سیاه) "بلک‌ لیست" "بلک لیست از یک الگوی در هر خط ساخته شده است." - "مسدودسازی IP بر اساس الگو ( بلک‌لیست IP)." + مسدودسازی IP بر اساس الگو (لیست سیاه IP) "ip بلک لیست" "بلک لیست IP از یک الگو در هر خط ساخته شده است." - "لیست مجاز مبتنی بر الگو (دور زدن لیست‌های سیاه)." + لیست مجاز مبتنی بر الگو (دور زدن لیست‌های سیاه) "لیست مجاز" "لیست مجاز از الگوهای مشابه لیست‌های سیاه پشتیبانی می‌کند. اگر یک نام با لیست مجاز مطابقت داشته باشد، جلسه مربوطه اقدام به دور زدن نام‌ها و فیلترهای IP می‌کند." "سرورها" "منابع" "لیست‌های ریموت از سرورهای موجود." - "لیست منابع بعد از ساعت‌های refresh_delay منقضی می‌شود." + لیست سرور پس از تأخیر مشخص شده در ساعت به روز خواهد شد. "رله‌ها" "منابع" - "لیست رله‌ها بعد از ساعت‌های refresh_delay منقضی می‌شود." + لیست رله پس از تأخیر مشخص شده بر حسب ساعت به روز می شود. "رله‌های DNSCrypt" "اولویت در dnscrypt-proxy.toml وجود ندارد!" "حذف واقعه" @@ -235,13 +229,13 @@ "هر NUM ثانیه در نظر بگیرید که آیا باید یک مدار جدید بسازید یا خیر." "در استفاده مجدد از یک مدار که برای اولین بار حداکثر NUM ثانیه قبل استفاده شد راحت باشید، اما هرگز یک جریان جدید را به مدار قدیمی که کهنه است وصل نکنید." "در صورت فعال بودن، TOR دو سروری که آدرس IP آن‌ها خیلی نزدیک است را در یک مدار مشابه قرار نخواهد داد. در حال حاضر، اگر در یک محدوده / 16 قرار داشته باشند، دو آدرس خیلی نزدیک هستند." - "فعال کردن پروکسی ساکس" + پراکسی SOCKS "برای شنود به اتصالات برنامه‌های SOCKS-SOC، این پورت را باز کنید." - "فعال کردن تونل HTTP" + تونل HTTP "این پورت را باز کنید تا به جای SOCKS، از پروتکل HTTP CONNECT برای شنود اتصالات پروکسی استفاده کنید." - "پروکسی ترانسپرنت را فعال کنید" + پروکسی شفاف "برای شنود اتصالات پروکسی ترانسپرنت، این پورت را باز کنید." - "فعال کردن DNS" + حل DNS "برای شنود درخواست‌های UDP DNS، این پورت را باز کنید و آن‌ها را به‌صورت ناشناس حل کنید." "در صورت غیرفعال کردن، TOR از اتصال به سرورهای دایرکتوری و گره‌های ورودی از طریق IPv4 جلوگیری می‌کند." "در صورت فعال بودن، TOR ممکن است به سرورهای دایرکتوری یا گره‌های ورودی از طریق IPv6 متصل شود." @@ -250,16 +244,16 @@ "Host یا IP برای میانبر" "ویرایش Host یا IP" "غیرفعال شده" - "پوشه ماژول را تمیز کنید" + حذف حافظه پنهان "برنامه‌هایی برای استفاده با InviZible" "برنامه‌هایی برای میانبر InviZible" "انتخاب همه" "پاک کردن موارد انتخاب شده" - تنظیمات عمومی - اجازه دادن به اتصالات ورودی + تنظیمات مشترک + اتصالات ورودی - پورت برای شنود اتصالات ورودی. - آی پی خارجی روتر برای اتصالات ورودی. + پورت برای گوش دادن به اتصالات ورودی (پیش‌فرض: خودکار (تصادفی)). + IP خارجی روتر برای اتصالات ورودی (پیش‌فرض: اگر SSU2 فعال باشد، خودکار). فعال کردن ارتباط از طریق IPv4. فعال کردن ارتباط از طریق IPv6. "روتر تونل‌های ترانزیت را نپذیرفته، و ترافیک ترانزیت را کاملاً غیرفعال کرده است." @@ -268,7 +262,7 @@ "حداکثر ٪ از پهنای باند برای انتقال. 0-100." "فعال کردن پروتکل انتقال SSU (استفاده از UDP)." "فعال کردن پروتکل NTCP2 (استفاده از TCP)." - فعال کردن ntcpproxy + پراکسی خروجی "سرور پروکسی را برای NTCP مشخص کنید. باید http://address:port یا socks://address:port باشد." "پروکسی HTTP" "پروکسی HTTP" @@ -287,11 +281,11 @@ "فعال‌کردن یا غیرفعال‌کردن UPnP." در حال تحقیق - امضای su3. را تأیید کنید. + امضای su3 را تأیید کنید. "محدودیت‌ها" "حداکثر تعداد تونل‌های ترانزیت را نادیده بگیرید. 2500 به‌صورت پیش‌فرض." 
"تعداد توصیف‌کننده‌های فایل باز (صفر - استفاده از محدودیت سیستم) محدود کنید." - حداکثر اندازه فایل اصلی در واحد Kb (صفر - استفاده از محدودیت سیستم). + حداکثر اندازه فایل اصلی در کیلوبایت (0 - استفاده از محدودیت سیستم). دفترچه آدرس "URL اشتراک کتابچه آدرس برای تنظیم اولیه." @@ -302,8 +296,8 @@ "به نظر می‌رسد DNSCrypt توسط سیستم اندرویدی اجباراً بسته شده است. اتصال اینترنتی شما بازیابی شد. تنظیمات دستگاه را بررسی کنید!" "به نظر می‌رسد که TOR توسط سیستم اندرویدی اجباراً بسته شده است. اتصال اینترنتی شما بازیابی شد. تنظیمات دستگاه را بررسی کنید!" "به نظر می‌رسد I2P توسط سیستم اندرویدی اجباراً بسته شده است. تنظیمات دستگاه را بررسی کنید!" - "به نظر می‌رسد DNSCrypt نمی‌تواند به اینترنت متصل شود. لطفاً قوانین فایروال را بررسی کنید. اتصال برای InviZible Pro و برنامه‌های روت شده را مجاز کنید." - به نظر می‌رسد TOR نمی‌تواند به اینترنت وصل شود. ممکن‌ است سرویس‌دهنده اینترنت اتصالات TOR را مسدود کرده است. شما می‌توانید از پل‌های TOR استفاده کنید. لطفا آن‌ها را در منو -> تنظیمات سریع -> پل‌ها پیدا کنید + به نظر می رسد DNSCrypt نمی تواند به اینترنت متصل شود. می توانید سعی کنید سرورهای DNSCrypt دیگری را انتخاب کنید. لطفاً آنها را در منو-> تنظیمات سریع-> انتخاب سرور‌های DNSCrypt پیدا کنید + به نظر می‌رسد TOR نمی‌تواند به اینترنت وصل شود. ممکن‌ است سرویس‌دهنده اینترنت اتصالات TOR را مسدود کرده باشد. شما می‌توانید از پل‌های TOR استفاده کنید. لطفا آن‌ها را در منو -> تنظیمات سریع -> پل‌ها پیدا کنید "قفل کودک" "شما می‌توانید کنترل این برنامه را قفل کنید. لطفاً رمز عبور را وارد نمایید، یا از رمز عبور قبلی استفاده نمایید." "لطفا رمز عبور را وارد نمایید." @@ -331,12 +325,12 @@ "آپدیت DNSCrypt در دسترس است. آیا می‌خواهید آن را دانلود و آپدیت نمایید؟ آپدیت در پس‌زمینه ادامه خواهد داشت." "آپدیت TOR در دسترس است. آیا می‌خواهید آن را دانلود و آپدیت نمایید؟ آپدیت در پس‌زمینه ادامه خواهد داشت." "آپدیت Purple I2P در دسترس است. آیا می‌خواهید آن را دانلود و آپدیت نمایید؟ آپدیت در پس‌زمینه ادامه خواهد داشت." - در حال چک کردن بروزرسانی - لطفاً در هنگام بررسی وجود بروزرسانی‌ها صبر نمایید. + در حال چک کردن به‌روزرسانی + لطفاً در هنگام بررسی وجود به‌روزرسانی‌ها صبر نمایید. "سرور بروزرسانی موقتاً در دسترس نمی‌باشد. لطفاً بعداً سعی نمایید." "سرور بروزرسانی در دسترس نمی‌باشد." "آپدیت با خطا مواجه شد." - بروزرسانی یافت نشد. + به‌روزرسانی یافت نشد. "آخرین بررسی:" "بروزرسانی یافت شد." "بروزرسانی نصب شد." @@ -346,7 +340,7 @@ "به نظر می‌رسد این نسخه غیررسمی از InviZible است. لطفاً مراقب آن باشید!" "فقط برای نسخه PRO" "کمک مالی" - "پروژه InviZible Pro به کمک شما احتیاج دارد. لطفاً از صفحه کمک مالی بازدید و یا کد ثبت‌نام دریافتی را وارد نمایید." + پروژه InviZible به دنبال کمک شماست. لطفاً از صفحه اهدا بازدید کنید یا کد پریمیوم را که قبلاً دریافت کرده اید وارد کنید. "بازدید" "کد را وارد کنید" "کد را وارد کنید" @@ -355,17 +349,15 @@ "حالت VPN فعال است" "حالت VPN خاموش است" "خطا در حالت VPN!" - لطفاً از سرور پروکسی در تنظیمات دستگاه‌ متصل استفاده نمایید. مقادیر پیش‌فرض: -\nآی‌پی:10.1.10.1 -\nپورت:8118 + لطفاً استفاده از سرور پروکسی را در تنظیمات دستگاه متصل پیکربندی کنید. مقادیر پیش فرض: آیپی:10.1.10.1 پورت:8118 پنهان‌سازی IP توسط تور - "حفاظت از DNS توسط DNSCrypt" - "دسترسی به سایت‌های I2P توسط Purple I2P" + حفاظت با DNSCrypt + دسترسی به شبکه I2P با I2P بنفش "توجه" "افزودن سرور سفارشی" "تنظیمات سرور سفارشی نامعتبر است. لطفاً قسمت SDNS را بررسی نمایید." "لطفاً توجه نمایید که شما یک خرید در حالت انتظار دارید:" - "پروژه InviZible Pro به کمک شما نیاز دارد. بر روی OK کلیک کرده تا نسخه پیشرفته را خریداری نمایید." + پروژه InviZible به دنبال کمک شماست. برای خرید ویژگی های ممتاز، OK را فشار دهید. "متأسفیم، اما خرید شما تأیید نشد. 
طی ۳ روز آینده وجه به حساب شما برمی‌گردد." "متأسفانه، این قابلیت فقط برای نسخه کامل برنامه قابل استفاده است." "پروکسی" @@ -410,21 +402,21 @@ "پل‌های پیش فرض جدید TOR موجود است. آیا می خواهید آنها را به روز کنید؟" "تشخیص حمله MITM" "تشخیص کلاهبرداری ARP" - کلاهبرداری ARP در مرکز و حملات ظالمانه DHCP در شبکه های Wi-Fi را شناسایی کنید. + تشخیص حملات مرد میانی و جعل ARP و حملات DHCP در شبکه های Wi-Fi. "با شناسایی حمله اینترنت را مسدود کنید" "اتصال اینترنت در هنگام حمله مسدود خواهد شد" "پروکسی" "از پروکسی socks5 استفاده کنید" "اپلیکیشن InviZible Pro تمام ارتباطات را از طریق پروکسی SOCKS5 برقرار می کند" "حالت سازگاری" - اگر روی گوشی موبایل خود کاستوم‌رام نصب کرده‌اید و با فشار دادن دکمه شروع، اتصال قطع میشود، آن را فعال کنید + اگر روی گوشی موبایل خود کاستوم‌ رام نصب کرده‌اید و با فشار دادن دکمه شروع، اتصال قطع میشود، آن را فعال کنید "کنترل اسکریپت را فعال کنید" - "برای مدیریت ماژول های برنامه از دستور زیر استفاده کنید: \"am broadcast -a pan.alexander.tordnscrypt.SHELL_SCRIPT_CONTROL --ei dnscrypt 1 --ei tor 1 --ei i2p 1 %s \". جایی که 1 - ثانیه" + از دستور زیر برای مدیریت ماژول های برنامه استفاده کنید: \"am broadcast -a pan.alexander.tordnscrypt.SHELL_SCRIPT_CONTROL --ei dnscrypt 1 --ei tor 1 --ei i2p 1 %s\". با 1 - شروع می شود، 0 - ماژول را متوقف می کند. "آدرس IP دستگاه LAN" "مشکل TTL را برطرف کرده و دستگاه LAN را برای اتصال به InviZible پیکربندی کنید. از درگاه پیش فرض استفاده کنید: %2$s، دی‌ان‌اس سرور: %1$s." "پشتیبانی از چند کاربر" - پشتیبانی از برنامه‌های دوتایی، MIUI، جزیره، پناهگاه و برنامه‌های نمایه‌کاری - "پراکسی خروجی SOCKS را فعال کنید" + پشتیبانی از اپلیکیشن های دوگانه، MIUI، جزیره، پناهگاه و پروفایل کار. ممکن است در حالت VPN بی اثر باشد + پراکسی خروجی SOCKS "تور تمام ارتباطات OR را از طریق پروکسی SOCKS 5 برقرار می کند." "خدمات در حال اجرا" "اعلان ها را به روز کنید" @@ -489,10 +481,10 @@ اعلان‌های کمکی حفاظت مجدد از DNS هنگام شناسایی حمله مجدد DNS، سایت را مسدود کنید - "DNS rebinding" + الحاق دوباره DNS حمله بالقوه DNS Rebinding شناسایی شد! سایت %s مسدود شده است. آیا اطمینان دارید؟ این اقدام قابل برگشت نیست! - ریست تنظیمات + بازنشانی تنظیمات نسخه پشتیبان ذخیره شد نسخه پشتیبان بازیابی شد پروژه InviZible و نویسنده آن از این کمک قدردانی می کنند! @@ -510,7 +502,7 @@ \nسیاست حفظ حریم‌خصوصی: \nاپ InviZible Pro هیچ گونه اطلاعات شخصی یا حساس کاربر را جمع آوری یا به اشتراک نمی گذارد. \n -\nحق چاپ © 2019-2024 +\nحق چاپ © 2019-2025 \nGarmatin Oleksandr \ninvizible.soft@gmail.com \ninvizible.net/en/privacy @@ -521,18 +513,18 @@ ذخیره دیتا شبکه غیرفعال شود؟ استفاده از دستگاه BusyBox استفاده از برنامه BusyBox - از BusyBox استفاده نکن. + از BusyBox استفاده نکنید پل IPv6 مسدود کردن دسترسی به اینترنت زمانی Tor, DNSCrypt و Purple I2P متوقف هستند گزارش لحظه‌ای - مستثنی کردن از Tor - مسیر به Tor - مستثنی کردن UDP از TOR + از Tor استفاده نشود + عبور ترافیک از Tor + UDP از Tor استفاده نکند مستثنی کردن به طور کامل - "لطفا اجازه فعال شدن background data usage و data usage را هنگامی که ذخیره دیتا روشن است بدهید.این مورد برای تجربه کاربری بهتر مورد نیاز است." + لطفاً استفاده از داده پس‌زمینه را مجاز کنید و وقتی «بهینه‌سازی داده» روشن است، اجازه استفاده از داده را بدهید. این برای یک تجربه آنلاین روان ضروری است. نمایش گزارش‌های ارتباط برنامه در تب DNS قطع اضطراری - فعال کردن پروتکل انتقال SSU2 (استفاده از UDP) + پروتکل انتقال SSU2 را فعال کنید (از UDP استفاده کنید). تنظیمات ایزوله فقط برنامه هایی که می توانند به اینترنت متصل شوند نشان داده می شوند. مدارها را با جریان های برنامه های مختلف به اشتراک نگذارید. @@ -572,4 +564,16 @@ برای ویرایش طولانی فشار دهید. گره ها InviZible Pro نمی تواند %1$s را شروع کند! 
لطفاً تنظیمات %2$s را بازنشانی کنید. اگر کمکی نکرد، لطفاً دستگاه خود را راه اندازی مجدد کنید. + شما برای استفاده از پراکسی Socks5 پیکربندی کرده اید، اما به نظر می رسد که پروکسی در حال اجرا نیست. لطفا تنظیمات خود را بررسی کنید! + از پروکسی برای برنامه‌هایی که تور را دور می‌زنند استفاده کنید + افزودن قانون + فعال کردن فیلترشکن همیشه روشن و مسدود کردن اتصالات بدون فیلترشکن برای InviZible Pro برای مسدود کردن اینترنت در زمانی که برنامه در حال اجرا نیست + قوانین پس از تاخیر مشخص شده در ساعت به روز می شوند. + جایگزینی لیست راه دور + افزودن لیست راه دور + افزودن لیست محلی + مجموع: %d قانون + افزودن نشانی اینترنتی + جایگزینی لیست محلی + قوانین diff --git a/tordnscrypt/src/main/res/values-fr/strings.xml b/tordnscrypt/src/main/res/values-fr/strings.xml index 0dfed5f31..214293da3 100644 --- a/tordnscrypt/src/main/res/values-fr/strings.xml +++ b/tordnscrypt/src/main/res/values-fr/strings.xml @@ -348,7 +348,7 @@ \n\n\tInviZible Pro peut utiliser root, si votre appareil possède des privilèges root, ou utiliser un RPV local pour acheminer le trafic Internet vers les réseaux Tor, DNSCrypt et I2P. \n\n\tPolitique de confidentialité: \n\tInviZible Pro ne collecte ni ne partage aucune donnée personnelle ou sensible des utilisateurs. - \n\tCopyright © 2019-2024 + \n\tCopyright © 2019-2025 \n\tOleksandr Garmatin \n\tinvizible.soft@gmail.com \n\tinvizible.net/en/privacy diff --git a/tordnscrypt/src/main/res/values-in/strings.xml b/tordnscrypt/src/main/res/values-in/strings.xml index 7d50ffc67..5b1ad6413 100644 --- a/tordnscrypt/src/main/res/values-in/strings.xml +++ b/tordnscrypt/src/main/res/values-in/strings.xml @@ -489,7 +489,7 @@ \n\n\tInviZible Pro bisa menggunakanroot, juka penrangkat mu memeliki izin root, atau menggunakan VPN lokal untuk Tor, DNSCrypt dan I2P. \n\n\tKebijakan privasi: \n\tInviZible Pro tidak mengumpulkan dan membagikan data sensitif pengguna. - \n\n\tCopyright © 2019-2024 + \n\n\tCopyright © 2019-2025 \n\tGarmatin Oleksandr \n\tinvizible.soft@gmail.com \n\tinvizible.net/en/privacy diff --git a/tordnscrypt/src/main/res/values-it/strings.xml b/tordnscrypt/src/main/res/values-it/strings.xml index 6dfe16a54..c4d8d2032 100644 --- a/tordnscrypt/src/main/res/values-it/strings.xml +++ b/tordnscrypt/src/main/res/values-it/strings.xml @@ -347,7 +347,7 @@ \n\n\tInviZible Pro può funzionare in modalità root, se il tuo dispositivo ha i privilegi di root, o utilizzare una VPN locale per reindirizzare il traffico Internet a Tor, DNSCrypt e alla rete I2P. \n\n\tPolitica della Privacy: \n\tInviZible Pro non raccoglie o condivide nessun dato utente personale o sensibile. - \n\n\tCopyright © 2019-2024 + \n\n\tCopyright © 2019-2025 \n\tGarmatin Oleksandr \n\tinvizible.soft@gmail.com \n\tinvizible.net/en/privacy diff --git a/tordnscrypt/src/main/res/values-ja/strings.xml b/tordnscrypt/src/main/res/values-ja/strings.xml index f6baab320..d24281a5b 100644 --- a/tordnscrypt/src/main/res/values-ja/strings.xml +++ b/tordnscrypt/src/main/res/values-ja/strings.xml @@ -371,7 +371,7 @@ "ドメインリストをインポートします。複数のファイルを選択できます。重複する項目は削除されます。" "ホワイトリストを消去" "Immediately respond to IPv6-related queries with an empty response. This makes things faster when there is no IPv6 connectivity, but can also cause reliability issues with some stub resolvers." - "Androidのネットワーク設定で「プライベートDNS」機能を無効にしてください。この設定はInvizibleと干渉します。" + Androidのネットワーク設定で「プライベートDNS」機能を無効にしてください。この設定はInvizibleと干渉します。 "ルールをインポート" "しばらくお待ちください... %d 個のルールをインポートしました。" "完了! 
%d 個のルールをインポートしました。" diff --git a/tordnscrypt/src/main/res/values-nl/strings.xml b/tordnscrypt/src/main/res/values-nl/strings.xml new file mode 100644 index 000000000..2f4c7d495 --- /dev/null +++ b/tordnscrypt/src/main/res/values-nl/strings.xml @@ -0,0 +1,558 @@ + + + Toestaan + Reservekopie hersteld + U kunt InviZible Pro gebruiken met lokale VPN-modus of toepassingen met eigen proxy of lokale VPN-functie in proxy-modus. + DNSCrypt automatisch opstarten + I2P geïnstalleerd + Backup opgeslagen + Controleren of Root beschikbaar is… + Even geduld… + Tor met DNSCrypt en I2P + Instellingen + Kies backupmap: + beste oplossing voor uw privacy + Instellingen + I2P is gestopt + I2P is actief + I2P starten + I2P stoppen + I2P installeren + I2P niet geïnstalleerd + Starten bij opstarten + Niet tonen + DNSCrypt is gestopt + DNSCrypt geïnstalleerd + DNSCrypt installeren + DNSCrypt niet geïnstalleerd + DNSCrypt starten + DNSCrypt is actief + DNSCrypt is gestopt + Tor is gestopt + Tor geïnstalleerd + Tor installeren + Tor Starten + Aansluiting + Tor Loopt + Annuleren + Fout! + Waarschuwing! + Installeren + Verlaat + Klaar + Er is iets misgegaan! + Wil je de installatie voltooien? + Premium versie + Ik ga akkoord + Ik ben het er niet mee eens + Wijzigingen bewaren + Wijzigingen verwijderen + Weigeren + Weet je het zeker? Deze actie kan niet ongedaan worden gemaakt! + Vraag later + U kunt InviZible Pro gebruiken met toepassingen met een eigen proxy of lokale VPN-functie in proxymodus. + Fout bij opslaan bestand! + Algemene instellingen + DNSCrypt Instellingen + Tor Instellingen + Back-up maken en herstellen + Over + Logboeken + Doneren + Instellingen Opnieuw Instellen + Map Pad: + Tor automatisch opstarten + I2P automatisch opstarten + Vertraging + Gebruik alleen een vertraging (sec) als Autostart niet goed werkt + DNSCrypt servers + Selecteer DNSCrypt-servers + Tor Instellingen + Tor is niet geïnstalleerd + Tor Stoppen + I2P Instellingen + Snelle instellingen + Instellingen Herstellen + Instellingen Bewaren + Automatisch starten + Routing-instellingen + Routeer al het verkeer via Tor + Routeer al het verkeer via InviZible + Applicaties uitsluiten + Sites uitsluiten + Applicaties voor gebruik met InviZible + Applicaties voor gebruik met Tor + Applicaties selecteren + Sites lijst om te openen met Tor. Deze functie werkt niet voor sites achter CDN + Selecteer sites + Sites lijst om direct te openen. Deze functie werkt niet voor sites achter CDN + Lijst met applicaties direct openen + Uitsluiten van Tor + Route naar Tor + UDP uitsluiten van Tor + Volledig uitsluiten + Vernieuwingsinterval + Periode in uren om het IP van sites te vernieuwen. Voor Android 5.1 en hoger. Zet 0 om het verversen te stoppen. + Bruggen + Gebruik het als je geen verbinding kunt maken met het Tor-netwerk + Gebruik geen bruggen + Voer de tekens van de afbeelding in + Nieuwe bruggen aanvragen + Je nieuwe bruggen van bridges.torproject.org: + Opslaan + Sluiten + Nep SNI + Verkeerde hostnaam! 
+ Thema selecteren + Dagmodus + Nachtmodus + LAN-adressen voor te omzeilen + Taal selecteren + Real-time logboeken + Logboeken van applicatieverbindingen weergeven op het tabblad DNS + Blokkeer http + Verbinding met http poort 80 weigeren + Bijwerken + Controleer dagelijks de updates van InviZible Pro en modules + Update controleren + Update uitsluitend via Tor + Controleer updates alleen met Tor en als Tor actief is + Rootrechten gebruiken + Modules uitvoeren met root + Overige + HOTSPOT http blokkeren + Verbinding weigeren met http poort 80 voor HOTSPOT + Detectie van MITM-aanvallen + ARP-spoofingdetectie + Man-in-the-Middle ARP-spoofing en malafide DHCP-aanvallen in Wi-Fi-netwerken detecteren. + Rogue DHCP-detectie + Man-in-the-Middle malafide DHCP-aanvallen detecteren in Wi-Fi-netwerken. + Bescherming tegen DNS rebinding + Site blokkeren wanneer DNS rebinding-aanval is gedetecteerd + Proxy + Gebruik socks5 proxy + Gebruik Bruggen Eigen Lijst + Systeemstandaard + Brug bewerken + Schakel Entry Nodes uit in Tor instellingen, anders kun je Bruggen niet gebruiken. + Standaardlijst bruggen gebruiken + Bruggen aanvragen + Bruggen toevoegen + Deactiveer brug eerst! + Verduistering + IPv6 bruggen + Selecteer type verduistering: + Auto + Er zijn nieuwe standaard Tor-bruggen beschikbaar. Wil je ze bijwerken? + Controleer of er nu nieuwe versies beschikbaar is + Tor niet gebruiken voor LAN-bestemmingen en door IANA gereserveerde IP-blokken + Automatisch bijwerken + Gebruik Root rechten voor DNSCrypt, Tor en I2P modules. Het inschakelen van deze functie laat modules onbeheerd en kan verbindingsproblemen veroorzaken! + Applicatie beschermen tegen gedood worden door Android + Internetverbinding wordt geblokkeerd tijdens de aanval + Blokkeer internet wanneer een aanval wordt gedetecteerd + InviZible Pro maakt alle verbindingen via de SOCKS5 proxy + Uitschakelen + Blokkeer internetverbinding wanneer Tor, DNSCrypt en Purple I2P zijn gestopt + Regels vernieuwen + Altijd VPN inschakelen en Verbindingen zonder VPN blokkeren voor InviZible Pro om het internet te blokkeren wanneer de app niet actief is + Werk regels bij bij elke connectiviteitswijziging + Melding tonen + Slaapstand voorkomen + Laat het apparaat niet in de sluimerstand vallen. Kan handig zijn bij gebruik van HOTSPOT. De batterij raakt leeg! + Compatibiliteitsmodus + Inschakelen als je apparaat een aangepaste rom heeft en de verbinding wordt verbroken wanneer je op de START-knop drukt. + Hulpberichten + Alle berichten tonen die zijn gemarkeerd als niet toegestaan om te tonen + Stoppen met het optimaliseren van batterijgebruik? + Netwerkgegevensbesparing uitschakelen? + Selecteer BusyBox + Auto + Apparaat BusyBox gebruiken + Toepassing BusyBox gebruiken + BusyBox niet gebruiken + HOTSPOT-Experimenteel + Delen starten bij opstarten + Tor delen toestaan + Route Alles via Tor + Routeer al het inkomende verkeer via Tor + Sites selecteren + Lijst met sites die worden geopend met Tor voor HOTSPOT + Sites uitsluiten + Website lijst om direct te openen voor HOTSPOT + Delen via I2P toestaan + TTL herstellen + IP-adres LAN-apparaat + Tethering configuratie openen + Herstart DNSCrypt + Start Tor opnieuw op + Start I2P opnieuw op + Multi-user ondersteuning + Selecteer iptables + Applicatie iptables gebruiken + Apparaat iptables gebruiken + Wacht op het slot van xtables + DNSCrypt, Tor, I2P zijn beschermd. 
+ Root-opdrachten uitvoeren… + Verbinden… + Wachten op netwerk… + Even geduld… + Instellingen opgeslagen + Globale instellingen + Alleen voor gevorderde gebruikers! + Selecteer minstens één server! + Lokale poort om naar te luisteren. + Eisen dat servers (van statische + externe bronnen) voldoen aan specifieke eigenschappen + Gebruik servers die het DNSCrypt-protocol implementeren. + Gebruik servers die het DNS-over-HTTPS protocol implementeren. + Gebruik servers die het Oblivious DNS-over-HTTPS protocol implementeren. + Gebruik servers die bereikbaar zijn via IPv6 + De server moet DNS-beveiligingsuitbreidingen (DNSSEC) ondersteunen. + De server mag geen gebruikersquery\'s loggen (declaratief). + Gebruik altijd TCP om verbinding te maken met upstream servers + Gebruik TCP in plaats van UDP om verbinding te maken met DNSCrypt-servers. Deze optie moet ingeschakeld zijn als je DNSCrypt over Tor gebruikt. + Uitgaande proxy + Proxy poort + Overige instellingen + Laat DNSCrypt niet proberen om systeem DNS-instellingen te gebruiken. Gebruik onvoorwaardelijk de Bootstrap-resolvers. + Activeer deze optie als je op een netwerk zit dat alleen uit IPv6 bestaat en IPv4-sites onbeschikbaar worden. Schakel deze optie anders niet in, anders kun je helemaal nergens verbinding mee maken. + Reeks van gebruikte statische IPv6-prefixen. + Filters + Regels voor doorsturen + Routeer zoekopdrachten voor specifieke domeinen naar een speciale set servers. + Regels voor doorsturen importeren + Importeer een bestand met doorstuurregels. Je kunt meerdere bestanden selecteren, dubbele regels worden verwijderd. + Regels voor doorsturen wissen + Regels voor onzichtbaarheid + Regels voor cloaking importeren + Importeer een cloaking rules bestand. Je kunt meerdere bestanden selecteren, dubbele regels worden verwijderd. + Regels voor cloaking wissen + Loggen van zoekopdrachten + Log deze typen query\'s niet om het aantal woorden te beperken. Houd leeg om alles te loggen. + Query log openen + Registratie van verdachte zoekopdrachten + Log queries voor niet-bestaande zones. Deze query\'s kunnen de aanwezigheid van malware, kapotte/verouderde applicaties en apparaten die hun aanwezigheid aan derden melden, onthullen. + Gebruik servers die bereikbaar zijn via IPv4 + Log clientvragen in een bestand. + Sta het gebruik van gegevens op de achtergrond toe en sta het gebruik van gegevens toe terwijl de gegevensbesparing is ingeschakeld. Dit is essentieel voor een soepele online ervaring. + Scriptbesturing inschakelen + Internet geblokkeerd vanwege de Kill switch. Start Tor, DNSCrypt of I2P om de verbinding toe te staan. Of schakel de Kill-schakelaar uit in Algemene instellingen. + Gebruik het volgende commando om applicatiemodules te beheren: am broadcast -a pan.alexander.tordnscrypt.SHELL_SCRIPT_CONTROL --ei dnscrypt 1 --ei tor 1 --ei i2p 1 %s Waar 1 - start, 0 - stopt de module. + Rootopdrachten opslaan in logboek + Verdachte logging + Sluit InviZible Pro uit van Android batterijoptimalisatie om te voorkomen dat het systeem DNSCrypt, Tor of I2P op elk moment afsluit. Sommige speciale systemen, zoals MIUI, vereisen mogelijk extra stappen. + Ondersteuning voor Dual Apps, MIUI, Island, Shelter en toepassingen met werkprofiel. Kan ineffectief zijn in VPN-modus + Schakel Fix TTL in en configureer LAN-apparaat om verbinding te maken met InviZible. Standaardgateway gebruiken: %1$s, DNS-server: %2$s. + TTL wordt vastgezet op 64 met behulp van een lokaal VPN. Kernelondersteuning is niet vereist. 
Start DNSCrypt of/en Tor om TTL vast te zetten. + Wacht tot het exclusieve iptables slot verkregen kan worden om gelijktijdige wijziging van iptables regels te voorkomen. + Routeer alle TCP verbindingen naar een lokale Tor inkomende Socks5 proxy. Tor ondersteunt UDP niet, dus stel Force TCP ook in op true. + Servers gedefinieerd door bronnen op afstand moeten voldoen aan specifieke eigenschappen + SOCKS proxy + Cloaking retourneert een vooraf gedefinieerd adres voor een specifieke naam. Naast het fungeren als een HOSTS-bestand, kan het ook het IP-adres van een andere naam retourneren. Het zal ook CNAME-afvlakking doen. + De server mag zijn eigen zwarte lijst niet afdwingen (voor ouderlijk toezicht, blokkeren van advertenties...). + Dit is een normale, niet-versleutelde DNS-oplosser die alleen wordt gebruikt voor eenmalige queries bij het ophalen van de initiële lijst met resolvers en alleen als de systeem-DNS-configuratie niet werkt. Hij zal nooit gebruikt worden als er al lijsten in de cache staan. + Schakel ondersteuning in voor HTTP/3 (DoH3, HTTP over QUIC). Merk op dat, net als DNSCrypt maar in tegenstelling tot andere HTTP versies, dit UDP en (meestal) poort 443 gebruikt in plaats van TCP. + Loggen van zoekopdrachten + Verdacht logboek openen + Blokkeren op basis van patroon (zwarte lijst) + Zwarte lijst + Zwarte lijsten bestaan uit één patroon per regel. + Zwarte lijst importeren + Importeer een domeinlijst of hosts-bestand. Je kunt meerdere bestanden selecteren, dubbele regels worden verwijderd. + Zwarte lijst verwijderen + Op patroon gebaseerde IP-blokkering (IP-blacklist) + Zwarte IP-lijst importeren + Zwarte IP-lijst verwijderen + Witte lijst importeren + Witte lijst + Importeer een lijst met domeinen. Je kunt meerdere bestanden selecteren, dubbele regels worden verwijderd. + Witte lijst verwijderen + Servers + Bronnen + Externe lijsten met beschikbare servers. + De serverlijst wordt bijgewerkt na de opgegeven vertraging in uren. + Regels worden bijgewerkt na de opgegeven vertraging in uren. + Relais + DNSCrypt-relais + Voorkeur bestaat niet in dnscrypt-proxy.toml! + Logboek verwijderen + DNSCrypt-logboek + DNSCrypt verdachte logboek + Logboek is leeg + Reageer onmiddellijk op A en AAAA query\'s voor hostnamen zonder domeinnaam. + Bewerk dnscrypt-proxy.toml direct + Importregels + Even geduld... %d regels geïmporteerd. + Zwarte IP-lijsten bestaan uit één patroon per regel. + Zwarte IP-lijst + Zwarte IP-lijst importeren. Je kunt meerdere bestanden selecteren, dubbele regels worden verwijderd. + Op patroon gebaseerde witte lijsten (zwarte lijsten voor te omzeilen) + Klaar! De regels zijn gewist. + Witte lijsten ondersteunen dezelfde patronen als zwarte lijsten. Als een naam overeenkomt met een witte lijst, zal de corresponderende sessie namen en IP-filters omzeilen. + Bronnen + De relais lijst wordt bijgewerkt na de opgegeven vertraging in uren. + Reageer onmiddellijk op queries voor lokale zones in plaats van ze te lekken naar upstream resolvers (wat altijd fouten of timeouts veroorzaakt). + Te veel regels om weer te geven. Alleen de eerste 1000 worden getoond. + Wanneer Tor een virtueel (ongebruikt) adres moet toewijzen vanwege een MAPADDRESS commando van de controller of de AutomapHostsOnResolve functie, kiest Tor een niet-toegewezen adres uit dit bereik. + Schakel Private DNS uit in de netwerkinstellingen van Android. Deze optie interfereert met InviZible. + Reageer onmiddellijk op IPv6-gerelateerde query\'s met een leeg antwoord. 
Dit maakt dingen sneller wanneer er geen IPv6-connectiviteit is, maar kan ook betrouwbaarheidsproblemen veroorzaken met sommige stub resolvers. + Schakel proxy uit in de WiFi-netwerkinstellingen van Android. Deze optie interfereert met InviZible. + Schakel proxy uit in de APN-instellingen van het mobiele Android-netwerk. Deze optie interfereert met InviZible. + Gereed! %d regels geïmporteerd. + Probeer ingebouwde crypto hardwareversnelling te gebruiken als die beschikbaar is. + Als dit niet nul is, probeer dan minder vaak naar schijf te schrijven. + "Deze optie bepaalt Tor\'s gebruik van padding om zich te beveiligen tegen sommige vormen van verkeersanalyse. Uitschakelen om bandbreedte te besparen." + Als dit is ingeschakeld zal Tor OR verbindingen niet lang open houden en minder padding versturen op deze verbindingen. Inschakelen om bandbreedte te besparen. + Nodes + Isolatie-instellingen + Een lijst met landcodes die je nooit moet gebruiken bij het kiezen van een exit node, die verkeer voor je aflevert buiten het Tor netwerk. + Een lijst met landcodes die je moet vermijden bij het bouwen van een circuit. + Een lijst met landcodes van knooppunten om te gebruiken voor de eerste hop in je normale circuits. + Schakel Tor bruggen uit als je entry nodes wilt selecteren. + Als deze optie is ingeschakeld, zal Tor alleen uitgaande verbindingen maken naar OR\'s die draaien op poorten die je firewall toestaat (standaard 80 en 443). + Om deze optie te activeren, moeten bridges uitgeschakeld worden of moeten bridges met alleen 80 en 443 poorten gebruikt worden. + Als deze optie is ingeschakeld, zal Tor twee servers waarvan het IP adres te dicht bij elkaar ligt niet op hetzelfde circuit zetten. Op dit moment zijn twee adressen te dichtbij als ze in hetzelfde /16 bereik liggen. + HTTP Tunnel + Transparante proxy + Open deze poort om te luisteren naar transparante proxyverbindingen. + DNS resolveren + Open deze poort om te luisteren naar UDP DNS verzoeken en deze anoniem op te lossen. + Als dit uitgeschakeld is, zal Tor vermijden om verbinding te maken met adreslijstservers en entry nodes via IPv4. + Voorkeur bestaat niet in tor.conf! + Host of IP om te deblokkeren + Host of IP voor te omzeilen + Host of IP bewerken + Uitgeschakeld + Rendez-vous kiezen + STUN-servers voor SnowFlake-bruggen + Tor maakt alle OR-verbindingen via de SOCKS 5 proxy. + tor.conf direct bewerken + Cache wissen + Tor zal recente verbindingen naar hosts bijhouden en proberen voor elke verbinding hetzelfde exit knooppunt te hergebruiken. Deze optie kan helpen om eindeloze captcha te voorkomen. + Deel geen circuits met streams van verschillende apps. + Deel geen circuits met streams die gericht zijn op een andere bestemmingspoort. + Apps om InviZible te omzeilen + Alles selecteren + Selectie verwijderen + Algemene instellingen + Inkomende verbindingen + SOCKS uitgaande proxy + Een lijst met landcodes om te gebruiken als exit node - dat is een node die verkeer voor je aflevert buiten het Tor netwerk. + Als StrictNodes is ingeschakeld, zal Tor alleen de ExcludeNodes optie behandelen als een vereiste om op te volgen voor alle circuits die je genereert, zelfs als dit de functionaliteit voor jou verbreekt. + Overweeg elke NUM seconden of je een nieuw circuit wilt bouwen. + SOCKS proxy + Als dit is ingeschakeld, kan Tor verbinding maken met adreslijstservers of toegangsknooppunten via IPv6. + Poort om te luisteren naar inkomende verbindingen (standaard: auto (willekeurig)). 
+ Hergebruik gerust een circuit dat maximaal NUM seconden geleden voor het eerst is gebruikt, maar koppel nooit een nieuwe stream aan een circuit dat te oud is. + Tor gaat in een slapende toestand als het geen clientactiviteit ziet gedurende een bepaalde tijd. Moet minstens 10 minuten zijn + Apps voor gebruik met InviZible + Open deze poort om te luisteren naar verbindingen van SOCKS-sprekende applicaties. + Open deze poort om te luisteren naar proxyverbindingen die het HTTP CONNECT protocol gebruiken in plaats van SOCKS. + Verbergt de domeinnaam van de broker voor de internetprovider. De broker wordt gebruikt om een verbinding tot stand te brengen. + Deel geen circuits met streams die gericht zijn op een ander bestemmingsadres. + Externe IP van de router voor inkomende verbindingen (standaard: auto als SSU2 is ingeschakeld). + Communicatie via IPv4 inschakelen. + Communicatie via IPv6 inschakelen. + De router accepteert geen transit tunnels, waardoor transitverkeer volledig wordt uitgeschakeld. + De router zal een floodfill zijn. + Bandbreedtelimiet in KBps. + Max % van bandbreedtelimiet voor doorvoer. 0-100. + SSU transportprotocol inschakelen (gebruik UDP). + Uitgaande proxy + HTTP proxy + HTTP proxy poort + De poort waarop geluisterd moet worden (HTTP Proxy). + HTTP outproxy + Toegangspoort tot het reguliere internet + HTTP outproxy adres + Socks proxy + Socks proxy + Socks proxy poort + De poort waarop geluisterd moet worden (SOCKS Proxy). + SOCKS outproxy + Adres van outproxy (IP of lokaal). Verzoeken buiten I2P zullen daarheen gaan. + SOCKS outproxy adres + SOCKS outproxy poort + SAM interface + SAM interface + Poort van SAM-brug. + Cryptografie + Gebruik ElGamal vooraf berekende tabellen. + UPnP in- of uitschakelen. + Opnieuw uitzaaien + Controleer su3 handtekening. + Limieten + Beperk het aantal open bestandsdescriptors (0 - systeemlimiet gebruiken). + Maximale grootte van het kernbestand in Kb (0 - systeemlimiet gebruiken). + Adresboek + URL\'s voor adresboekabonnementen. + Voorkeur bestaat niet in i2pd.conf! + Tunnels.conf direct bewerken + Informatie + Het lijkt erop dat I2P is gedood door het Android-systeem. Controleer de apparaatinstellingen! + Je hebt geconfigureerd om de Socks5 proxy te gebruiken, maar de proxy lijkt niet te werken. Controleer uw instellingen! + Kinderslot + Je kunt de controle over deze toepassing vergrendelen. Voer een wachtwoord in of gebruik het vorige wachtwoord. + Voer het wachtwoord in. + Foutief wachtwoord! + Controle ontgrendeld + Controle vergrendeld + HOTSPOT + Logboeken verzamelen + Pad voor logboeken: + Logboeken opgeslagen. Stuur InvizibleLogs.txt naar de ontwikkelaar. Je kunt logs vinden in de map: + Bijwerken + Bijwerken + Later + Niet opnieuw vragen + Start InviZible Pro opnieuw op en wacht tot de installatie is voltooid! Herconfigureer HOTSPOT na de installatie, als u deze gebruikt! + Downloaden is gestopt + DOWNLOAD ANNULEREN + Nieuwe versie: + DNSCrypt-update is beschikbaar. Wilt u deze downloaden en bijwerken? De update wordt op de achtergrond uitgevoerd. + Tor-update is beschikbaar. Wil je deze downloaden en bijwerken? De update wordt op de achtergrond uitgevoerd. + Controleren op updates + Wacht even terwijl de updates worden gecontroleerd. + De updateserver is tijdelijk niet beschikbaar. Probeer het later nog eens. + De updateserver was niet beschikbaar. + Fout bij het bijwerken. + Er werden geen updates gevonden. + Laatste controle: + Er zijn updates gevonden. + Updates werden geïnstalleerd. 
+ Het lijkt erop dat je al 3 exemplaren van InviZible hebt geactiveerd met deze code. Als het niet klopt, neem dan contact op met de ontwikkelaar. + Het lijkt erop dat je PRO-code verkeerd is. Neem contact op met de ontwikkelaar. + Je hebt InviZible Pro updates meer dan 5 keer per dag gecontroleerd. Probeer het later nog eens. + Doneren + InviZible Pro Project zoekt uw hulp. Ga naar de donatiepagina of voer de premiecode in die u al hebt ontvangen. + Bezoek + Code invoeren + Code invoeren + InviZible Pro Project en de auteur spreken hun waardering uit voor de hulp! + Externe lijsten met beschikbare relais. + VPN-modus is actief + VPN-modus is uitgeschakeld + Toegang tot I2P-netwerk met Purple I2P + Attentie + InviZible Pro kan %1$s niet starten! Probeer de instellingen van %2$s te resetten. Als dit niet helpt, start uw apparaat dan opnieuw op. + Aangepaste server toevoegen + Beveiligen met DNSCrypt + Ongeldige aangepaste serverconfiguratie. Controleer het SDNS-veld. + Houd er rekening mee dat je een aankoop in behandeling hebt: + Opnieuw instellen. + Sorry, maar het is onmogelijk om je aankoop te bevestigen. Je ontvangt je geld terug na 3 dagen. + Wilt u de wijzigingen opslaan? Het kan InviZible Pro afbreken. + Lopende diensten + Meldingen bijwerken + Melding rootopdrachten + Firewallmeldingen + Hulpmeldingen + Er is een crashmelding ontdekt. Wil je het naar de ontwikkelaar sturen om InviZible beter te maken? + Nieuwe Tor-identiteit + Tor identiteit is veranderd + Proxy server: + Proxy poort: + Gebruikersnaam: + Wachtwoord: + Dit veld kan leeg blijven + Applicaties uitsluiten + Selecteer in ieder geval de proxy-eigenaarstoepassing als u een lokale proxy gebruikt. + Gebruik een proxy voor apps die Tor omzeilen + Gebruik proxy voor DNSCrypt + Gebruik proxy voor Tor + Gebruik proxy voor Purple I2P + Opslaan en inschakelen + Kan geen verbinding maken met proxy: %s + Succesvolle verbinding. Ping %s ms. + ARP spoofing gedetecteerd! + Man-in-the-middle aanval gedetecteerd! Je gegevens kunnen worden onderschept door een ander apparaat op het lokale netwerk. Schakel uit, wacht een paar seconden en schakel Wi-Fi in. Het gebruik van de huidige Wi-Fi-hotspot kan onveilig zijn! + ARP-spoofingdetectie wordt niet ondersteund voor uw apparaat! + DNS rebinding + Potentiële DNS Rebinding-aanval gedetecteerd! Site %s is geblokkeerd. + Sorteren op naam + Sorteren op UID + Firewall Aan/Uit + Wil je de wijzigingen opslaan? + %s is geïnstalleerd + Internet toestaan? + Verbindingen naar lokale netwerk-, onion- en i2p-sites toestaan + Verbindingen in WiFi-netwerken toestaan + Verbindingen in GSM-netwerken toestaan + Verbindingen toestaan tijdens roaming + Verbindingen toestaan met VPN ingeschakeld + Alles deselecteren + Firewallinstellingen + Verbindingen voor nieuwe apps toestaan + Alle apps weergeven + Apps worden getoond ongeacht of de app verbinding kan maken met het internet of niet. + Alleen apps die verbinding kunnen maken met het internet worden weergegeven. + Tor IP wijzigen + Tor IP veranderen + U hebt meer dan 3 tegels toegevoegd aan Snelle instellingen. Laat er slechts 3 over. Het toevoegen van meer dan 3 tegels kan problemen veroorzaken met de bediening van de app. + Lijst is leeg + ↓ Trekken om te vernieuwen ↓ + Internetverbinding controleren + Druk om toe te voegen. + Lang indrukken om toe te voegen. + Druk om te bewerken. + Lang indrukken om te bewerken. 
+ Anonimiserende relais + Externe lijst toevoegen + Externe lijst vervangen + Regel toevoegen + regels + Totaal: %d regels + URL toevoegen + Schakel het NTCP2 transportprotocol in (gebruik TCP). + Specificeer de proxyserver voor NTCP. Moet http://address:port of socks://address:port zijn. + InviZible Pro Project en zijn auteur spreken hun waardering uit voor de hulp. Wilt u downloaden en upgraden naar PRO? De update gaat door op de achtergrond. + Alleen voor PRO-versie + Lokale lijst vervangen + InviZible Pro Project zoekt uw hulp. Druk op OK om premium functies aan te schaffen. + IP verbergen met TOR + SSU2 transportprotocol inschakelen (gebruik UDP). + HTTP proxy + Bestand downloaden + HTTP proxy upstream outproxy-URL (zoals http://false.i2p) + SAM interface poort + Maximum aantal transit tunnels overschrijven. Standaard 2500. + URL voor adresboekabonnement voor eerste installatie. + i2pd.conf direct bewerken + Het lijkt erop dat DNSCrypt is gedood door het Android-systeem. Uw internetverbinding is hersteld. Controleer de apparaatinstellingen! + Het lijkt erop dat Tor geen verbinding kan maken met het internet. ISP kan Tor-verbindingen blokkeren. Je kunt proberen Tor Bridges te gebruiken. Je vindt ze in MENU -> Snelle instellingen -> Bruggen + Het lijkt erop dat Tor is gedood door het Android-systeem. Je internetverbinding is hersteld. Controleer de apparaatinstellingen! + Het lijkt erop dat DNSCrypt geen verbinding kan maken met het internet. U kunt proberen een andere DNSCrypt-server te kiezen. U vindt ze in MENU -> Snelle instellingen -> DNSCrypt-servers selecteren + Sommige InviZible modules zijn klaar voor een update. Wilt u deze bijwerken? Dan worden de instellingen van uw modules overschreven! + InviZible Pro update is beschikbaar. Wilt u deze downloaden en bijwerken? De update wordt op de achtergrond uitgevoerd. + Purple I2P update is beschikbaar. Wilt u deze downloaden en bijwerken? De update wordt op de achtergrond uitgevoerd. + Dit lijkt een onofficiële versie van InviZible te zijn. Wees er voorzichtig mee! + Het blokkeren van deze systeemapp kan leiden tot een instabiele internetverbinding! + Groen - verbindingen toestaan, wit - verbindingen weigeren + VPN-modus fout! + Rogue DHCP gedetecteerd! + Configureer het gebruik van de proxyserver in de instellingen van het aangesloten apparaat. Standaardwaarden: IP:10.1.10.1 Poort:8118 + Applicatielijst om direct te openen. Deze toepassingen gebruiken geen Tor of Proxy om verbinding te maken met het internet! + Systeem + Helaas is deze functie alleen beschikbaar voor de premium versie. + + \tBedankt voor het kiezen van InviZible Pro. Ik ben ervan overtuigd dat het uw privacy zal verbeteren en een soepele online ervaring zal bieden. + \n\n\tInviZible Pro bevat Tor, DNSCrypt en Purple I2P als modules. + \n\n\tInviZible Pro kan root gebruiken als uw apparaat geroot is, of gebruikt een lokale VPN om internetverkeer rechtstreeks naar de Tor-, DNSCrypt- en I2P-netwerken te leiden. + \n\n\tPrivacybeleid: + \n\tInviZible Pro verzamelt of deelt geen persoonlijke of gevoelige gebruikersgegevens. + \n\n\tCopyright © 2019-2025 + \n\tGarmatin Oleksandr + \n\tinvizible.soft@gmail.com + \n\tinvizible.net/en/privacy + + Internetverbindingen voor nieuw geïnstalleerde applicaties worden geblokkeerd. + Anonimiserende relais worden niet gebruikt. 
+ Alles selecteren + Lokale lijst toevoegen + Firewall + Alle + Gebruiker + Meldingen zijn essentieel voor het weergeven van belangrijke informatie over de werking van de app, app-controle en gedetecteerde aanvallen. Wilt u meldingen voor InviZible toestaan? + Om de firewall te activeren, moet je op zijn minst DNSCrypt of Tor starten. + Internetverbindingen voor nieuw geïnstalleerde apps zijn toegestaan. + diff --git a/tordnscrypt/src/main/res/values-pl/strings.xml b/tordnscrypt/src/main/res/values-pl/strings.xml index cba484c3b..dbcb5fdd9 100644 --- a/tordnscrypt/src/main/res/values-pl/strings.xml +++ b/tordnscrypt/src/main/res/values-pl/strings.xml @@ -514,7 +514,7 @@ \n\n\tInviZible Pro może korzystać z roota, jeśli twoje urządzenie jest zrootowane, lub używa lokalnej sieci VPN do dostarczania ruchu internetowego bezpośrednio do sieci Tor, DNSCrypt i I2P. \n\n\tPolityka prywatności: \n\tInviZible Pro nie gromadzi ani nie udostępnia żadnych osobistych ani wrażliwych danych użytkownika. - \n\n\tCopyright © 2019-2024 + \n\n\tCopyright © 2019-2025 \n\tGarmatin Oleksandr \n\tinvizible.soft@gmail.com \n\tinvizible.net/en/privacy @@ -550,4 +550,7 @@ Dodaj regułę Razem: %d reguł Dodaj adres URL + Dodaj zdalną listę + Zastąp listę zdalną + reguły diff --git a/tordnscrypt/src/main/res/values-pt-rBR/strings.xml b/tordnscrypt/src/main/res/values-pt-rBR/strings.xml index f0da45eb8..4643b8574 100644 --- a/tordnscrypt/src/main/res/values-pt-rBR/strings.xml +++ b/tordnscrypt/src/main/res/values-pt-rBR/strings.xml @@ -121,7 +121,7 @@ "Atualizar regras toda vez que a conexão mudar" "Mostrar notificação" "Impedir que o dispositivo durma" - Proteção adicional no modo sem Root para prevenir que o aplicativo seja morto pelo Android. Pode drenar bateria + Não permita que o dispositivo entre no modo de cochilo. Pode ser útil ao usar o HOTSPOT. Consome a bateria! "Mensagens de ajuda" Mostrar todas as mensagens que foram marcadas como não permitidas para exibição Por favor, exclua o InviZible Pro da otimização da bateria do Android para evitar que o sistema termine o DNSCrypt, Tor ou I2P a qualquer momento. Alguns sistemas especiais, como MIUI, podem exigir passos adicionais. @@ -142,7 +142,7 @@ Permitir I2P Tethering "Corrigir TTL" - O TTL será fixado em 64 usando uma VPN local. O suporte ao kernel não é necessário. Ele só pode ser usado no \"modo Root\" com a opção \"Executar módulos com Root\" desabilitada. Para corrigir o TTL, inicie DNSCrypt ou / e Tor. + O TTL será fixado em 64 usando uma VPN local. O suporte do kernel não é necessário. Para corrigir o TTL, por favor, inicie o DNSCrypt ou/e o Tor. Abrir configurações do Tethering "Por favor, reinicie o DNSCrypt" "Por favor, reinicie o Tor" @@ -152,14 +152,14 @@ "DNSCrypt, Tor 2 o I2P são protegidos. Não esconda." "Por favor, aguarde…" "Configurações salvas" - "CONFIGURAÇÕES GLOBAIS" + Configurações globais Somente para usuários avançados! "Selecione pelo menos um servidor!" "Porta local de escuta." - "Requer servidores (de fontes estáticas + remotas) para satisfazer propriedades específicas." + Requer servidores (de fontes estáticas + remotas) para satisfazer propriedades específicas "Use servidores que implementam o protocolo DNSCrypt." "Use servidores que implementam o protocolo DNS-over-HTTPS." - Requer servidores de fontes remotas para satisfazer propriedades específicas. + Requer servidores de fontes remotas para satisfazer propriedades específicas O servidor deve suportar extensões seguras do DNS (DNSSEC). 
"O servidor não deve manter log das queries do usuário (declarativo)." @@ -167,31 +167,31 @@ Sempre use TCP para se conectar a servidores upstream Isso pode ser útil se você precisar rotear tudo através do Tor.. Caso contrário, deixe como falso, pois isso não melhora a segurança (dnscrypt-proxy sempre criptografará tudo, mesmo usando UDP) e só aumentará a latência. Proxy SOCKS - "Ativar proxy" - "Roteie todas as conexões TCP para um nó Tor local. Tor não suporta UDP, então defina force_tcp como true também." + Proxy de saída + Encaminhe todas as conexões TCP para um proxy Socks5 de entrada do Tor local. O Tor não oferece suporte a UDP, portanto, também habilite Forçar TCP. Porta de Proxy Outras Configurações Esse é um resolvedor de DNS normal, não criptografado, que será usado somente para consultas únicas ao recuperar a lista inicial de resolvedores e somente se a configuração de DNS do sistema não funcionar. Ele nunca será usado se as listas já tiverem sido armazenadas em cache. - "Nunca deixe dnscrypt-proxy tentar usar as configurações de DNS do sistema. Use incondicionalmente o resolvedor de fallback." + Não permita que o DNSCrypt tente usar as configurações de DNS do sistema. Use incondicionalmente os resolvedores do Bootstrap. Filtros Regras de encaminhamento "Roteie as consultas de domínios específicos para um conjunto dedicado de servidores." Regras de camuflagem Cloaking retorna um endereço predefinido para um nome específico. Além de atuar como um arquivo HOSTS, ele também pode retornar o endereço IP de um nome diferente. Ele também fará o nivelamento de CNAME. - "Log de consulta." + Registro de consultas - Salvar logs das queries do cliente para um arquivo. + Salvar as consultas do cliente em um arquivo. - "Ativar log de consultas" + Registro de consultas "Para reduzir o detalhamento, não registrar esses tipos de consulta. Mantenha vazio para logar tudo." - "Abrir log de consulta" - "Log de consultas suspeitas." + Abrir registro de consulta + Log de consultas suspeitas "Log de consultas para zonas inexistentes. Essas consultas podem revelar a presença de malware, aplicativos quebrados / obsoletos e dispositivos sinalizando sua presença para terceiros." - "Ativar log de consultas suspeitas" + Registro de consultas suspeitas "Abrir log de consultas suspeitas" Bloqueio baseado em padrões (lista negra) @@ -216,7 +216,7 @@ "DNSCrypt Retransmissores" "Não existe preferência em dnscrypt-proxy.toml!" "Apagar log" - "Log de consulta DNSCrypt" + Registro de consulta DNSCrypt Log de consultas suspeitas do DNSCrypt "O log está vázio" "Responda imediatamente às consultas A e AAAA para nomes de host sem um nome de domínio." @@ -240,7 +240,7 @@ "Abra esta porta para ouvir conexões de aplicativos de fala SOCKS." Proxy transparente Abra esta porta para ouvir conexões de proxy transparentes. - "Ativar DNS" + Resolução de DNS "Abra esta porta para escutar as solicitações UDP DNS e resolvê-las anonimamente." "Se desabilitado, o Tor evitará a conexão com servidores de diretório e nós de entrada via IPv4." Se ativado, o Tor pode se conectar a servidores de diretório ou nós de entrada por IPv6. @@ -255,9 +255,9 @@ "Selecionar todos" "Remover Seleção" Configurações padrão - Permitir conexões de entrada - Porta para escutar conexões de entrada. - IP externo do roteador para conexões de entrada. + Conexões de entrada + Porta de escuta para conexões de entrada (padrão: auto (aleatório)). + IP externo do roteador para conexões de entrada (padrão: auto se o SSU2 estiver ativado). 
"Ative a comunicação por meio de IPv4." "Ative a comunicação por meio de IPv6." "O roteador não aceitará túneis de trânsito, desativando completamente o tráfego de trânsito." @@ -266,7 +266,7 @@ Máx % limite de largura de banda para trânsito. 0–100. "Habilite protocolo de transporte SSU (use UDP)." "Habilite protocolo de transporte NTCP2 (use TCP)." - Habilite ntcpproxy + Proxy de saída "Especifique o servidor proxy para NTCP. Deve ser http://endereço:porta ou socks://endereço:porta." "HTTP proxy" "HTTP proxy" @@ -283,13 +283,13 @@ "Criptografia" "Use tabelas pré-computadas ElGamal." Habilite ou desabilite o UPnP. - "Reseeding" + Ressemeando "Verifique a assinatura su3." "Limites" Substitua o número máximo de túneis de trânsito. 2500 por padrão. "Limite o número de descritores de arquivos abertos (0 - uso limite do sistema)." - Tamanho máximo do corefile em Kb (0 - limite do sistema de uso). - Addressbook + Tamanho máximo do arquivo principal em Kb (0 - usa o limite do sistema). + Livro de endereços "URL de assinatura do AddressBook para configuração inicial." "URLs de assinaturas do AdressBook." "Não existe preferencia no arquivo i2pd.conf!" @@ -406,14 +406,14 @@ Usar proxy socks5 "InviZible Pro fará todas as conexões através do proxy SOCKS5" "Modo de compatibilidade" - Ative se o seu dispositivo com uma ROM personalizada e a conexão for perdida quando você pressiona o botão INICIAR + Ative se o seu dispositivo tiver uma ROM personalizada e a conexão for perdida quando você pressionar o botão START "Habilitar controle de script" "Use o seguinte comando para gerenciar os módulos do aplicativo: \"am broadcast -a pan.alexander.tordnscrypt.SHELL_SCRIPT_CONTROL --ei dnscrypt 1 --ei tor 1 --ei i2p 1 %s\". Onde 1 - inicia, 0 - para o módulo." "Endereço IP do dispositivo LAN" "Habilite Fix TTL e configure o dispositivo LAN para se conectar ao InviZible. Use o gateway padrão: %1$s, servidor DNS: %2$s." "Suporte multiusuário" - Suporte para aplicativos duplos, MIUI, Island, Shelter e aplicativos de perfil de trabalho - "Habilitar proxy de saída SOCKS" + Suporte para aplicativos Dual Apps, MIUI, Island, Shelter e Work. Pode ser ineficaz no modo VPN + Proxy de saída SOCKS "O Tor fará todas as conexões OR por meio do proxy SOCKS 5." "Serviços Rodando" "Notificações de atualização" @@ -503,13 +503,13 @@ "Internet bloqueada devido ao Kill switch. Inicie o Tor, DNSCrypt ou I2P para permitir a conexão, ou desabilite o Kill switch em Configurações Comuns." "A lista está vazia" "Auto" - "Use device BusyBox" - "Use application BusyBox" - "Not use BusyBox" - "Use application iptables" - "Use device iptables" + Usar BusyBox do dispositivo + Usar BusyBox do aplicativo + Não usar BusyBox + Usar iptables do aplicativo + Usar o iptables do dispositivo "Wait for the xtables lock" - "Wait until the exclusive iptables lock can be obtained to prevent concurrent modification of iptables rules." + Aguarde até que o bloqueio exclusivo do iptables possa ser obtido para evitar a modificação simultânea das regras. Verificação de conectividade com a Internet Registros em tempo real Mostrar registros de conexão de aplicativos na guia DNS @@ -519,7 +519,7 @@ \n\n\tO InviZible Pro pode usar root, se seu dispositivo tiver privilégios de root, ou usa uma VPN local para transmitir tráfego de Internet para redes Tor, DNSCrypt e I2P. \n\n\tPolítica de Privacidade: \n\tO InviZible Pro não coleta ou compartilha nenhum dado pessoal ou sensível do usuário. 
- \n\n\tCopyright © 2019-2024 + \n\n\tCopyright © 2019-2025 \n\tGarmatin Oleksandr \n\tinvizible.soft@gmail.com \n\tinvizible.net/en/privacy @@ -568,4 +568,9 @@ Total: %d regras Adicionar URL As regras serão atualizadas após o atraso especificado em horas. + Habilite a VPN sempre ativa e bloqueie conexões sem VPN para que o InviZible Pro bloqueie a Internet quando o aplicativo não estiver em execução + Use proxy para aplicativos que ignoram o Tor + Substituir a lista remota + Substituir a lista local + Você configurou o uso do proxy Socks5, mas parece que ele não está sendo executado. Verifique suas configurações! diff --git a/tordnscrypt/src/main/res/values-pt/strings.xml b/tordnscrypt/src/main/res/values-pt/strings.xml index b593699a8..9b2babc78 100644 --- a/tordnscrypt/src/main/res/values-pt/strings.xml +++ b/tordnscrypt/src/main/res/values-pt/strings.xml @@ -406,7 +406,7 @@ \n\n\tO InviZible Pro pode usar root, se o seu aparelho tiver privilégios de root, ou usa uma VPN local para transmitir tráfego de Internet para redes Tor, DNSCrypt e I2P. \n\n\tPolítica de Privacidade: \n\tO InviZible Pro não coleta ou compartilha nenhum dado pessoal ou sensível do utilizador. - \n\n\tCopyright © 2019-2024 + \n\n\tCopyright © 2019-2025 \n\tGarmatin Oleksandr \n\tinvizible.soft@gmail.com \n\tinvizible.net/en/privacy diff --git a/tordnscrypt/src/main/res/values-ru/strings.xml b/tordnscrypt/src/main/res/values-ru/strings.xml index 1d55dac33..54b0dbe78 100644 --- a/tordnscrypt/src/main/res/values-ru/strings.xml +++ b/tordnscrypt/src/main/res/values-ru/strings.xml @@ -235,7 +235,7 @@ Разрешить соединение через IPv4. Разрешить соединение через IPv6. Не разрешать транзитные туннели. Отключает транзит полностью. - Устройство будет \"ненасытным\". + Устройство будет флудфилом. Ограничение полосы пропускания в KBps. Максимальная полоса пропускания для транзита 0–100%. Включить SSU протокол (использовать UDP). @@ -272,6 +272,7 @@ Похоже андроид закрыл модуль Tor. Ваше интернет соединение восстановлено. Проверьте настройки устройства! Похоже андроид закрыл модуль I2P. Проверьте настройки устройства! Похоже DNSCrypt не может подключиться к интернету. Можете попробовать выбрать другие сервера DNSCrypt. Вы сможете найти их в МЕНЮ -> Быстрые настройки -> Выбрать сервера DNSCrypt + Вы настроили использование Socks5 прокси, но прокси, похоже, не работает. Пожалуйста, проверьте свои настройки! Похоже Tor не може подключиться к интернету. Интернет провайдер может блокировать Tor. Попробуйте использовать мосты Tor. Вы сможете найти их в МЕНЮ -> Быстрые настройки -> Мосты Детский замок Вы можете заблокировать интерфейс этого приложения. Пожалуйста, введите пароль или используйте предыдущий. @@ -373,7 +374,7 @@ \n\n\tInviZible Pro может использовать рут, если у Вас есть рут права, или использовать локальный VPN для доставки интернет траффика напрямую в сети Tor, DNSCrypt и I2P. \n\n\tПолитика конфиденциальности: \n\tInviZible Pro не собирает и не передает какие-либо личные или конфиденциальные данные пользователей. - \n\n\tCopyright © 2019-2024 + \n\n\tCopyright © 2019-2025 \n\tGarmatin Oleksandr \n\tinvizible.soft@gmail.com \n\tinvizible.net/en/privacy @@ -448,6 +449,7 @@ Исключить приложения Список приложений для использования напрямую. Эти приложения не будут использовать Tor или прокси, чтобы подключиться к интернету! Пожалуйста, выберите как минимум приложение - владелец прокси, если вы используете локальный прокси. 
+ Использовать прокси для приложений, которые работают без Tor Использовать прокси для DNSCrypt Использовать прокси для Tor Использовать прокси для Purple I2P diff --git a/tordnscrypt/src/main/res/values-ta/strings.xml b/tordnscrypt/src/main/res/values-ta/strings.xml index b0bf05883..d7f2e2b4c 100644 --- a/tordnscrypt/src/main/res/values-ta/strings.xml +++ b/tordnscrypt/src/main/res/values-ta/strings.xml @@ -1,5 +1,5 @@ - + DNSCrypt மற்றும் I2P உடன் Tor உங்கள் தனியுரிமைக்கான சிறந்த தீர்வு I2P இயங்குகிறது @@ -9,10 +9,10 @@ I2P நிறுவப்படவில்லை DNSCrypt நிறுத்தப்பட்டது DNSCrypt நிறுவப்பட்டது - DNSCrypt நிறுவுதல் + Dnscrypt நிறுவுதல் DNSCrypt நிறுவப்படவில்லை தயவுசெய்து காத்திருங்கள்… - மீண்டும் காட்டாதே + காட்ட வேண்டாம் Tor நிறுத்தப்பட்டது DNSCrypt தொடங்குகிறது DNSCrypt இயங்குகிறது @@ -37,14 +37,14 @@ மறுக்கவும் நீ சொல்வது உறுதியா\? இந்தச் செயலைச் செயல்தவிர்க்க முடியாது! உள்ளூர் VPN பயன்முறையில் InviZible Pro அல்லது சொந்த ப்ராக்ஸி அல்லது உள்ளூர் VPN அம்சத்துடன் கூடிய பயன்பாடுகளை ப்ராக்ஸி பயன்முறையில் பயன்படுத்தலாம். - ப்ராக்ஸி பயன்முறையில் சொந்த ப்ராக்ஸி அல்லது உள்ளூர் VPN அம்சத்துடன் கூடிய பயன்பாடுகளுடன் InviZible Pro ஐப் பயன்படுத்தலாம். + பதிலாள் பயன்முறையில் சொந்த பதிலாள் அல்லது உள்ளக விபிஎன் அம்சத்துடன் பயன்பாடுகளுடன் இன்விசிபிள் புரோவைப் பயன்படுத்தலாம். கோப்பை சேமிப்பதில் பிழை! பொதுவான அமைப்புகள் DNSCrypt அமைப்புகள் Tor அமைப்புகள் I2P அமைப்புகள் வேகமான அமைப்புகள் - காப்பு மற்றும் மீட்டமை + காப்புப்பிரதி மற்றும் மீட்டமை பற்றி பதிவுகள் அமைப்புகளை மீட்டமை @@ -56,11 +56,493 @@ காப்புப்பிரதி மீட்டெடுக்கப்பட்டது அமைப்புகள் தானாக தொடங்கு - துவக்கத்தில் DNSCrypt ஐ தொடங்கவும் + ஆட்டோச்டார்ட் டி.என்ச்கிரிப்ட் அமைப்புகள் I2P நிறுத்தப்பட்டது DNSCrypt நிறுத்தப்பட்டது துவக்கத்தில் தொடங்கவும் I2P நிறுவப்பட்டது ரூட் கிடைக்கிறதா எனச் சரிபார்க்கிறது… + மாற்றங்களைச் சேமிக்க விரும்புகிறீர்களா? + %s நிறுவப்பட்டுள்ளன + பின்னர் கேளுங்கள் + புதுப்பிப்பு + சேர்க்க நீண்ட அழுத்தவும். + தனிமைப்படுத்தும் அமைப்புகள் + பதிவிறக்கம் நிறுத்தப்பட்டது + டோர் இயங்கும் + டோர் நிறுத்துதல் + காப்பீடு பதிப்பு + நன்கொடை + தளங்களைத் தேர்ந்தெடுக்கவும் + Pls தெளிவற்ற வகையைத் தேர்ந்தெடுக்கவும்: + டோர் ஐபி மாற்றும் + சாம் இடைமுகம் + I2pd.conf ஐ நேரடியாகத் திருத்தவும் + கடவுச்சொல்லை உள்ளிடவும். + தவறான கடவுச்சொல்! + புதுப்பிப்புகளை சரிபார்க்கிறது + ஃபயர்வால் அறிவிப்புகள் + துணை அறிவிப்புகள் + கடவுச்சொல்: + I2p ஐ மறுதொடக்கம் செய்யுங்கள் + ரிலேக்கள் + மூலங்கள் + ச்னோஃப்ளேக் பாலங்களுக்கான ச்டன் சேவையகங்கள் + கண்ணுக்கு தெரியாத பயன்பாடுகள் + உள்வரும் இணைப்புகள் + IPv6 மூலம் தகவல்தொடர்புகளை இயக்கவும். + கிடைக்கக்கூடிய ரிலேக்களின் தொலைநிலை பட்டியல்கள். + ரூட் கட்டளைகள் அறிவிப்பு + இந்த புலத்தை காலியாக விடலாம் + DNS-Over-HTTPS நெறிமுறையை செயல்படுத்தும் சேவையகங்களைப் பயன்படுத்தவும். + சேவையகம் அதன் சொந்த தடுப்புப்பட்டியலைச் செயல்படுத்தக்கூடாது (பெற்றோரின் கட்டுப்பாட்டுக்கு, விளம்பரங்களைத் தடுக்கும்…). + DNSCRYPT சேவையகங்களுடன் இணைக்க UDP க்கு பதிலாக TCP ஐப் பயன்படுத்தவும். நீங்கள் DOR க்கு மேல் DnScrypt ஐப் பயன்படுத்தினால் இந்த விருப்பம் இயக்கப்பட வேண்டும். + சாக்ச் பதிலாள் + மேலோடை சேவையகங்களுடன் இணைக்க எப்போதும் TCP ஐப் பயன்படுத்தவும் + ஆண்ட்ராய்டு பிணையம் அமைப்புகளில் தனியார் டிஎன்எச் அணைக்கவும். இந்த விருப்பம் கண்ணுக்கு தெரியாததில் தலையிடுகிறது. + மணிநேரங்களில் குறிப்பிட்ட தாமதத்திற்குப் பிறகு சேவையக பட்டியல் புதுப்பிக்கப்படும். + மணிநேரங்களில் குறிப்பிட்ட தாமதத்திற்குப் பிறகு விதிகள் புதுப்பிக்கப்படும். + டி.என்ச்கிரிப்ட் ரிலேக்கள் + ஆண்ட்ராய்டு வைஃபை பிணையம் அமைப்புகளில் ப்ராக்சியை முடக்கவும். இந்த விருப்பம் வரவேற்புடன் தலையிடுகிறது. 
+ பதிவை நீக்கு + உள்ளக மண்டலங்களுக்கான வினவல்களை மேலோடை தீர்வுகளுக்கு (எப்போதும் பிழைகள் அல்லது காலக்கெடுவை ஏற்படுத்தும்) வினவல்களுக்கு உடனடியாக பதிலளிக்கவும். + ஆண்ட்ராய்டு மொபைல் பிணையம் APN அமைப்புகளில் ப்ராக்சியை முடக்கவும். இந்த விருப்பம் வரவேற்புடன் தலையிடுகிறது. + தயவுசெய்து காத்திருங்கள்… இறக்குமதி செய்யப்பட்ட %d விதிகள். + இயக்கப்பட்டால், டோர் ஐபிவி 6 க்கு மேல் அடைவு சேவையகங்கள் அல்லது நுழைவு முனைகளுடன் இணைக்கப்படலாம். + Tor.conf இல் விருப்பம் இல்லை! + தடை செய்ய புரவலன் அல்லது ஐபி + புரவலன் அல்லது ஐபியைத் திருத்தவும் + முடக்கப்பட்டது + ரெண்டெச்வச் என்பதைத் தேர்ந்தெடுக்கவும் + இணைய வழங்குநரிடமிருந்து தரகரின் டொமைன் பெயரை மறைக்கிறது. இணைப்பை நிறுவ தரகர் பயன்படுத்தப்படுகிறார். + டோர் சாக்ச் 5 பதிலாள் மூலம் அனைத்து அல்லது இணைப்புகளையும் உருவாக்கும். + வெவ்வேறு பயன்பாடுகளிலிருந்து ச்ட்ரீம்களுடன் சுற்றுகளை பகிர வேண்டாம். + வேறு இலக்கு முகவரியைக் குறிவைத்து ச்ட்ரீம்களுடன் சுற்றுகளைப் பகிர வேண்டாம். + வேறு இலக்கு துறைமுகத்தை குறிவைத்து ச்ட்ரீம்களுடன் சுற்றுகளைப் பகிர வேண்டாம். + அனைத்தையும் தெரிவுசெய் + NTCP க்கான பதிலாள் சேவையகத்தைக் குறிப்பிடவும். Http: // முகவரி: துறைமுகம் அல்லது சாக்ச்: // முகவரி: துறைமுகம். + HTTP பதிலாள் + Http outproxy முகவரி + சாக்ச் பதிலாள் + சாக்ச் பதிலாள் + சாக்ச் பதிலாள் துறைமுகம் + வெளிச்சத்தின் முகவரி (ஐபி அல்லது உள்ளூர்). I2P க்கு வெளியே கோரிக்கைகள் அங்கு செல்லும். + டோர் ஆண்ட்ராய்டு சிச்டத்தால் கொல்லப்பட்டதாகத் தெரிகிறது. உங்கள் இணைய இணைப்பு மீட்டெடுக்கப்பட்டது. சாதன அமைப்புகளை சரிபார்க்கவும்! + ஆண்ட்ராய்டு அமைப்பால் I2P கொல்லப்பட்டதாகத் தெரிகிறது. சாதன அமைப்புகளை சரிபார்க்கவும்! + உள்ளார்ந்த சார்பு திட்டமும் அதன் எழுத்தாளரும் உதவிக்கு பாராட்டுக்களை வெளிப்படுத்துகிறார்கள்! + Dnscrypt உடன் பாதுகாக்கவும் + ஊதா I2P உடன் I2P நெட்வொர்க்குக்கான அணுகல் + கவனம் + துரதிர்ச்டவசமாக, இந்த நற்பொருத்தம் காப்பீடு பதிப்பிற்கு மட்டுமே கிடைக்கிறது. + நீங்கள் வாங்குவதற்கு நிலுவையில் உள்ளது என்பதை நினைவில் கொள்க: + பேட்டரி பயன்பாட்டை மேம்படுத்துவதை நிறுத்தவா? + BUSICBOX ஐத் தேர்ந்தெடுக்கவும் + பகிர்தல் விதிகளை அழிக்கவும் + ஆடை விதிகள் + NTCP2 போக்குவரத்து நெறிமுறையை இயக்கவும் (TCP ஐப் பயன்படுத்தவும்). + வெளிச்செல்லும் பதிலாள் + HTTP பதிலாள் + மீண்டும் கேட்க வேண்டாம் + புதுப்பிப்புகள் சரிபார்க்கும்போது காத்திருங்கள். + புதுப்பிப்பு சேவையகம் தற்காலிகமாக கிடைக்கவில்லை. தயவுசெய்து பின்னர் மீண்டும் முயற்சிக்கவும். + சேமித்து இயக்கவும் + இந்த கணினி பயன்பாட்டைத் தடுப்பது நிலையற்ற இணைய இணைப்பை ஏற்படுத்தக்கூடும்! + பச்சை - அனுமதிக்கவும், வெள்ளை - இணைப்புகளை மறுக்கவும் + டோர் மற்றும் டோர் இயங்கும்போது மட்டுமே புதுப்பிப்புகளை சரிபார்க்கவும் + மற்றொன்று + ஆண்ட்ராய்டு ஆல் கொல்லப்படுவதிலிருந்து பயன்பாட்டைப் பாதுகாக்கவும் + ஆட்ச்பாட்டுக்கு HTTP துறைமுகம் 80 உடன் இணைப்பை மறுக்கவும் + வைஃபை நெட்வொர்க்குகளில் மேன்-இன்-நடுத்தர ஆர்ப் ச்பூஃபிங் மற்றும் முரட்டு டி.எச்.சி.பி தாக்குதல்களைக் கண்டறியவும். + பதிலாள் + SOCKS5 ப்ராக்சியைப் பயன்படுத்தவும் + சுவிட்சைக் கொல்லுங்கள் + ஒவ்வொரு இணைப்பு மாற்றத்திலும் விதிகளைப் புதுப்பிக்கவும் + ச்கிரிப்ட் கட்டுப்பாட்டை இயக்கவும் + உள்வரும் அனைத்து போக்குவரத்தையும் TOR வழியாக வழிநடத்துங்கள் + தளங்களை விலக்கு + ஆட்ச்பாட்டுக்கு நேரடியாக திறக்க தளங்கள் பட்டியல் + I2P பகிர்வை அனுமதிக்கவும் + TTL ஐ சரிசெய்யவும் + உள்ளக VPN ஐப் பயன்படுத்தி TTL 64 க்கு சரி செய்யப்படும். கர்னல் உதவி தேவையில்லை. TTL ஐ சரிசெய்ய dnscrypt அல்லது/மற்றும் tor ஐத் தொடங்கவும். + டோர் மறுதொடக்கம் செய்யுங்கள் + Xtables பூட்டுக்காக காத்திருங்கள் + இணைத்தல்… + நெட்வொர்க்கிற்காக காத்திருக்கிறது… + கொலை சுவிட்ச் காரணமாக இணையம் தடுக்கப்பட்டது. இணைப்பை அனுமதிக்க டோர், டி.என்ச்கிரிப்ட் அல்லது I2P ஐத் தொடங்குங்கள். அல்லது பொதுவான அமைப்புகளில் கொலை சுவிட்சை முடக்கவும். 
+ அமைப்புகள் சேமிக்கப்பட்டன + குறிப்பிட்ட பண்புகளை நிறைவு செய்ய சேவையகங்கள் (நிலையான + தொலை மூலங்களிலிருந்து) தேவை + DNSCRYPT நெறிமுறையை செயல்படுத்தும் சேவையகங்களைப் பயன்படுத்தவும். + IPv4 ஐ அடையக்கூடிய சேவையகங்களைப் பயன்படுத்தவும் + வெளிச்செல்லும் பதிலாள் + அனைத்து TCP இணைப்புகளையும் உள்ளக TOR உள்வரும் SOCKS5 ப்ராக்சிக்கு வழிநடத்துங்கள். TOR UDP ஐ ஆதரிக்கவில்லை, எனவே TCP ஐ உண்மைக்கு அமைக்கவும். + பிற அமைப்புகள் + கணினி டிஎன்எச் அமைப்புகளைப் பயன்படுத்த டி.என்ச்கிரிப்ட் முயற்சிக்க வேண்டாம். தொடக்கவார் தீர்வுகளை நிபந்தனையின்றி பயன்படுத்துங்கள். + வடிப்பான்கள் + ஒரு பிரத்யேக சேவையகங்களுக்கு குறிப்பிட்ட களங்களுக்கான பாதை வினவல்கள். + வினவல் பதிவு + சொற்களஞ்சியத்தை குறைக்க, இந்த வினவல் வகைகளை பதிவு செய்ய வேண்டாம். எல்லாவற்றையும் பதிவு செய்ய காலியாக வைத்திருங்கள். + திறந்த வினவல் பதிவு + இல்லாத மண்டலங்களுக்கான பதிவு வினவல்கள். இந்த வினவல்கள் தீம்பொருள், உடைந்த/வழக்கற்றுப் போன பயன்பாடுகள் மற்றும் 3 வது தரப்பினருக்கு அவற்றின் இருப்பைக் குறிக்கும் சாதனங்கள் இருப்பதை வெளிப்படுத்தலாம். + சந்தேகத்திற்கிடமான பதிவு + சந்தேகத்திற்கிடமான பதிவைத் திறக்கவும் + தடுப்புப்பட்டியல் ஒரு வரிக்கு ஒரு வடிவத்தால் ஆனது. + ஐபி பிளாக்லிச்ட் + ஐபி தடுப்புப்பட்டியலை அழிக்கவும் + அனுமதிப்பட்டியலாளர் + மணிநேரங்களில் குறிப்பிட்ட தாமதத்திற்குப் பிறகு ரிலே பட்டியல் புதுப்பிக்கப்படும். + Dnscrypt சந்தேகத்திற்கிடமான பதிவு + பதிவு காலியாக உள்ளது + டொமைன் பெயர் இல்லாமல் புரவலன் பெயர்களுக்கான A மற்றும் AAAA வினவல்களுக்கு உடனடியாக பதிலளிக்கவும். + Dnscrypt-proxy.toml ஐ நேரடியாகத் திருத்தவும் + வெற்று பதிலுடன் ஐபிவி 6 தொடர்பான வினவல்களுக்கு உடனடியாக பதிலளிக்கவும். ஐபிவி 6 இணைப்பு இல்லாதபோது இது விசயங்களை வேகமாக்குகிறது, ஆனால் சில ச்டப் தீர்வுகளுடன் நம்பகத்தன்மை சிக்கல்களையும் ஏற்படுத்தும். + வெளியேறும் முனையைத் தேர்ந்தெடுக்கும்போது ஒருபோதும் பயன்படுத்த வேண்டிய நாட்டு குறியீடுகளின் பட்டியல், இது டோர் நெட்வொர்க்குக்கு வெளியே உங்களுக்கான போக்குவரத்தை வழங்குகிறது. + முடக்கப்பட்டால், IPv4 க்கு மேல் அடைவு சேவையகங்கள் மற்றும் நுழைவு முனைகளுடன் இணைப்பதை TOR தவிர்க்கும். + பைபாசுக்கு புரவலன் அல்லது ஐபி + நாள் பயன்முறை + தானி + ரூட் சலுகைகளைப் பயன்படுத்துங்கள் + ரூட் மூலம் தொகுதிகளை இயக்கவும் + லேன் சாதன ஐபி முகவரி + இரட்டை பயன்பாடுகள், MIUI, தீவு, தங்குமிடம் மற்றும் பணி சுயவிவர பயன்பாடுகளுக்கான உதவி. VPN பயன்முறையில் பயனற்றதாக இருக்கலாம் + IPTABLES ஐத் தேர்ந்தெடுக்கவும் + பதிவு செய்ய ரூட் கட்டளைகளை சேமிக்கவும் + ஐபி தடுப்புப்பட்டியல் ஒரு வரிக்கு ஒரு வடிவத்தால் ஆனது. + டி.என்ச்கிரிப்ட் வினவல் பதிவு + உள்ளார்ந்த சார்பு திட்டம் உங்கள் உதவியை நாடுகிறது. காப்பீடு அம்சங்களை வாங்க சரி என்பதை அழுத்தவும். + சாம் பிரிட்ச் துறை. + மன்னிக்கவும், ஆனால் நீங்கள் வாங்கியதை உறுதிப்படுத்த முடியாது. 3 நாட்களுக்குப் பிறகு நீங்கள் பணத்தைத் திரும்பப் பெறுவீர்கள். + முரட்டு டி.எச்.சி.பி கண்டறிதல் + அறிவிப்பைக் காட்டு + காட்ட அனுமதிக்கப்படாத எனக் குறிக்கப்பட்ட அனைத்து செய்திகளையும் காட்டு + தானி + TTL ஐ சரிசெய்யவும், கண்ணுக்கு தெரியாதவருடன் இணைக்க LAN சாதனத்தை உள்ளமைக்கவும். இயல்புநிலை நுழைவாயிலைப் பயன்படுத்தவும்: %1$s, DNS சேவையகம்: %2$s. + ரூட் கட்டளைகளை இயக்குதல்… + அறியாத DNS-Over-HTTPS நெறிமுறையை செயல்படுத்தும் சேவையகங்களைப் பயன்படுத்தவும். + முறை அடிப்படையிலான அனுமதிப்பட்டியல் (தடுப்புப்பட்டியல் பைபாச்) + அனுமதிப்பட்டியை இறக்குமதி செய்யுங்கள் + மூலங்கள் + Dnscrypt-proxy.toml இல் விருப்பம் இல்லை! + முடிந்தது! இறக்குமதி செய்யப்பட்ட %d விதிகள். + காண்பிக்க பல விதிகள். முதல் 1000 மட்டுமே காட்டப்பட்டுள்ளது. + சாக்ச் வெளிச்செல்லும் பதிலாள் + தெளிவான தற்காலிக சேமிப்பு + மதிப்புமிக்க மூலம் பயன்படுத்த பயன்பாடுகள் + உள்வரும் இணைப்புகளுக்கான திசைவி வெளிப்புற ஐபி (இயல்புநிலை: SSU2 இயக்கப்பட்டிருந்தால் ஆட்டோ). + IPv4 மூலம் தகவல்தொடர்புகளை இயக்கவும். 
+ திசைவி வெள்ளப்பெருக்கு இருக்கும். + உள்வரும் இணைப்புகளைக் கேட்க துறைமுகம் (இயல்புநிலை: ஆட்டோ (சீரற்ற)). + HTTP பதிலாள் துறைமுகம் + கேட்க வேண்டிய துறை (HTTP ப்ராக்சி). + HTTP பதிலாள் மேலோடை அவுட் பதிலாள் முகவரி (http: //false.i2p போன்றவை) + சாக்ச் அவுட்பிராக்சி துறைமுகம் + எல்கமல் முன்கூட்டிய அட்டவணைகளைப் பயன்படுத்தவும். + டி.என்ச்கிரிப்ட் ஆண்ட்ராய்டு சிச்டத்தால் கொல்லப்பட்டதாகத் தெரிகிறது. உங்கள் இணைய இணைப்பு மீட்டெடுக்கப்பட்டது. சாதன அமைப்புகளை சரிபார்க்கவும்! + பூட்டப்பட்ட கட்டுப்பாடு + ஆட்ச்பாட் + புதுப்பிப்பு + பின்னர் + புதிய பதிப்பு: + உள்ளார்ந்த இந்த அதிகாரப்பூர்வமற்ற பதிப்பு போல் தெரிகிறது. தயவுசெய்து அதை கவனித்துக் கொள்ளுங்கள்! + சார்பு பதிப்பிற்கு மட்டுமே + நன்கொடை + உள்ளார்ந்த சார்பு திட்டம் உங்கள் உதவியை நாடுகிறது. தயவுசெய்து நன்கொடை பக்கத்தைப் பார்வையிடவும் அல்லது நீங்கள் ஏற்கனவே பெற்ற காப்பீடு குறியீட்டை உள்ளிடவும். + குறியீட்டை உள்ளிடவும் + டோர் உடன் ஐபி மறைக்கவும் + மீட்டமை + தனிப்பயன் சேவையகத்தைச் சேர்க்கவும் + தவறான தனிப்பயன் சேவையக உள்ளமைவு. SDNS புலத்தை சரிபார்க்கவும். + பதிலாள் சேவையகம்: + மாற்றங்களைச் சேமிக்க விரும்புகிறீர்களா? இது கண்ணுக்கு தெரியாத புரோவை உடைக்கக்கூடும். + விண்ணப்பங்கள் பட்டியல் நேரடியாக திறக்க. இந்த பயன்பாடுகள் இணையத்துடன் இணைக்க TOR அல்லது ப்ராக்சியைப் பயன்படுத்தாது! + ஊதா I2P க்கு ப்ராக்சியைப் பயன்படுத்தவும் + உள்ளக பிணையம், வெங்காயம் மற்றும் I2P தளங்களுக்கான இணைப்புகளை அனுமதிக்கவும் + அனைத்தையும் தேர்வு செய்யுங்கள் + இணைய இணைப்பு சோதனை + தொலைநிலை பட்டியலை மாற்றவும் + கட்டுப்பாடு திறக்கப்பட்டது + பதிவிறக்கத்தை ரத்துசெய் + புதுப்பிப்பு சேவையகம் கிடைக்கவில்லை. + TOR ஐத் தவிர்ப்பதற்கான பயன்பாடுகளுக்கு ப்ராக்சியைப் பயன்படுத்தவும் + அனைத்தும் + அனைத்தையும் தெரிவுசெய் + திருத்த நீண்ட அழுத்தவும். + ரிலேக்கள் அநாமதேய + விதியைச் சேர்க்கவும் + மொத்தம்: %d விதிகள் + முகவரி ஐச் சேர்க்கவும் + டோர் ஓச்ட்களுக்கான அண்மைக் கால இணைப்புகளைக் கண்காணிக்கும் மற்றும் ஒவ்வொன்றிற்கும் ஒரே வெளியேறும் முனையை மீண்டும் பயன்படுத்த முயற்சிக்கும். இந்த விருப்பம் முடிவற்ற கேப்ட்சாவைத் தவிர்க்க உதவும். + KBPS இல் அலைவரிசை வரம்பு. + போக்குவரத்துக்கான அலைவரிசை வரம்பின் அதிகபட்சம். 0–100. + திசைவி போக்குவரத்து சுரங்கங்களை ஏற்காது, போக்குவரத்து போக்குவரத்தை முழுமையாக முடக்குகிறது. + SSU போக்குவரத்து நெறிமுறையை இயக்கவும் (UDP ஐப் பயன்படுத்தவும்). + Http outproxy + வழக்கமான இணையத்திற்கு நுழைவாயில் + கேட்க வேண்டிய துறை (சாக்ச் ப்ராக்சி). + சாக்ச் அவுட்பிராக்சி + சாக்ச் அவுட்பிராக்சி முகவரி + சாம் இடைமுகம் + சாம் இடைமுக துறை + குறியாக்கவியல் + ஒத்திருக்கிறது + SU3 கையொப்பத்தை சரிபார்க்கவும். + திறந்த கோப்பு விவரிப்பாளர்களின் வரம்பு எண் (0 - கணினி வரம்பைப் பயன்படுத்தவும்). + முகவரி நூல் + ஆரம்ப அமைப்பிற்கான முகவரி புத்தக சந்தா முகவரி. + I2pd.conf இல் விருப்பம் இல்லை! + Tunnels.conf ஐ நேரடியாகத் திருத்தவும் + தகவல் + குழந்தை பூட்டு + தயவுசெய்து இன்விசிபிள் புரோவை மறுதொடக்கம் செய்து முழுமையான நிறுவலை அனுமதிக்கவும்! நிறுவிய பின் ஆட்ச்பாட்டை மறுசீரமைத்த பிறகு, நீங்கள் அதைப் பயன்படுத்தினால்! + ஏற்கனவே இந்த குறியீட்டைக் கொண்டு மதிப்புமிக்க 3 நகல்களை நீங்கள் செயல்படுத்தியுள்ளதாகத் தெரிகிறது. அது தவறு என்றால், தயவுசெய்து டெவலப்பரைத் தொடர்பு கொள்ளவும். + புதுப்பிப்புகள் நிறுவப்பட்டன. + இன்விசிபிள் புரோ திட்டமும் அதன் எழுத்தாளரும் உதவிக்கு பாராட்டுக்களைத் தெரிவிக்கின்றனர். நீங்கள் பதிவிறக்கம் செய்து புரோவுக்கு மேம்படுத்த விரும்புகிறீர்களா? புதுப்பிப்பு பின்னணியில் தொடரும். + பயனர்பெயர்: + பயன்பாடுகளை விலக்கு + மண்டலம் + ஃபயர்வாலை செயல்படுத்த, நீங்கள் குறைந்தபட்சம் டி.என்ச்கிரிப்ட் அல்லது டோர் தொடங்க வேண்டும். + அதற்கு இணையத்தை அனுமதிக்கவா? 
+ ஆட்டோச்டார்ட் டோர் + ஆட்டோச்டார்ட் i2p + சுணக்கம் + ஆட்டோச்டார்ட் சரியாக வேலை செய்யாவிட்டால் மட்டுமே தாமதத்தை (நொடி) பயன்படுத்தவும் + டி.என்ச்கிரிப்ட் சேவையகங்கள் + Dnscrypt சேவையகங்களைத் தேர்ந்தெடுக்கவும் + TOR அமைப்புகள் + ரூட்டிங் அமைப்புகள் + டோர் மூலம் அனைத்து போக்குவரத்தையும் வழிநடத்துங்கள் + அனைத்து போக்குவரத்தையும் உள்ளார்ந்த மூலம் வழிநடத்துங்கள் + பயன்பாடுகளைத் தேர்ந்தெடுக்கவும் + தளங்கள் பட்டியல் TOR உடன் திறக்க. இந்த நற்பொருத்தம் சி.டி.என் பின்னால் உள்ள தளங்களுக்கு வேலை செய்யாது + TOR உடன் பயன்படுத்த பயன்பாடுகள் பட்டியல் + பயன்பாடுகள் பட்டியலுடன் பயன்படுத்த பட்டியல் + தளங்களை விலக்கு + நேரடியாக திறக்க தளங்கள் பட்டியல். இந்த நற்பொருத்தம் சி.டி.என் பின்னால் உள்ள தளங்களுக்கு வேலை செய்யாது + பயன்பாடுகளை விலக்கு + இடைவெளியைப் புதுப்பிக்கவும் + விண்ணப்பங்கள் பட்டியல் நேரடியாக திறக்க + பாலங்கள் + நீங்கள் TOR நெட்வொர்க்குடன் இணைக்க முடியாவிட்டால் அதைப் பயன்படுத்தவும் + பாலங்கள் இயல்புநிலை பட்டியலைப் பயன்படுத்தவும் + பாலங்கள் சொந்த பட்டியலைப் பயன்படுத்தவும் + பாலங்கள் கோருங்கள் + பாலங்களைச் சேர்க்கவும் + பாலத்தைத் திருத்து + பி.எல்.எச் முதலில் பாலத்தை செயலிழக்கச் செய்யுங்கள்! + தவறான புரவலன் பெயர்! + கருப்பொருள் தேர்ந்தெடுக்கவும் + இரவு முறை + கணினி இயல்புநிலை + பைபாச் லேன் முகவரிகள் + லேன் இடங்களுக்கு டோர் பயன்படுத்த வேண்டாம் மற்றும் ஐஏஎன்ஏ ஒதுக்கப்பட்ட ஐபி தொகுதிகள் + மொழியைத் தேர்ந்தெடுக்கவும் + தொகுதி http + HTTP துறைமுகம் 80 உடன் இணைப்பை மறுக்கவும் + தானியங்கி புதுப்பிப்புகள் + நாள்தோறும் உள்ளுணர்வு புரோ மற்றும் தொகுதிகள் புதுப்பிப்புகளை சரிபார்க்கவும் + புதுப்பிப்பைச் சரிபார்க்கவும் + புதிய பதிப்புகள் இப்போது கிடைக்குமா என்று சரிபார்க்கவும் + டோர் மூலம் கண்டிப்பாக புதுப்பிக்கவும் + எம்ஐடிஎம் தாக்குதல் கண்டறிதல் + ஆர்ப் ச்பூஃபிங் கண்டறிதல் + வைஃபை நெட்வொர்க்குகளில் மேன்-இன்-நடுத்தர முரட்டு டி.எச்.சி.பி தாக்குதல்களைக் கண்டறியவும். + தாக்குதலின் போது இணைய இணைப்பு தடுக்கப்படும் + டி.என்.எச் மறுப்பு பாதுகாப்பை + தாக்குதல் கண்டறியப்படும்போது இணையத்தைத் தடுக்கும் + டிஎன்எச் மறுப்பு தாக்குதல் கண்டறியப்படும்போது தளத்தைத் தடுக்கும் + இண்டிசிபிள் புரோ அனைத்து இணைப்புகளையும் SOCKS5 பதிலாள் மூலம் செய்யும் + டோர், டி.என்ச்கிரிப்ட் மற்றும் ஊதா ஐ 2 பி ஆகியவை நிறுத்தப்படும் போது இணைய இணைப்பைத் தடுக்கிறது + விதிகளைப் புதுப்பிக்கவும் + பயன்பாடு இயங்காதபோது இணையத்தைத் தடுக்க VPN இல்லாமல் VPN இல்லாமல் எப்போதும் VPN மற்றும் தடுப்பு இணைப்புகளை இயக்கவும் + சாதன தூக்கத்தைத் தடுக்கவும் + சாதனம் டோச் பயன்முறையில் விழ அனுமதிக்காதீர்கள். ஆட்ச்பாட்டைப் பயன்படுத்தும் போது பயனுள்ளதாக இருக்கும். பேட்டரியை வடிகட்டுகிறது! + நிகழ்நேர பதிவுகள் + டிஎன்எச் தாவலில் பயன்பாட்டு இணைப்பு பதிவுகளைக் காண்பி + தளங்களைத் தேர்ந்தெடுக்கவும் + KB இல் கோர் கோப்பின் அதிகபட்ச அளவு (0 - கணினி வரம்பைப் பயன்படுத்தவும்). + HTTP/3 க்கான ஆதரவை இயக்கவும் (DOH3, HTTP வீச்சலகு QUIC). டி.என்ச்கிரிப்டைப் போலவே, ஆனால் பிற HTTP பதிப்புகளைப் போலல்லாமல், இது TCP க்கு பதிலாக UDP மற்றும் (பொதுவாக) துறைமுகம் 443 ஐப் பயன்படுத்துகிறது என்பதை நினைவில் கொள்க. + இறக்குமதி விதிகள் + வரம்புகள் + முகவரி புத்தக சந்தா முகவரி. + கோப்பு பதிவிறக்குகிறது + ஃபயர்வால் + புதிய பயன்பாடுகளுக்கான இணைப்புகளை அனுமதிக்கவும் + டோரிலிருந்து யுடிபியை விலக்குங்கள் + டோரிலிருந்து விலக்கு + TOR க்கு செல்லும் பாதை + முற்றிலும் விலக்கு + தளங்கள் ஐபி புதுப்பிக்க மணிநேரங்களில் காலம். ஆண்ட்ராய்டு 5.1 மற்றும் அதற்கு மேற்பட்டவர்களுக்கு. புதுப்பிப்பதை நிறுத்த 0 வைக்கவும். + TOR அமைப்புகளில் நுழைவு முனைகளை முடக்கு, இல்லையெனில் நீங்கள் பாலங்களைப் பயன்படுத்த முடியாது. + பாலங்களைப் பயன்படுத்த வேண்டாம் + தெளிவின்மை + ஐபிவி 6 பாலங்கள் + புதிய இயல்புநிலை டோர் பாலங்கள் கிடைக்கின்றன. அவற்றை புதுப்பிக்க விரும்புகிறீர்களா? 
+ படத்திலிருந்து எழுத்துக்களை உள்ளிடவும் + புதிய பாலங்களைக் கோருகிறது + பிரிட்சச்.டொர்ப்ரோசெக்ட்.ஆர்சிலிருந்து உங்கள் புதிய பாலங்கள்: + சேமி + மூடு + பிளாக் ஆட்ச்பாட் http + டி.என்ச்கிரிப்ட், டோர் மற்றும் ஐ 2 பி தொகுதிகளுக்கு ரூட் சலுகைகளைப் பயன்படுத்தவும். இந்த அம்சத்தை இயக்குவது தொகுதிகள் நிர்வகிக்கப்படாதது மற்றும் இணைப்பு சிக்கல்களை ஏற்படுத்தக்கூடும்! + பொருந்தக்கூடிய பயன்முறை + தனிப்பயன் ரோம் கொண்ட உங்கள் சாதனம் மற்றும் தொடக்க பொத்தானை அழுத்தும்போது இணைப்பு இழந்துவிட்டால் இயக்கவும் + செய்திகளுக்கு உதவுங்கள் + பிசியான பாக்சைப் பயன்படுத்த வேண்டாம் + டோர் வழியாக செல்லும் + ஆட்ச்பாட்டுக்கு TOR உடன் திறக்க தளங்கள் பட்டியல் + IPv6 ஐ அடையக்கூடிய சேவையகங்களைப் பயன்படுத்தவும் + குறிப்பிட்ட பண்புகளை நிறைவு செய்ய தொலை மூலங்களால் வரையறுக்கப்பட்ட சேவையகங்கள் தேவை + சாதன IPTABLES ஐப் பயன்படுத்தவும் + Dnscrypt, tor, i2p ஆகியவை பாதுகாக்கப்படுகின்றன. மறைக்க வேண்டாம். + சேவையகம் DNS பாதுகாப்பு நீட்டிப்புகளை (DNSSEC) ஆதரிக்க வேண்டும். + தயவுசெய்து காத்திருங்கள்… + உலகளாவிய அமைப்புகள் + சேவையகம் பயனர் வினவல்களை பதிவு செய்யக்கூடாது (அறிவிப்பு). + பதிலாள் துறைமுகம் + ஆடை விதிகளை அழிக்கவும் + கிளையன்ட் வினவல்களை ஒரு கோப்பில் பதிவு செய்யுங்கள். + பகிர்தல் விதிகள் + குளோக்கிங் ஒரு குறிப்பிட்ட பெயருக்கு முன் வரையறுக்கப்பட்ட முகவரியை வழங்குகிறது. ஓச்ட்ச் கோப்பாக செயல்படுவதோடு கூடுதலாக, இது வேறு பெயரின் ஐபி முகவரியையும் திருப்பித் தரலாம். இது cname தட்டையானது செய்யும். + சந்தேகத்திற்கிடமான வினவல்கள் பதிவு + தடுப்புப்பட்டியல் + தடுப்புப்பட்டியலை இறக்குமதி செய்யுங்கள் + டொமைன் பட்டியலை இறக்குமதி செய்யுங்கள். நீங்கள் பல கோப்புகளைத் தேர்ந்தெடுக்கலாம், நகல் கோடுகள் அகற்றப்படும். + அனுமதிப்பட்டியலை அழிக்கவும் + முடிந்தது! விதிகள் அழிக்கப்படுகின்றன. + இந்த விருப்பம் சில வகையான போக்குவரத்து பகுப்பாய்வுகளுக்கு எதிராக பாதுகாக்க டோர் திணிப்பைப் பயன்படுத்துவதை நிர்வகிக்கிறது. அலைவரிசையை சேமிக்க முடக்கு. + உங்கள் சாதாரண சுற்றுகளில் முதல் ஆப்பிற்கு பயன்படுத்த நாட்டின் குறியீடுகளின் பட்டியல். + நீங்கள் நுழைவு முனைகளைத் தேர்ந்தெடுக்க விரும்பினால் டோர் பாலங்களை முடக்கு. + சாக்சுக்கு பதிலாக HTTP இணைப்பு நெறிமுறையைப் பயன்படுத்தி பதிலாள் இணைப்புகளைக் கேட்க இந்த போர்ட்டைத் திறக்கவும். + வெளிப்படையான பதிலாள் + Tor.conf ஐ நேரடியாகத் திருத்தவும் + தேர்வை அகற்று + பொதுவான அமைப்புகள் + UPNP ஐ இயக்கவும் அல்லது முடக்கவும். + போக்குவரத்து சுரங்கங்களின் அதிகபட்ச எண்ணிக்கையை மீறவும். இயல்பாக 2500. + இந்த பயன்பாட்டின் கட்டுப்பாட்டை நீங்கள் பூட்டலாம். கடவுச்சொல்லை உள்ளிடவும் அல்லது முந்தையதைப் பயன்படுத்தவும். + பதிவுகளுக்கான பாதை: + பதிவுகள் சேமிக்கப்பட்டன. டெவலப்பருக்கு inviziblelogs.txt ஐ அனுப்பவும். கோப்புறையில் பதிவுகளை நீங்கள் காணலாம்: + VPN பயன்முறை பிழை! + இணைக்கப்பட்ட சாதனத்தின் அமைப்புகளில் பதிலாள் சேவையகத்தின் பயன்பாட்டை உள்ளமைக்கவும். இயல்புநிலை மதிப்புகள்: ஐபி: 10.1.10.1 போர்ட்: 8118 + விபத்து அறிக்கை கண்டுபிடிக்கப்பட்டுள்ளது. கண்ணுக்கு தெரியாத சிறந்ததாக்க அதை டெவலப்பருக்கு அனுப்ப விரும்புகிறீர்களா? + புதிய டோர் அடையாளம் + நீங்கள் உள்ளக ப்ராக்சியைப் பயன்படுத்துகிறீர்கள் என்றால் குறைந்தபட்சம் பதிலாள் உரிமையாளர் பயன்பாட்டைத் தேர்ந்தெடுக்கவும். + TOR க்கு ப்ராக்சியைப் பயன்படுத்தவும் + உங்கள் சாதனத்திற்கு ARP SPOFING கண்டறிதல் ஆதரிக்கப்படவில்லை! + UID ஆல் வரிசைப்படுத்துங்கள் + பயனர் + பெயரால் வரிசைப்படுத்துங்கள் + ஃபயர்வால் ஆன்/ஆஃப் + சிஎச்எம் நெட்வொர்க்குகளில் இணைப்புகளை அனுமதிக்கவும் + ஃபயர்வால் அமைப்புகள் + இணையத்துடன் இணைக்கக்கூடிய பயன்பாடுகள் மட்டுமே காட்டப்படுகின்றன. + டோர் ஐபியை மாற்றவும் + SPOOF SNI + ஒரு குறிப்பிட்ட நேரத்திற்கு கிளையன்ட் செயல்பாட்டைக் காணாவிட்டால் டோர் ஒரு செயலற்ற நிலைக்குள் நுழைகிறார். 
குறைந்தது 10 நிமிடங்கள் இருக்க வேண்டும் + இந்த விருப்பத்தை செயல்படுத்த, நீங்கள் 80 மற்றும் 443 துறைமுகங்களுடன் மட்டுமே பாலங்களை முடக்க வேண்டும் அல்லது பாலங்களை பயன்படுத்த வேண்டும். + இயக்கப்பட்டால், டோர் இரண்டு சேவையகங்களை வைக்க மாட்டார், அதன் ஐபி முகவரிகள் ஒரே சுற்றுக்கு மிக நெருக்கமாக உள்ளன. தற்போது, இரண்டு முகவரிகள் ஒரே /16 வரம்பில் பொய் சொன்னால் மிக நெருக்கமாக இருக்கும். + சாக்ச் பேசும் பயன்பாடுகளின் இணைப்புகளைக் கேட்க இந்த துறைமுகத்தைத் திறக்கவும். + VPN பயன்முறை செயலில் உள்ளது + VPN பயன்முறை முடக்கப்பட்டுள்ளது + இன்விசிபிள் புரோ %1$s ஐத் தொடங்க முடியாது! %2$s அமைப்புகளை மீட்டமைக்க முயற்சிக்கவும். இது உதவவில்லை என்றால், தயவுசெய்து உங்கள் சாதனத்தை மறுதொடக்கம் செய்யுங்கள். + எல்லா பயன்பாடுகளையும் காட்டு + SSU2 போக்குவரத்து நெறிமுறையை இயக்கவும் (UDP ஐப் பயன்படுத்தவும்). + சேர்க்க அழுத்தவும். + அறிவிப்புகளைப் புதுப்பிக்கவும் + வெற்றிகரமான இணைப்பு. பிங் %s எம். + ஆர்ப் ச்பூஃபிங் கண்டறியப்பட்டது! + முரட்டு டி.எச்.சி.பி கண்டறியப்பட்டது! + முனைகள் + பயன்பாட்டு தொகுதிகளை நிர்வகிக்க பின்வரும் கட்டளையைப் பயன்படுத்தவும்: m ஒளிபரப்பு -a பான்.அலெக்சாண்டர்.டார்ட்ன்ச்கிரிப்ட்.செல்_ச்கிரிப்ட்_கான்ட்ரோல் -ஈ டிஎன்ச்கிரிப்ட் 1 --ei டோர் 1 --ei i2p 1 %s எங்கே 1 -தொடங்குகிறது, 0 -தொகுதியை நிறுத்துகிறது. + எந்த நேரத்திலும் கணினி டி.என்ச்கிரிப்ட், டோர் அல்லது ஐ 2 பி ஆகியவற்றை நிறுத்துவதைத் தடுக்க ஆண்ட்ராய்டு பேட்டரி உகப்பாக்கத்திலிருந்து இன்விசிபிள் புரோவை விலக்கவும். MIUI போன்ற சில சிறப்பு அமைப்புகளுக்கு கூடுதல் படிகள் தேவைப்படலாம். + பிணைய தரவு சேமிப்பாளரை முடக்கவா? + தயவுசெய்து பின்னணி தரவு பயன்பாட்டை அனுமதிக்கவும், தரவு சேமிப்பாளர் இயக்கத்தில் இருக்கும்போது தரவு பயன்பாட்டை அனுமதிக்கவும். மென்மையான நிகழ்நிலை அனுபவத்திற்கு இது தேவை. + சாதன பிச்பாக்சைப் பயன்படுத்தவும் + பயன்பாட்டு பிச்பாக்சைப் பயன்படுத்தவும் + ஆட்ச்பாட்-சோதனை + துவக்கத்தில் பகிரத் தொடங்குங்கள் + டோர் பகிர்வை அனுமதிக்கவும் + திறந்த டெதரிங் கட்டமைப்பு + Dnscrypt ஐ மறுதொடக்கம் செய்யுங்கள் + பல பயனர் உதவி + பயன்பாடு iptables ஐப் பயன்படுத்தவும் + IPTABLES விதிகளின் ஒரே நேரத்தில் மாற்றத்தைத் தடுக்க பிரத்யேக IPTABLES பூட்டைப் பெறும் வரை காத்திருங்கள். + மேம்பட்ட பயனருக்கு மட்டுமே! + குறைந்தது ஒரு சேவையகத்தைத் தேர்ந்தெடுக்கவும்! + கேட்க உள்ளக துறைமுகம். + இது ஒரு சாதாரண, மறைகுறியாக்கப்படாத டிஎன்எச் தீர்வாகும், இது ஆரம்ப தீர்வுகள் பட்டியலை மீட்டெடுக்கும்போது ஒரு காட்சி வினவல்களுக்கு மட்டுமே பயன்படுத்தப்படும், மேலும் கணினி டிஎன்எச் உள்ளமைவு வேலை செய்யவில்லை என்றால் மட்டுமே. பட்டியல்கள் ஏற்கனவே தற்காலிக சேமிக்கப்பட்டிருந்தால் அது ஒருபோதும் பயன்படுத்தப்படாது. + நீங்கள் ஐபிவி 6-மட்டும் நெட்வொர்க்கில் இருந்தால் மற்றும் ஐபிவி 4 தளங்கள் கிடைக்கவில்லை என்றால் இந்த விருப்பத்தை செயல்படுத்தவும். வேறுவிதமாக அதை இயக்க வேண்டாம், அல்லது நீங்கள் எதையும் இணைக்க முடியாது. + பயன்படுத்தப்பட்ட நிலையான ஐபிவி 6 முன்னொட்டுகளின் தொகுப்பு. + பகிர்தல் விதிகளை இறக்குமதி செய்யுங்கள் + பகிர்தல் விதிகள் கோப்பை இறக்குமதி செய்யுங்கள். நீங்கள் பல கோப்புகளைத் தேர்ந்தெடுக்கலாம், நகல் கோடுகள் அகற்றப்படும். + குளோக்கிங் விதிகளை இறக்குமதி செய்யுங்கள் + குளோக்கிங் விதிகள் கோப்பை இறக்குமதி செய்க. நீங்கள் பல கோப்புகளைத் தேர்ந்தெடுக்கலாம், நகல் கோடுகள் அகற்றப்படும். + வினவல் பதிவு + முறை அடிப்படையிலான தடுப்பு (பிளாக்லிச்ட்) + டொமைன் பட்டியல் அல்லது புரவலன் கோப்பை இறக்குமதி செய்யுங்கள். நீங்கள் பல கோப்புகளைத் தேர்ந்தெடுக்கலாம், நகல் கோடுகள் அகற்றப்படும். + தடுப்புப்பட்டியலை அழிக்கவும் + முறை அடிப்படையிலான ஐபி தடுப்பு (ஐபி பிளாக்லிச்ட்) + ஐபி பிளாக்லிச்ட்டை இறக்குமதி செய்யுங்கள் + ஐபி பிளாக்லிச்ட்டை இறக்குமதி செய்யுங்கள். நீங்கள் பல கோப்புகளைத் தேர்ந்தெடுக்கலாம், நகல் கோடுகள் அகற்றப்படும். + தடுப்புப்பட்டியலாளர்கள் போன்ற வடிவங்களை அனுமதிப்பட்டி கலைஞர் ஆதரிக்கிறார். 
ஒரு பெயர் ஒரு அனுமதிப்பட்டியலுடன் பொருந்தினால், அதனுடன் தொடர்புடைய அமர்வு பெயர்களையும் ஐபி வடிப்பான்களையும் கடந்து செல்லும். + சேவையகங்கள் + கிடைக்கக்கூடிய சேவையகங்களின் தொலைநிலை பட்டியல்கள். + குறியீட்டை உள்ளிடவும் + கட்டுப்படுத்தி அல்லது ஆட்டோமாஃபோச்டன்ரெசோல்வ் அம்சத்திலிருந்து ஒரு மெய்நிகர் கட்டளை காரணமாக டோர் ஒரு மெய்நிகர் (பயன்படுத்தப்படாத) முகவரியை ஒதுக்க வேண்டியிருக்கும் போது, டோர் இந்த வரம்பிலிருந்து ஒதுக்கப்படாத முகவரியை எடுக்கிறார். + கிடைக்கும்போது உள்ளமைக்கப்பட்ட மறையீட்டு வன்பொருள் முடுக்கம் பயன்படுத்த முயற்சிக்கவும். + பூச்சியமற்றதாக இருந்தால், வட்டுக்கு குறைவாக எழுத முயற்சிக்கவும். + அமைக்கப்பட்டால், டோர் மிக நீண்ட காலமாக இணைப்புகளைத் திறக்காது அல்லது திறந்திருக்காது, மேலும் இந்த இணைப்புகளில் குறைவான திணிப்பை அனுப்பும். அலைவரிசையை சேமிக்க இயக்கவும். + வெளியேறும் முனையாக பயன்படுத்த நாட்டுக் குறியீடுகளின் பட்டியல் - அதாவது, டோர் நெட்வொர்க்குக்கு வெளியே உங்களுக்காக போக்குவரத்தை வழங்கும் ஒரு முனை. + ஒரு சுற்று கட்டும் போது தவிர்க்க வேண்டிய நாட்டு குறியீடுகளின் பட்டியல். + ச்ட்ரிக்ட்நோட்ச் இயக்கப்பட்டிருந்தால், நீங்கள் உருவாக்கும் அனைத்து சுற்றுகளுக்கும் பின்பற்ற வேண்டிய தேவையாக TOR மட்டுமே விலக்கு விருப்பத்தை மட்டுமே கருதுகிறது, அவ்வாறு செய்தாலும் கூட உங்களுக்காக செயல்பாட்டை உடைக்கும். + இயக்கப்பட்டிருந்தால், உங்கள் ஃபயர்வால் அனுமதிக்கும் துறைமுகங்களில் இயங்கும் ORS க்கு மட்டுமே வெளிச்செல்லும் இணைப்புகளை TOR உருவாக்கும் (இயல்புநிலை 80 மற்றும் 443 வரை). + ஒவ்வொரு எண் விநாடிகளும் ஒரு புதிய சுற்று உருவாக்கலாமா என்பதைக் கருத்தில் கொள்ளுங்கள். + பெரும்பாலான நொடிகளுக்கு முன்பு முதன்முதலில் பயன்படுத்தப்பட்ட ஒரு சுற்று மீண்டும் பயன்படுத்த தயங்க, ஆனால் ஒரு புதிய ச்ட்ரீமை ஒருபோதும் மிகவும் பழமையான ஒரு சுற்றுக்கு இணைக்க வேண்டாம். + சாக்ச் பதிலாள் + Http சுரங்கப்பாதை + வெளிப்படையான பதிலாள் இணைப்புகளைக் கேட்க இந்த துறைமுகத்தைத் திறக்கவும். + டி.என்.எச் + யுடிபி டிஎன்எச் கோரிக்கைகளை கேட்க இந்த துறைமுகத்தைத் திறந்து, அவற்றை அநாமதேயமாக தீர்க்கவும். + டி.என்ச்கிரிப்ட் இணையத்துடன் இணைக்க முடியாது என்று தெரிகிறது. நீங்கள் மற்றொரு டி.என்ச்கிரிப்ட் சேவையகங்களைத் தேர்வுசெய்ய முயற்சி செய்யலாம். அவற்றை பட்டியல் -> வேகமான அமைப்புகள் -> டி.என்ச்கிரிப்ட் சேவையகங்களைத் தேர்ந்தெடுக்கவும் + SOCKS5 ப்ராக்சியைப் பயன்படுத்த நீங்கள் கட்டமைத்துள்ளீர்கள், ஆனால் பதிலாள் இயங்குவதாகத் தெரியவில்லை. உங்கள் அமைப்புகளை சரிபார்க்கவும்! + டோர் இணையத்துடன் இணைக்க முடியாது என்று தெரிகிறது. ஐ.எச்.பி டோர் இணைப்புகளைத் தடுக்கலாம். நீங்கள் டோர் பிரிட்ச்களைப் பயன்படுத்த முயற்சி செய்யலாம். அவற்றை பட்டியல் -> வேகமான அமைப்புகள் -> பாலங்களில் காணலாம் + பதிவுகளை சேகரிக்கவும் + புதுப்பிப்பு + இசைவு இல்லாத தொகுதிகள் சில புதுப்பிப்புக்கு தயாராக உள்ளன. நீங்கள் அதைப் புதுப்பிக்க விரும்புகிறீர்களா? இது உங்கள் தொகுதிகள் அமைப்புகளை மேலெழுதும்! + உள்ளார்ந்த சார்பு புதுப்பிப்பு கிடைக்கிறது. நீங்கள் அதை பதிவிறக்கம் செய்து புதுப்பிக்க விரும்புகிறீர்களா? புதுப்பிப்பு பின்னணியில் தொடரும். + டி.என்ச்கிரிப்ட் புதுப்பிப்பு கிடைக்கிறது. நீங்கள் அதை பதிவிறக்கம் செய்து புதுப்பிக்க விரும்புகிறீர்களா? புதுப்பிப்பு பின்னணியில் தொடரும். + டோர் புதுப்பிப்பு கிடைக்கிறது. நீங்கள் அதை பதிவிறக்கம் செய்து புதுப்பிக்க விரும்புகிறீர்களா? புதுப்பிப்பு பின்னணியில் தொடரும். + ஊதா I2P புதுப்பிப்பு கிடைக்கிறது. நீங்கள் அதை பதிவிறக்கம் செய்து புதுப்பிக்க விரும்புகிறீர்களா? புதுப்பிப்பு பின்னணியில் தொடரும். + பிழையைப் புதுப்பிக்கவும். + புதுப்பிப்புகள் எதுவும் கிடைக்கவில்லை. + கடைசி சோதனை: + புதுப்பிப்புகள் காணப்பட்டன. + உங்கள் புரோ குறியீடு தவறு என்று தெரிகிறது. டெவலப்பரைத் தொடர்பு கொள்ளவும். + நீங்கள் ஒரு நாளைக்கு 5 முறைக்கு மேல் உள்ளார்ந்த சார்பு புதுப்பிப்புகளை சரிபார்த்தீர்கள். தயவுசெய்து பின்னர் மீண்டும் முயற்சிக்கவும். 
+ வருகை
+ இன்விசிபிள் புரோவைத் தேர்ந்தெடுத்ததற்கு நன்றி. இது உங்கள் தனியுரிமையை மேம்படுத்துவதோடு மென்மையான நிகழ்நிலை அனுபவத்தை வழங்கும் என்று நான் நம்புகிறேன்.\n\n இண்டிசிபிள் புரோவில் டோர், டி.என்ச்கிரிப்ட் மற்றும் ஊதா ஐ 2 பி ஆகியவை தொகுதிகள் அடங்கும்.\n\n உங்கள் சாதனம் வேரூன்றினால், அல்லது இணைய போக்குவரத்தை நேரடியாக TOR, DNSCRYPT மற்றும் I2P நெட்வொர்க்குகளுக்கு வழங்க உள்ளக VPN ஐப் பயன்படுத்தினால், இன்விசிபிள் புரோ ரூட் பயன்படுத்தலாம்.\n\n தனியுரிமைக் கொள்கை:\n அன்டிசிபிள் புரோ எந்தவொரு தனிப்பட்ட அல்லது உணர்திறன் கொண்ட பயனர் தரவையும் சேகரிக்கவோ பகிரவோ இல்லை.\n\n பதிப்புரிமை © 2019-2025\n கார்மாடின் ஒலெக்சாண்டர்\n invizible.soft@gmail.com\n invizible.net/en/privacy
+ இயங்கும் சேவைகள்
+ டோர் அடையாளம் மாறிவிட்டது
+ பதிலாள் போர்ட்:
+ Dnscrypt க்கு ப்ராக்சியைப் பயன்படுத்தவும்
+ ப்ராக்சியுடன் இணைக்க முடியாது: %s
+ மேன்-இன்-நடுத்தர தாக்குதல் கண்டறியப்பட்டது! உங்கள் தரவை உள்ளக நெட்வொர்க்கில் மற்றொரு சாதனத்தால் தடுக்கலாம். அணைக்கவும், சில வினாடிகள் காத்திருந்து, வைஃபை இயக்கவும். தற்போதைய வைஃபை ஆட்ச்பாட்டைப் பயன்படுத்துவது பாதுகாப்பற்றதாக இருக்கலாம்!
+ டிஎன்எச் மறுப்பு
+ சாத்தியமான டிஎன்எச் மறுப்பு தாக்குதல் கண்டறியப்பட்டது! தளம் %s தடுக்கப்பட்டுள்ளன.
+ வைஃபை நெட்வொர்க்குகளில் இணைப்புகளை அனுமதிக்கவும்
+ ரோமிங் செய்யும் போது இணைப்புகளை அனுமதிக்கவும்
+ VPN இயக்கப்பட்ட இணைப்புகளை அனுமதிக்கவும்
+ புதிதாக நிறுவப்பட்ட பயன்பாடுகளுக்கான இணைய இணைப்புகள் அனுமதிக்கப்படுகின்றன.
+ புதிதாக நிறுவப்பட்ட பயன்பாடுகளுக்கான இணைய இணைப்புகள் தடுக்கப்பட்டுள்ளன.
+ பயன்பாடு இணையத்துடன் இணைக்க முடியுமா இல்லையா என்பதைப் பொருட்படுத்தாமல் பயன்பாடுகள் காண்பிக்கப்படுகின்றன.
+ விரைவான அமைப்புகளில் 3 ஓடுகளுக்கு மேல் சேர்த்துள்ளீர்கள். தயவுசெய்து 3 ஐ மட்டும் விடுங்கள். 3 க்கும் மேற்பட்ட ஓடுகளைச் சேர்ப்பது பயன்பாட்டுக் கட்டுப்பாட்டில் சிக்கல்களை ஏற்படுத்தும்.
+ பட்டியல் காலியாக உள்ளது
+ Rep புதுப்பிக்க இழுக்கவும்
+ பயன்பாட்டின் செயல்பாடு, பயன்பாட்டு கட்டுப்பாடு மற்றும் கண்டறியப்பட்ட தாக்குதல்கள் பற்றிய முக்கியமான தகவல்களைக் காண்பிப்பதற்கு அறிவிப்புகள் தேவை. கண்ணுக்கு தெரியாத அறிவிப்புகளை அனுமதிக்க விரும்புகிறீர்களா?
+ திருத்த அழுத்தவும்.
+ அநாமதேய ரிலேக்கள் பயன்படுத்தப்படவில்லை.
+ தொலைநிலை பட்டியலைச் சேர்க்கவும்
+ உள்ளக பட்டியலைச் சேர்க்கவும்
+ உள்ளக பட்டியலை மாற்றவும்
+ விதிகள்
diff --git a/tordnscrypt/src/main/res/values-tr/strings.xml b/tordnscrypt/src/main/res/values-tr/strings.xml
index 668747a2a..6f7390d23 100644
--- a/tordnscrypt/src/main/res/values-tr/strings.xml
+++ b/tordnscrypt/src/main/res/values-tr/strings.xml
@@ -426,7 +426,7 @@
 \n\n\tInviZible Pro, aygıtınız root ayrıcalıklarına sahipse root kullanabilir veya İnternet trafiğini Tor, DNSCrypt ve I2P ağlarına iletmek için yerel bir VPN kullanır.
 \n\n\tGizlilik Politikası:
 \n\tInviZible Pro, kişisel veya hassas kullanıcı verilerini toplamaz veya paylaşmaz.
- \n\n\tTelif Hakkı © 2019-2024
+ \n\n\tTelif Hakkı © 2019-2025
 \n\tGarmatin Oleksandr
 \n\tinvizible.soft@gmail.com
 \n\tinvizible.net/en/privacy
diff --git a/tordnscrypt/src/main/res/values-uk/strings.xml b/tordnscrypt/src/main/res/values-uk/strings.xml
index cab636b46..ba43bfc7a 100644
--- a/tordnscrypt/src/main/res/values-uk/strings.xml
+++ b/tordnscrypt/src/main/res/values-uk/strings.xml
@@ -436,7 +436,7 @@
 \n\n\tInviZible Pro може використовувати root, якщо ваш пристрій має привілеї root, або використовувати локальний VPN для доставки інтернет-трафіку напряму в мережі Tor, DNSCrypt і I2P.
 \n\n\tПолітика конфіденційності:
 \n\tInviZible Pro не збирає і не поширює будь-які особисті або конфіденційні дані користувачів.
- \n\n\tCopyright © 2019-2024
+ \n\n\tCopyright © 2019-2025
 \n\tGarmatin Oleksandr
 \n\tinvizible.soft@gmail.com
 \n\tinvizible.net/en/privacy
@@ -523,4 +523,6 @@
 Посилання
 Правила будуть оновлені після зазначеної затримки у годинах.
 Увімкніть Постійний VPN і Блокувати з\'єднання без VPN для InviZible Pro, щоб заблокувати інтернет, коли додаток не запущений
+ Ви налаштували використання Socks5 проксі, але проксі, схоже, не працює. Будь ласка, перевірте ваші налаштування!
+ Використовувати проксі для додатків, що працюють без Tor
diff --git a/tordnscrypt/src/main/res/values-v21/styles.xml b/tordnscrypt/src/main/res/values-v21/styles.xml
index fff6b585a..328b0417a 100644
--- a/tordnscrypt/src/main/res/values-v21/styles.xml
+++ b/tordnscrypt/src/main/res/values-v21/styles.xml
@@ -14,7 +14,7 @@
 ~ You should have received a copy of the GNU General Public License
 ~ along with InviZible Pro. If not, see .
 ~
- ~ Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com
+ ~ Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com
 -->
diff --git a/tordnscrypt/src/main/res/values-vi/strings.xml b/tordnscrypt/src/main/res/values-vi/strings.xml
index a6b3daec9..c7fb17dae 100644
--- a/tordnscrypt/src/main/res/values-vi/strings.xml
+++ b/tordnscrypt/src/main/res/values-vi/strings.xml
@@ -1,2 +1,139 @@
-
\ No newline at end of file
+
+ Cài đặt
+ I2P đang chạy
+ Đang kiểm tra xem Root có sẵn không…
+ DNSCrypt đang khởi động
+ Tor đang dừng
+ Hoàn tất
+ Bạn có muốn hoàn tất cài đặt không?
+ Lưu thay đổi
+ Đặt lại cài đặt
+ Tự khởi động DNSCrypt
+ Tự khởi động Tor
+ Tự khởi động I2P
+ Sử dụng độ trễ (giây) chỉ khi Tự khởi động không chạy đúng cách
+ Vui lòng tắt Entry Nodes trong Cài đặt Tor, nếu không bạn sẽ không thể sử dụng cầu nối.
+ Tor với DNSCrypt và I2P
+ giải pháp tốt nhất cho sự riêng tư của bạn
+ I2P đã dừng
+ I2P đang khởi động
+ I2P đang dừng
+ I2P đang cài đặt
+ I2P đã cài đặt
+ I2P chưa được cài đặt
+ Bắt đầu khi khởi động
+ Vui lòng chờ đợi…
+ Không hiển thị
+ DNSCrypt đã dừng
+ DNSCrypt đã cài đặt
+ DNSCrypt đang cài đặt
+ DNSCrypt chưa cài đặt
+ DNSCrypt đang chạy
+ DNSCrypt đang dừng
+ Tor đã dừng
+ Tor đã cài đặt
+ Tor đang cài đặt
+ Tor chưa cài đặt
+ Tor đang khởi động
+ Đang kết nối
+ Tor đang chạy
+ Hủy bỏ
+ Lỗi!
+ Cảnh báo!
+ Cài đặt
+ Thoát
+ Đã có lỗi xảy ra!
+ Phiên bản Premium
+ Không đồng ý
+ Đồng ý
+ Bỏ qua thay đổi
+ Cho phép
+ Từ chối
+ Bạn chắc chứ? Hành động này không thể hoàn tác!
+ Hỏi sau
+ Bạn có thể sử dụng InviZible Pro với chế độ VPN cục bộ, hoặc các ứng dụng có proxy riêng hoặc tính năng VPN cục bộ trong chế độ proxy.
+ Bạn có thể sử dụng InviZible Pro với các ứng dụng có proxy riêng hoặc tính năng VPN cục bộ trong chế độ proxy.
+ Lỗi lưu tệp!
+ Cài đặt chung
+ Cài đặt DNSCrypt
+ Cài đặt Tor
+ Cài đặt I2P
+ Cài đặt nhanh
+ Sao lưu & Khôi phục
+ Giới thiệu
+ Nhật ký
+ Khôi phục cài đặt
+ Lưu Cài đặt
+ Chọn thư mục sao lưu:
+ Bản sao lưu đã được lưu
+ Bản sao lưu đã được khôi phục
+ Cài đặt
+ Độ trễ
+ Tự khởi động
+ Máy chủ DNSCrypt
+ Chọn máy chủ DNSCrypt
+ Cài đặt Tor
+ Cài đặt định tuyến
+ Định tuyến tất cả lưu lượng qua Tor
+ Định tuyến tất cả lưu lượng qua InviZible
+ Chọn trang web
+ Danh sách trang web để mở với Tor. Tính năng này không hoạt động với các trang web nằm sau CDN.
+ Chọn ứng dụng
+ Danh sách ứng dụng để sử dụng với Tor
+ Danh sách ứng dụng để sử dụng với InviZible
+ Loại trừ trang web
+ Danh sách trang web để mở trực tiếp. Tính năng này không hoạt động với các trang web nằm sau CDN.
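[Editor's note] Both values-uk above and values/strings.xml further below add a warning that the configured Socks5 proxy does not appear to be running. A minimal sketch of how such a liveness check can be implemented, assuming a plain RFC 1928 greeting probe rather than InviZible's actual detection code:

```kotlin
import java.net.InetSocketAddress
import java.net.Socket

// A minimal SOCKS5 liveness probe (illustrative, not InviZible's code).
// Sends the RFC 1928 greeting: VER=0x05, NMETHODS=1, METHOD=0x00 (no auth).
// A well-behaved SOCKS5 server replies with two bytes: 0x05 and the method.
fun isSocks5ProxyRunning(host: String, port: Int, timeoutMs: Int = 2000): Boolean =
    try {
        Socket().use { socket ->
            socket.connect(InetSocketAddress(host, port), timeoutMs)
            socket.soTimeout = timeoutMs
            socket.getOutputStream().apply {
                write(byteArrayOf(0x05, 0x01, 0x00))
                flush()
            }
            val reply = ByteArray(2)
            var read = 0
            while (read < 2) {
                val n = socket.getInputStream().read(reply, read, 2 - read)
                if (n < 0) break
                read += n
            }
            read == 2 && reply[0].toInt() == 0x05
        }
    } catch (e: Exception) {
        false // unreachable, timed out, or not speaking SOCKS5
    }
```

A failed probe only shows the proxy is unreachable from this process; it does not validate credentials for proxies that require authentication.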
+ Loại trừ ứng dụng
+ Danh sách ứng dụng để mở trực tiếp
+ Loại trừ khỏi Tor
+ Định tuyến qua Tor
+ Loại trừ UDP khỏi Tor
+ Loại trừ hoàn toàn
+ Khoảng thời gian làm mới
+ Thời gian tính bằng giờ để làm mới IP của trang web. Dành cho Android 5.1 trở lên. Đặt 0 để ngừng làm mới.
+ Cầu nối (Bridges)
+ Sử dụng nó nếu bạn không thể kết nối với mạng Tor.
+ Sử dụng danh sách cầu nối mặc định
+ Không sử dụng cầu nối
+ Mã hóa
+ Vui lòng vô hiệu hóa cầu nối trước!
+ Chỉnh sửa cầu nối
+ Thêm cầu nối
+ Yêu cầu cầu nối
+ Sử dụng danh sách cầu nối riêng
+ Cầu nối IPv6
+ Các cầu nối Tor mặc định mới có sẵn. Bạn có muốn cập nhật chúng không?
+ Vui lòng chọn loại mã hóa:
+ Vui lòng nhập các ký tự từ hình ảnh.
+ Đang yêu cầu cầu nối mới
+ Cầu nối mới của bạn từ bridges.torproject.org:
+ Lưu
+ Đóng
+ Giả mạo SNI
+ Tên máy chủ không đúng!
+ Chọn chủ đề
+ Chế độ ban ngày
+ Chế độ ban đêm
+ Tự động
+ Mặc định hệ thống
+ Bỏ qua các địa chỉ LAN
+ Từ chối kết nối tới cổng http 80
+ Chọn ngôn ngữ
+ Nhật ký thời gian thực
+ Hiển thị nhật ký kết nối ứng dụng trong tab DNS
+ Không sử dụng Tor cho các địa chỉ LAN và các khối IP dự trữ của IANA
+ Chặn http
+ Cập nhật
+ Tự động cập nhật
+ Kiểm tra cập nhật InviZible Pro và các mô-đun hàng ngày
+ Kiểm tra cập nhật
+ Kiểm tra xem có phiên bản mới nào khả dụng không
+ Cập nhật một cách nghiêm ngặt qua Tor
+ Chỉ kiểm tra cập nhật qua Tor và khi Tor đang chạy
+ Sử dụng quyền root
+ Chạy các mô-đun với quyền root
+ Sử dụng quyền root cho các mô-đun DNSCrypt, Tor và I2P. Bật tính năng này sẽ khiến các mô-đun không được quản lý và có thể gây ra vấn đề kết nối!
+ Khác
+ Chặn http HOTSPOT
+
diff --git a/tordnscrypt/src/main/res/values-zh/strings.xml b/tordnscrypt/src/main/res/values-zh/strings.xml
index 086dd2cdc..c07717363 100644
--- a/tordnscrypt/src/main/res/values-zh/strings.xml
+++ b/tordnscrypt/src/main/res/values-zh/strings.xml
@@ -374,7 +374,7 @@
 "导入域名列表。您可以选择多个文件,重复行将被移除。"
 "擦除白名单"
 立即使用空响应回答 IPv6 相关请求。 当没有 IPv6 连接时可以加快速度,但也会对一些解析器造成可靠性问题。
- "请在 Android 网络设置中关闭“私人 DNS”。此选项与 InviZible 冲突。"
+ 请在 Android 网络设置中关闭“私人 DNS”。此选项与 InviZible 冲突。
 "导入规则"
 "请稍候…已导入 %d 条规则。"
 "完成!已导入 %d 条规则。"
@@ -506,7 +506,7 @@
 \n\n\t如果您的设备是root的,InviZble Pro可以使用root,或者使用本地VPN将互联网流量直接传递到Tor、DNSCryt和I2P网络。
 \n\n\t隐私政策:
 \n\tInviZble Pro不收集或共享任何个人或敏感用户数据。
- \n\n\t版权所有©2019-2024。
+ \n\n\t版权所有©2019-2025。
 \n\tGarmatin Oleksandr。
 \n\t邮箱:invizible.soft@gmail.com。
 \n\tInvizible.net/en/privacy
diff --git a/tordnscrypt/src/main/res/values/array.xml b/tordnscrypt/src/main/res/values/array.xml
index 9f4635eff..c4b11d82f 100644
--- a/tordnscrypt/src/main/res/values/array.xml
+++ b/tordnscrypt/src/main/res/values/array.xml
@@ -508,10 +508,12 @@
 stun.nextcloud.com:443
 stun.sipgate.net:10000
 stun.epygi.com:3478
- stun.sonetel.com:3479
 stun.uls.co.za:3478
 stun.voipgate.com:3478
- stun.voys.nl:3478
+ stun.bethesda.net:3478
+ stun.mixvoip.com:3478
+ stun.voipia.net:3478
+ stun.antisip.com:3478
@@ -593,4 +595,13 @@
 http://notbob.i2p/hosts-all.txt
 http://rus.i2p/hosts.txt
+
+ org.torproject.torbrowser_alpha
+ org.torproject.torbrowser
+ org.onionshare.android.fdroid
+ org.onionshare.android
+ org.torproject.android
+ org.briarproject.briar.android
+ im.cwtch.flwtch
+
diff --git a/tordnscrypt/src/main/res/values/strings.xml b/tordnscrypt/src/main/res/values/strings.xml
index 4e2ee7813..2a6ca70b5 100644
--- a/tordnscrypt/src/main/res/values/strings.xml
+++ b/tordnscrypt/src/main/res/values/strings.xml
@@ -513,6 +513,7 @@
 Looks like Tor was killed by android system. Your internet connection was restored. Check device Settings!
 Looks like I2P was killed by android system. Check device Settings!
 Looks like DNSCrypt can\'t connect to the internet. You can try to choose another DNSCrypt servers. Please find them in MENU -> Fast Settings -> Select DNSCrypt servers
+ You have configured to use the Socks5 proxy, but the proxy doesn\'t seem to be running. Please check your settings!
 Looks like Tor can\'t connect to the internet. ISP may blocks Tor connections. You can try to use Tor Bridges. Please find them in MENU -> Fast Settings -> Bridges
 Child Lock
 You can lock control of this application. Please enter password, or use previous.
@@ -539,7 +540,7 @@
 Application Version:
 Build Date:
 Processor Version:
- Copyright (C) 2019-2024 Garmatin Oleksandr\n All Rights Reserved\n Web Site:
+ Copyright (C) 2019-2025 Garmatin Oleksandr\n All Rights Reserved\n Web Site:
 Privacy Policy
 \tInviZible Pro does not collect or share any personal or sensitive user data.
 InviZible Pro forwards internet traffic directly to Tor, DNSCrypt and I2P networks.
@@ -620,7 +621,7 @@
 \n\n\tInviZible Pro can use root if your device is rooted, or uses a local VPN to deliver internet traffic directly to the Tor, DNSCrypt and I2P networks.
 \n\n\tPrivacy Policy:
 \n\tInviZible Pro does not collect or share any personal or sensitive user data.
- \n\n\tCopyright © 2019-2024
+ \n\n\tCopyright © 2019-2025
 \n\tGarmatin Oleksandr
 \n\tinvizible.soft@gmail.com
 \n\tinvizible.net/en/privacy
@@ -644,6 +645,7 @@
 Exclude Applications
 Applications list to open directly. These applications will not use Tor or Proxy to connect to the Internet!
 Please select proxy owner application at least, if you are using a local proxy.
+ Use proxy for apps that bypass Tor
 Use proxy for DNSCrypt
 Use proxy for Tor
 Use proxy for Purple I2P
diff --git a/tordnscrypt/src/main/res/xml/preferences_common.xml b/tordnscrypt/src/main/res/xml/preferences_common.xml
index 150b9adb8..707c3de87 100644
--- a/tordnscrypt/src/main/res/xml/preferences_common.xml
+++ b/tordnscrypt/src/main/res/xml/preferences_common.xml
@@ -111,9 +111,8 @@
 android:key="categoryCommonProxy"
 android:layout="@layout/preferences_category_custom"
 android:title="@string/pref_common_proxy_categ">
-
diff --git a/tordnscrypt/src/pro/java/pan/alexander/tordnscrypt/assistance/AccelerateDevelop.java b/tordnscrypt/src/pro/java/pan/alexander/tordnscrypt/assistance/AccelerateDevelop.java
index 32cd8491f..c1412cd27 100644
--- a/tordnscrypt/src/pro/java/pan/alexander/tordnscrypt/assistance/AccelerateDevelop.java
+++ b/tordnscrypt/src/pro/java/pan/alexander/tordnscrypt/assistance/AccelerateDevelop.java
@@ -16,7 +16,7 @@
 You should have received a copy of the GNU General Public License
 along with InviZible Pro. If not, see .
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com
+ Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com
 */
 import javax.inject.Inject;
diff --git a/tordnscrypt/src/release/java/pan/alexander/tordnscrypt/utils/multidex/MultidexActivator.kt b/tordnscrypt/src/release/java/pan/alexander/tordnscrypt/utils/multidex/MultidexActivator.kt
index aef717fac..05bd31559 100644
--- a/tordnscrypt/src/release/java/pan/alexander/tordnscrypt/utils/multidex/MultidexActivator.kt
+++ b/tordnscrypt/src/release/java/pan/alexander/tordnscrypt/utils/multidex/MultidexActivator.kt
@@ -14,7 +14,7 @@
 You should have received a copy of the GNU General Public License
 along with InviZible Pro. If not, see .
- Copyright 2019-2024 by Garmatin Oleksandr invizible.soft@gmail.com
+ Copyright 2019-2025 by Garmatin Oleksandr invizible.soft@gmail.com
 */
 package pan.alexander.tordnscrypt.utils.multidex
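[Editor's note] The package list added to values/array.xml above names apps that bundle their own Tor instance (Tor Browser, OnionShare, Orbot, Briar, Cwtch), which the 2.4.1 changelog entry says are now excluded from Tor by default. A minimal Kotlin sketch of resolving such a list to installed-app UIDs for exclusion; the helper is hypothetical, not InviZible's actual routing code:

```kotlin
import android.content.Context
import android.content.pm.PackageManager

// Package names mirrored from the array added to values/array.xml above.
private val APPS_WITH_OWN_TOR = setOf(
    "org.torproject.torbrowser_alpha",
    "org.torproject.torbrowser",
    "org.onionshare.android.fdroid",
    "org.onionshare.android",
    "org.torproject.android",
    "org.briarproject.briar.android",
    "im.cwtch.flwtch",
)

// Hypothetical helper: returns UIDs of installed apps that ship their own
// Tor, so a VPN-mode router could exclude their traffic from the Tor
// tunnel by default (routing Tor over Tor is wasteful and hurts latency).
fun findAppsToExcludeFromTor(context: Context): Set<Int> {
    val pm = context.packageManager
    return APPS_WITH_OWN_TOR.mapNotNull { pkg ->
        try {
            pm.getApplicationInfo(pkg, 0).uid
        } catch (e: PackageManager.NameNotFoundException) {
            null // app not installed on this device
        }
    }.toSet()
}
```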