mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-07-27 23:32:02 +08:00
finally commit Rohit's work as the start of a new (currently
unsupported) module, MoreVectorization. CCMAIL:rpg.314@gmail.com
This commit is contained in:
parent
804a239d30
commit
2bb1c9e8dc
@ -1,4 +1,4 @@
|
|||||||
set(Eigen_HEADERS AdolcForward BVH IterativeSolvers)
|
set(Eigen_HEADERS AdolcForward BVH IterativeSolvers MoreVectorization)
|
||||||
|
|
||||||
install(FILES
|
install(FILES
|
||||||
${Eigen_HEADERS}
|
${Eigen_HEADERS}
|
||||||
|
16
unsupported/Eigen/MoreVectorization
Normal file
16
unsupported/Eigen/MoreVectorization
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
#ifndef EIGEN_MOREVECTORIZATION_MODULE_H
#define EIGEN_MOREVECTORIZATION_MODULE_H

// Entry-point header of the MoreVectorization module: pulls in Eigen/Core,
// then the module's implementation header.
#include <Eigen/Core>

namespace Eigen {

/** \ingroup Unsupported_modules
  * \defgroup MoreVectorization additional vectorization module
  */

// Included inside the namespace block, so everything this header declares
// ends up in namespace Eigen.
#include "src/MoreVectorization/MathFunctions.h"

} // namespace Eigen

#endif // EIGEN_MOREVECTORIZATION_MODULE_H
|
@ -1,3 +1,4 @@
|
|||||||
ADD_SUBDIRECTORY(IterativeSolvers)
|
ADD_SUBDIRECTORY(IterativeSolvers)
|
||||||
ADD_SUBDIRECTORY(BVH)
|
ADD_SUBDIRECTORY(BVH)
|
||||||
ADD_SUBDIRECTORY(AutoDiff)
|
ADD_SUBDIRECTORY(AutoDiff)
|
||||||
|
ADD_SUBDIRECTORY(MoreVectorization)
|
||||||
|
6
unsupported/Eigen/src/MoreVectorization/CMakeLists.txt
Normal file
6
unsupported/Eigen/src/MoreVectorization/CMakeLists.txt
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
# Collect every header that sits next to this CMakeLists.txt ...
file(GLOB Eigen_MoreVectorization_SRCS "*.h")

# ... and install them under the MoreVectorization source directory of the
# unsupported module tree, as part of the "Devel" component.
install(FILES
  ${Eigen_MoreVectorization_SRCS}
  DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/MoreVectorization COMPONENT Devel
  )
|
103
unsupported/Eigen/src/MoreVectorization/MathFunctions.h
Normal file
103
unsupported/Eigen/src/MoreVectorization/MathFunctions.h
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
// This file is part of Eigen, a lightweight C++ template library
|
||||||
|
// for linear algebra. Eigen itself is part of the KDE project.
|
||||||
|
//
|
||||||
|
// Copyright (C) 2009 Rohit Garg <rpg.314@gmail.com>
|
||||||
|
// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||||
|
//
|
||||||
|
// Eigen is free software; you can redistribute it and/or
|
||||||
|
// modify it under the terms of the GNU Lesser General Public
|
||||||
|
// License as published by the Free Software Foundation; either
|
||||||
|
// version 3 of the License, or (at your option) any later version.
|
||||||
|
//
|
||||||
|
// Alternatively, you can redistribute it and/or
|
||||||
|
// modify it under the terms of the GNU General Public License as
|
||||||
|
// published by the Free Software Foundation; either version 2 of
|
||||||
|
// the License, or (at your option) any later version.
|
||||||
|
//
|
||||||
|
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU Lesser General Public
|
||||||
|
// License and a copy of the GNU General Public License along with
|
||||||
|
// Eigen. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
#ifndef EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H
|
||||||
|
#define EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H
|
||||||
|
|
||||||
|
/** \internal
  * \returns the arcsine of \a a (coeff-wise).
  *
  * Generic version: simply forwards to std::asin and converts the result
  * back to \c Packet on return.
  */
template<typename Packet>
inline static Packet ei_pasin(Packet a)
{
  return std::asin(a);
}
|
||||||
|
|
||||||
|
#ifdef EIGEN_VECTORIZE_SSE
|
||||||
|
|
||||||
|
/** \internal
  * \returns the arcsine of \a x (coeff-wise), SSE specialization for Packet4f.
  *
  * The computation is branch-free: with a = |x|, two candidate results are
  * evaluated for every lane and then blended with a comparison mask.
  *
  *   - a >  0.5 :  z = 0.5 - 0.5*a,  s = sqrt(z),  result = pi/2 - 2*(s + s*z*P(z))
  *   - a <= 0.5 :  z = a*a,                        result = a + a*z*P(z)
  *
  * where P(z) = (((asin1*z + asin2)*z + asin3)*z + asin4)*z + asin5.
  * The sign bit of \a x is extracted up front and xor-ed back into the
  * blended result at the end.
  *
  * The formulas above assume ei_pmadd(a,b,c) evaluates a*b + c.
  * NOTE(review): the coefficients look like those of the Cephes single
  * precision asinf() -- confirm against the reference implementation.
  */
template<> EIGEN_DONT_INLINE Packet4f ei_pasin(Packet4f x)
{
  _EIGEN_DECLARE_CONST_Packet4f(half, 0.5);
  _EIGEN_DECLARE_CONST_Packet4f(minus_half, -0.5);

  _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000);

  _EIGEN_DECLARE_CONST_Packet4f(pi_over_2, 3.141592654*0.5);

  // Coefficients of P(z), highest order first.
  _EIGEN_DECLARE_CONST_Packet4f(asin1, 4.2163199048E-2);
  _EIGEN_DECLARE_CONST_Packet4f(asin2, 2.4181311049E-2);
  _EIGEN_DECLARE_CONST_Packet4f(asin3, 4.5470025998E-2);
  _EIGEN_DECLARE_CONST_Packet4f(asin4, 7.4953002686E-2);
  _EIGEN_DECLARE_CONST_Packet4f(asin5, 1.6666752422E-1);

  // Work on |x| and remember the sign so it can be restored at the end.
  const Packet4f abs_x    = ei_pabs(x);
  const Packet4f sign_bit = _mm_and_ps(x, ei_p4f_sign_mask);

  // Lanes where |x| > 0.5 take the "large" result; all others take the
  // "small" one. Both results are computed for every lane.
  const Packet4f is_large = _mm_cmpgt_ps(abs_x, ei_p4f_half);

  // Candidate result for |x| > 0.5.
  Packet4f large_result;
  {
    const Packet4f z = ei_pmadd(ei_p4f_minus_half, abs_x, ei_p4f_half);
    const Packet4f s = ei_psqrt(z);

    // Horner evaluation of P(z).
    Packet4f poly = ei_pmadd(ei_p4f_asin1, z, ei_p4f_asin2);
    poly = ei_pmadd(poly, z, ei_p4f_asin3);
    poly = ei_pmadd(poly, z, ei_p4f_asin4);
    poly = ei_pmadd(poly, z, ei_p4f_asin5);

    // Plain multiply by z, so that the following step can be a single madd.
    const Packet4f poly_z = ei_pmul(poly, z);

    const Packet4f half_angle = ei_pmadd(poly_z, s, s);
    const Packet4f angle      = ei_padd(half_angle, half_angle);
    large_result = ei_psub(ei_p4f_pi_over_2, angle);
  }

  // Candidate result for |x| <= 0.5.
  Packet4f small_result;
  {
    const Packet4f z = ei_pmul(abs_x, abs_x);

    // Horner evaluation of P(z).
    Packet4f poly = ei_pmadd(ei_p4f_asin1, z, ei_p4f_asin2);
    poly = ei_pmadd(poly, z, ei_p4f_asin3);
    poly = ei_pmadd(poly, z, ei_p4f_asin4);
    poly = ei_pmadd(poly, z, ei_p4f_asin5);

    // Plain multiply by z, so that the following step can be a single madd.
    const Packet4f poly_z = ei_pmul(poly, z);

    small_result = ei_pmadd(poly_z, abs_x, abs_x);
  }

  // Per lane, keep the candidate selected by the mask and merge them.
  const Packet4f kept_large = _mm_and_ps(is_large, large_result);
  const Packet4f kept_small = _mm_andnot_ps(is_large, small_result);
  const Packet4f merged     = _mm_or_ps(kept_large, kept_small);

  // The result was computed from |x|; xor the original sign bit back in.
  return _mm_xor_ps(merged, sign_bit);
}
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif // EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H
|
Loading…
x
Reference in New Issue
Block a user