Commit a0e1aa54 authored by henry
Browse files

Removed Itanium prefetch statements.

parent 94b82905
......@@ -67,38 +67,15 @@ void Foam::lduMatrix::Amul
register const label nCells = diag().size();
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&psiPtr[cell+96],0,1);
__builtin_prefetch (&diagPtr[cell+96],0,1);
__builtin_prefetch (&ApsiPtr[cell+96],1,1);
#endif
ApsiPtr[cell] = diagPtr[cell]*psiPtr[cell];
}
register const label nFaces = upper().size();
#ifdef ICC_IA64_PREFETCH
#pragma swp
#endif
for (register label face=0; face<nFaces; face++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&uPtr[face+32],0,0);
__builtin_prefetch (&lPtr[face+32],0,0);
__builtin_prefetch (&lowerPtr[face+32],0,1);
__builtin_prefetch (&psiPtr[lPtr[face+32]],0,1);
__builtin_prefetch (&ApsiPtr[uPtr[face+32]],0,1);
#endif
ApsiPtr[uPtr[face]] += lowerPtr[face]*psiPtr[lPtr[face]];
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&upperPtr[face+32],0,1);
__builtin_prefetch (&psiPtr[uPtr[face+32]],0,1);
__builtin_prefetch (&ApsiPtr[lPtr[face+32]],0,1);
#endif
ApsiPtr[lPtr[face]] += upperPtr[face]*psiPtr[uPtr[face]];
}
......@@ -151,34 +128,13 @@ void Foam::lduMatrix::Tmul
register const label nCells = diag().size();
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&psiPtr[cell+96],0,1);
__builtin_prefetch (&diagPtr[cell+96],0,1);
__builtin_prefetch (&TpsiPtr[cell+96],1,1);
#endif
TpsiPtr[cell] = diagPtr[cell]*psiPtr[cell];
}
register const label nFaces = upper().size();
for (register label face=0; face<nFaces; face++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&uPtr[face+32],0,0);
__builtin_prefetch (&lPtr[face+32],0,0);
__builtin_prefetch (&upperPtr[face+32],0,1);
__builtin_prefetch (&psiPtr[lPtr[face+32]],0,1);
__builtin_prefetch (&TpsiPtr[uPtr[face+32]],0,1);
#endif
TpsiPtr[uPtr[face]] += upperPtr[face]*psiPtr[lPtr[face]];
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&lowerPtr[face+32],0,1);
__builtin_prefetch (&psiPtr[uPtr[face+32]],0,1);
__builtin_prefetch (&TpsiPtr[lPtr[face+32]],0,1);
#endif
TpsiPtr[lPtr[face]] += lowerPtr[face]*psiPtr[uPtr[face]];
}
......@@ -218,34 +174,12 @@ void Foam::lduMatrix::sumA
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&diagPtr[cell+96],0,1);
__builtin_prefetch (&sumAPtr[cell+96],1,1);
#endif
sumAPtr[cell] = diagPtr[cell];
}
#ifdef ICC_IA64_PREFETCH
#pragma swp
#endif
for (register label face=0; face<nFaces; face++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&uPtr[face+32],0,0);
__builtin_prefetch (&lPtr[face+32],0,0);
__builtin_prefetch (&lowerPtr[face+32],0,1);
__builtin_prefetch (&sumAPtr[uPtr[face+32]],0,1);
#endif
sumAPtr[uPtr[face]] += lowerPtr[face];
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&upperPtr[face+32],0,1);
__builtin_prefetch (&sumAPtr[lPtr[face+32]],0,1);
#endif
sumAPtr[lPtr[face]] += upperPtr[face];
}
......@@ -323,39 +257,15 @@ void Foam::lduMatrix::residual
register const label nCells = diag().size();
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&psiPtr[cell+96],0,1);
__builtin_prefetch (&diagPtr[cell+96],0,1);
__builtin_prefetch (&sourcePtr[cell+96],0,1);
__builtin_prefetch (&rAPtr[cell+96],1,1);
#endif
rAPtr[cell] = sourcePtr[cell] - diagPtr[cell]*psiPtr[cell];
}
register const label nFaces = upper().size();
#ifdef ICC_IA64_PREFETCH
#pragma swp
#endif
for (register label face=0; face<nFaces; face++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&uPtr[face+32],0,0);
__builtin_prefetch (&lPtr[face+32],0,0);
__builtin_prefetch (&lowerPtr[face+32],0,1);
__builtin_prefetch (&psiPtr[lPtr[face+32]],0,1);
__builtin_prefetch (&rAPtr[uPtr[face+32]],0,1);
#endif
rAPtr[uPtr[face]] -= lowerPtr[face]*psiPtr[lPtr[face]];
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&upperPtr[face+32],0,1);
__builtin_prefetch (&psiPtr[uPtr[face+32]],0,1);
__builtin_prefetch (&rAPtr[lPtr[face+32]],0,1);
#endif
rAPtr[lPtr[face]] -= upperPtr[face]*psiPtr[uPtr[face]];
}
......
......@@ -353,20 +353,7 @@ Foam::tmp<Foam::scalarField > Foam::lduMatrix::H1() const
for (register label face=0; face<nFaces; face++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&uPtr[face+32],0,0);
__builtin_prefetch (&lPtr[face+32],0,0);
__builtin_prefetch (&lowerPtr[face+32],0,1);
__builtin_prefetch (&H1Ptr[uPtr[face+32]],0,1);
#endif
H1Ptr[uPtr[face]] -= lowerPtr[face];
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&upperPtr[face+32],0,1);
__builtin_prefetch (&H1Ptr[lPtr[face+32]],0,1);
#endif
H1Ptr[lPtr[face]] -= upperPtr[face];
}
}
......
......@@ -71,29 +71,15 @@ void Foam::DICPreconditioner::calcReciprocalD
register const label nFaces = matrix.upper().size();
for (register label face=0; face<nFaces; face++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&uPtr[face+96],0,0);
__builtin_prefetch (&lPtr[face+96],0,0);
__builtin_prefetch (&upperPtr[face+96],0,1);
__builtin_prefetch (&rDPtr[lPtr[face+24]],0,1);
__builtin_prefetch (&rDPtr[uPtr[face+24]],1,1);
#endif
rDPtr[uPtr[face]] -= upperPtr[face]*upperPtr[face]/rDPtr[lPtr[face]];
}
// Calculate the reciprocal of the preconditioned diagonal
register const label nCells = rD.size();
#ifdef ICC_IA64_PREFETCH
#pragma ivdep
#endif
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&rDPtr[cell+96],0,1);
#endif
rDPtr[cell] = 1.0/rDPtr[cell];
}
}
......@@ -120,61 +106,18 @@ void Foam::DICPreconditioner::precondition
register label nFaces = solver_.matrix().upper().size();
register label nFacesM1 = nFaces - 1;
#ifdef ICC_IA64_PREFETCH
#pragma ivdep
#endif
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&wAPtr[cell+96],0,1);
__builtin_prefetch (&rDPtr[cell+96],0,1);
__builtin_prefetch (&rAPtr[cell+96],0,1);
#endif
wAPtr[cell] = rDPtr[cell]*rAPtr[cell];
}
#ifdef ICC_IA64_PREFETCH
#pragma noprefetch uPtr,lPtr,upperPtr,rDPtr,wAPtr
#pragma nounroll
#endif
for (register label face=0; face<nFaces; face++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&uPtr[face+96],0,0);
__builtin_prefetch (&lPtr[face+96],0,0);
__builtin_prefetch (&upperPtr[face+96],0,0);
__builtin_prefetch (&rDPtr[uPtr[face+32]],0,1);
__builtin_prefetch (&wAPtr[uPtr[face+32]],0,1);
__builtin_prefetch (&wAPtr[lPtr[face+32]],0,1);
#endif
wAPtr[uPtr[face]] -= rDPtr[uPtr[face]]*upperPtr[face]*wAPtr[lPtr[face]];
}
#ifdef ICC_IA64_PREFETCH
#pragma noprefetch uPtr,lPtr,rDPtr,wAPtr
#pragma nounroll
#endif
for (register label face=nFacesM1; face>=0; face--)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&uPtr[face-95],0,0);
__builtin_prefetch (&lPtr[face-95],0,0);
__builtin_prefetch (&rDPtr[lPtr[face-16]],0,1);
__builtin_prefetch (&wAPtr[lPtr[face-16]],0,1);
__builtin_prefetch (&wAPtr[uPtr[face-16]],0,1);
__builtin_prefetch (&rDPtr[lPtr[face-24]],0,1);
__builtin_prefetch (&wAPtr[lPtr[face-24]],0,1);
__builtin_prefetch (&wAPtr[uPtr[face-24]],0,1);
__builtin_prefetch (&rDPtr[lPtr[face-32]],0,1);
__builtin_prefetch (&wAPtr[lPtr[face-32]],0,1);
__builtin_prefetch (&wAPtr[uPtr[face-32]],0,1);
#endif
wAPtr[lPtr[face]] -= rDPtr[lPtr[face]]*upperPtr[face]*wAPtr[uPtr[face]];
}
}
......
......@@ -72,30 +72,15 @@ void Foam::DILUPreconditioner::calcReciprocalD
register label nFaces = matrix.upper().size();
for (register label face=0; face<nFaces; face++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&uPtr[face+96],0,0);
__builtin_prefetch (&lPtr[face+96],0,0);
__builtin_prefetch (&upperPtr[face+96],0,1);
__builtin_prefetch (&lowerPtr[face+96],0,1);
__builtin_prefetch (&rDPtr[lPtr[face+24]],0,1);
__builtin_prefetch (&rDPtr[uPtr[face+24]],1,1);
#endif
rDPtr[uPtr[face]] -= upperPtr[face]*lowerPtr[face]/rDPtr[lPtr[face]];
}
// Calculate the reciprocal of the preconditioned diagonal
register label nCells = rD.size();
#ifdef ICC_IA64_PREFETCH
#pragma ivdep
#endif
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&rDPtr[cell+96],0,1);
#endif
rDPtr[cell] = 1.0/rDPtr[cell];
}
}
......@@ -128,26 +113,14 @@ void Foam::DILUPreconditioner::precondition
register label nFaces = solver_.matrix().upper().size();
register label nFacesM1 = nFaces - 1;
#ifdef ICC_IA64_PREFETCH
#pragma ivdep
#endif
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&wAPtr[cell+96],0,1);
__builtin_prefetch (&rDPtr[cell+96],0,1);
__builtin_prefetch (&rAPtr[cell+96],0,1);
#endif
wAPtr[cell] = rDPtr[cell]*rAPtr[cell];
}
register label sface;
#ifdef ICC_IA64_PREFETCH
#pragma nounroll
#endif
for (register label face=0; face<nFaces; face++)
{
sface = losortPtr[face];
......@@ -155,28 +128,8 @@ void Foam::DILUPreconditioner::precondition
rDPtr[uPtr[sface]]*lowerPtr[sface]*wAPtr[lPtr[sface]];
}
#ifdef ICC_IA64_PREFETCH
#pragma noprefetch uPtr,lPtr,rDPtr,wAPtr
#pragma nounroll
#endif
for (register label face=nFacesM1; face>=0; face--)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&uPtr[face-95],0,0);
__builtin_prefetch (&lPtr[face-95],0,0);
__builtin_prefetch (&upperPtr[face-95],0,1);
__builtin_prefetch (&rDPtr[lPtr[face-16]],0,1);
__builtin_prefetch (&wAPtr[lPtr[face-16]],0,1);
__builtin_prefetch (&wAPtr[uPtr[face-16]],0,1);
__builtin_prefetch (&rDPtr[lPtr[face-24]],0,1);
__builtin_prefetch (&wAPtr[lPtr[face-24]],0,1);
__builtin_prefetch (&wAPtr[uPtr[face-24]],0,1);
__builtin_prefetch (&rDPtr[lPtr[face-32]],0,1);
__builtin_prefetch (&wAPtr[lPtr[face-32]],0,1);
__builtin_prefetch (&wAPtr[uPtr[face-32]],0,1);
#endif
wAPtr[lPtr[face]] -=
rDPtr[lPtr[face]]*upperPtr[face]*wAPtr[uPtr[face]];
}
......@@ -210,46 +163,20 @@ void Foam::DILUPreconditioner::preconditionT
register label nFaces = solver_.matrix().upper().size();
register label nFacesM1 = nFaces - 1;
#ifdef ICC_IA64_PREFETCH
#pragma ivdep
#endif
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&wTPtr[cell+96],0,1);
__builtin_prefetch (&rDPtr[cell+96],0,1);
__builtin_prefetch (&rTPtr[cell+96],0,1);
#endif
wTPtr[cell] = rDPtr[cell]*rTPtr[cell];
}
#ifdef ICC_IA64_PREFETCH
#pragma noprefetch uPtr,lPtr,upperPtr,rDPtr,wTPtr
#pragma nounroll
#endif
for (register label face=0; face<nFaces; face++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&uPtr[face+96],0,0);
__builtin_prefetch (&lPtr[face+96],0,0);
__builtin_prefetch (&upperPtr[face+96],0,1);
__builtin_prefetch (&rDPtr[uPtr[face+32]],0,1);
__builtin_prefetch (&wTPtr[lPtr[face+32]],0,1);
__builtin_prefetch (&wTPtr[uPtr[face+32]],0,1);
#endif
wTPtr[uPtr[face]] -=
rDPtr[uPtr[face]]*upperPtr[face]*wTPtr[lPtr[face]];
}
register label sface;
#ifdef ICC_IA64_PREFETCH
#pragma nounroll
#endif
for (register label face=nFacesM1; face>=0; face--)
{
sface = losortPtr[face];
......
......@@ -66,47 +66,17 @@ Foam::FDICPreconditioner::FDICPreconditioner
for (register label face=0; face<nFaces; face++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&uPtr[face+96],0,0);
__builtin_prefetch (&lPtr[face+96],0,0);
__builtin_prefetch (&upperPtr[face+96],0,1);
__builtin_prefetch (&rDPtr[lPtr[face+24]],0,1);
__builtin_prefetch (&rDPtr[uPtr[face+24]],1,1);
#endif
rDPtr[uPtr[face]] -= sqr(upperPtr[face])/rDPtr[lPtr[face]];
}
#ifdef ICC_IA64_PREFETCH
#pragma ivdep
#endif
// Generate reciprocal FDIC
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&rDPtr[cell+96],0,1);
#endif
rDPtr[cell] = 1.0/rDPtr[cell];
}
#ifdef ICC_IA64_PREFETCH
#pragma ivdep
#endif
for (register label face=0; face<nFaces; face++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&uPtr[face+96],0,0);
__builtin_prefetch (&lPtr[face+96],0,0);
__builtin_prefetch (&upperPtr[face+96],0,0);
__builtin_prefetch (&rDuUpperPtr[face+96],0,0);
__builtin_prefetch (&rDlUpperPtr[face+96],0,0);
__builtin_prefetch (&rDPtr[uPtr[face+32]],0,1);
__builtin_prefetch (&rDPtr[lPtr[face+32]],0,1);
#endif
rDuUpperPtr[face] = rDPtr[uPtr[face]]*upperPtr[face];
rDlUpperPtr[face] = rDPtr[lPtr[face]]*upperPtr[face];
}
......@@ -138,58 +108,18 @@ void Foam::FDICPreconditioner::precondition
register label nFaces = solver_.matrix().upper().size();
register label nFacesM1 = nFaces - 1;
#ifdef ICC_IA64_PREFETCH
#pragma ivdep
#endif
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&wAPtr[cell+96],0,1);
__builtin_prefetch (&rDPtr[cell+96],0,1);
__builtin_prefetch (&rAPtr[cell+96],0,1);
#endif
wAPtr[cell] = rDPtr[cell]*rAPtr[cell];
}
#ifdef ICC_IA64_PREFETCH
#pragma noprefetch uPtr,lPtr,rDuUpperPtr,wAPtr
#pragma nounroll
#endif
for (register label face=0; face<nFaces; face++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&uPtr[face+96],0,0);
__builtin_prefetch (&lPtr[face+96],0,0);
__builtin_prefetch (&rDuUpperPtr[face+96],0,0);
__builtin_prefetch (&wAPtr[uPtr[face+32]],0,1);
__builtin_prefetch (&wAPtr[lPtr[face+32]],0,1);
#endif
wAPtr[uPtr[face]] -= rDuUpperPtr[face]*wAPtr[lPtr[face]];
}
#ifdef ICC_IA64_PREFETCH
#pragma noprefetch uPtr,lPtr,rDlUpperPtr,wAPtr
#pragma nounroll
#endif
for (register label face=nFacesM1; face>=0; face--)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&uPtr[face-95],0,0);
__builtin_prefetch (&lPtr[face-95],0,0);
__builtin_prefetch (&rDlUpperPtr[face-95],0,0);
__builtin_prefetch (&wAPtr[lPtr[face-16]],0,1);
__builtin_prefetch (&wAPtr[uPtr[face-16]],0,1);
__builtin_prefetch (&wAPtr[lPtr[face-24]],0,1);
__builtin_prefetch (&wAPtr[uPtr[face-24]],0,1);
__builtin_prefetch (&wAPtr[lPtr[face-32]],0,1);
__builtin_prefetch (&wAPtr[uPtr[face-32]],0,1);
#endif
wAPtr[lPtr[face]] -= rDlUpperPtr[face]*wAPtr[uPtr[face]];
}
}
......
......@@ -58,18 +58,9 @@ Foam::diagonalPreconditioner::diagonalPreconditioner
register label nCells = rD.size();
#ifdef ICC_IA64_PREFETCH
#pragma ivdep
#endif
// Generate reciprocal diagonal
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&rDPtr[cell+96],0,1);
__builtin_prefetch (&DPtr[cell+96],0,1);
#endif
rDPtr[cell] = 1.0/DPtr[cell];
}
}
......@@ -90,18 +81,8 @@ void Foam::diagonalPreconditioner::precondition
register label nCells = wA.size();
#ifdef ICC_IA64_PREFETCH
#pragma ivdep
#endif
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&wAPtr[cell+96],0,1);
__builtin_prefetch (&rDPtr[cell+96],0,1);
__builtin_prefetch (&rAPtr[cell+96],0,1);
#endif
wAPtr[cell] = rDPtr[cell]*rAPtr[cell];
}
}
......
......@@ -68,17 +68,8 @@ void Foam::noPreconditioner::precondition
register label nCells = wA.size();
#ifdef ICC_IA64_PREFETCH
#pragma ivdep
#endif
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&wAPtr[cell+96],0,1);
__builtin_prefetch (&rAPtr[cell+96],0,1);
#endif
wAPtr[cell] = rAPtr[cell];
}
}
......
......@@ -146,19 +146,6 @@ void Foam::GaussSeidelSmoother::smooth
for (register label cellI=0; cellI<nCells; cellI++)