blob: a27c4d4779c155cec65d58ddd6eb26873385fdda [file] [log] [blame]
// Copyright (C) 2006 Fokko Beekhof
// Email contact: Fokko.Beekhof@cui.unige.ch
// The OMPTL library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#ifndef OMPTL_TOOLS_H
#define OMPTL_TOOLS_H 1
#include <utility>
#include <vector>
#include <cassert>
#include <algorithm>
#include <climits>
namespace omptl
{
// Log of the number of operations that is expected to run faster in a single
// thread.
const unsigned C = 12;
template <typename T>
T log2N_(T n)
{
if (n == 0)
return 0;
const unsigned b[] =
#if (WORD_BIT == 32)
{0x2u, 0xCu, 0xF0u, 0xFF00u, 0xFFFF0000u};
#else // 64-bit
{0x2u, 0xCu, 0xF0u, 0xFF00u, 0xFFFF0000u, 0xFFFFFFFF00000000u};
assert(WORD_BIT == 64);
#endif
const T S[] = {1u, 2u, 4u, 8u, 16u, 32u, 64u, 128u};
T result = 0u; // result of log2(v) will go here
for (int i = static_cast<int>(sizeof(T)); i >= 0; --i)
{
if (n & b[i])
{
n >>= S[i];
result |= S[i];
}
}
return result;
}
template<typename Iterator>
bool _linear_serial_is_faster(Iterator first, Iterator last,
const unsigned P)
{
assert(P > 0u);
assert(::std::distance(first, last) >= 0);
const unsigned N = ::std::distance(first, last);
return (N < 2u*P) || (log2N_(N) < C);
}
template<typename Iterator>
bool _logn_serial_is_faster(Iterator first, Iterator last,
const unsigned P)
{
assert(P > 0u);
assert(::std::distance(first, last) >= 0);
const unsigned N = ::std::distance(first, last);
return (N < 2u*P) || (log2N_(N) < (1 << C));
}
template<typename Iterator>
bool _nlogn_serial_is_faster(Iterator first, Iterator last,
const unsigned P)
{
assert(P > 0u);
assert(::std::distance(first, last) >= 0);
const unsigned N = ::std::distance(first, last);
return (N < 2u*P) || (N*log2N_(N) < (1 << C));
}
template<typename Iterator1, typename Iterator2>
void _copy_partitions(const ::std::vector< ::std::pair<Iterator1, Iterator1> >
&source_partitions, Iterator2 first,
::std::vector<Iterator2> &dest_partitions, const unsigned P)
{
assert(source_partitions.size() == P);
assert(dest_partitions.size() == P);
for (unsigned i = 0; i < P; ++i)
{
dest_partitions[i] = first;
// The last "advance" is very important, it may create space
// if it is an InsertIterator or something like that.
::std::advance(first, ::std::distance(
source_partitions[i].first,
source_partitions[i].second) );
}
}
// Divide a given range into P partitions
template<typename Iterator>
void _partition_range(Iterator first, Iterator last,
::std::vector< ::std::pair<Iterator, Iterator> > &partitions,
const unsigned P)
{
assert(partitions.size() == P);
typedef ::std::pair<Iterator, Iterator> Partition;
const unsigned N = ::std::distance(first, last);
const unsigned range = N / P + ((N%P)? 1 : 0);
assert(2u*P <= N);
assert(range <= N);
// All but last partition have same range
Iterator currentLast = first;
::std::advance(currentLast, range);
for (unsigned i = 0; i < P - 1; ++i)
{
partitions[i] = Partition(first, currentLast);
first = currentLast;
::std::advance(currentLast, range);
}
// Last range may be shorter
assert(::std::distance(first, last) <= range);
partitions[P - 1] = Partition(first, last);
}
// Given a range, re-arrange the items such that all elements smaller than
// the pivot precede all other values. Returns an Iterator to the first
// element not smaller than the pivot.
template<typename Iterator, class StrictWeakOrdering>
Iterator _stable_pivot_range(Iterator first, Iterator last,
const typename ::std::iterator_traits<Iterator>::value_type pivot,
StrictWeakOrdering comp = std::less<
typename ::std::iterator_traits<Iterator>::value_type>())
{
Iterator pivotIt = last;
while (first < last)
{
if (comp(*first, pivot))
++first;
else
{
Iterator high = first;
while ( (++high < last) && !comp(*high, pivot) )
/* nop */;
if (high < last)
::std::iter_swap(first, last);
first = pivotIt = ++high;
}
}
return pivotIt;
}
template<typename Iterator, class StrictWeakOrdering>
Iterator _pivot_range(Iterator first, Iterator last,
const typename ::std::iterator_traits<Iterator>::value_type pivot,
StrictWeakOrdering comp)
{
while (first < last)
{
if (comp(*first, pivot))
++first;
else
{
while ( (first < --last) && !comp(*last, pivot) )
/* nop */;
::std::iter_swap(first, last);
}
}
return last;
}
template<typename Iterator, class StrictWeakOrdering>
void _partition_range_by_pivots(Iterator first, Iterator last,
const ::std::vector<typename
::std::iterator_traits<Iterator>::value_type> &pivots,
::std::vector< ::std::pair<Iterator, Iterator> > &partitions,
StrictWeakOrdering comp, const unsigned P)
{
assert(partitions.size() == P);
typedef ::std::pair<Iterator, Iterator> Partition;
::std::vector<Iterator> ptable(P);
::std::vector<typename ::std::iterator_traits<Iterator>::value_type>
pvts(pivots.size());
::std::vector<Iterator> borders;
::std::vector<bool> used(pivots.size());
::std::fill(used.begin(), used.end(), false);
borders.push_back(first);
borders.push_back(last);
partitions[0].first = first;
partitions[0].second = last;
for (unsigned p = 1; (1 << p) <= (int)P; ++p)
{
const int PROC = (1 << p);
const int PIVOTS = (1 << (p-1));
assert(PIVOTS <= (int)pivots.size());
#pragma omp parallel for //default(none) shared(used, pvts)
for (int t = 0; t < PIVOTS; ++t)
{
const int index = int(P / PROC) +
2 * t * int(P / PROC) - 1;
assert(index < (int)pivots.size());
assert(!used[index]);
used[index] = true;
pvts[t] = pivots[index];
/*::std::cout << "pvts T: " << t << " --> " << index <<
" " << pvts[t] << ::std::endl;*/
}
#pragma omp parallel for //default(none) private(comp)
// shared(ptable, pvts, partitions)
for (int t = 0; t < PIVOTS; ++t)
ptable[t] = _pivot_range(partitions[t].first,
partitions[t].second,
pvts[t], comp);
for (int i = 0; i < PIVOTS; ++i)
{
// ::std::cout << "ADD: " << ::std::distance(first, ptable[t]) << ::std::endl;
borders.push_back(ptable[i]);
}
::std::sort(borders.begin(), borders.end());
for (unsigned i = 0; i < borders.size() - 1; ++i)
{
partitions[i].first = borders[i];
partitions[i].second = borders[i + 1];
}
/*::std::cout << "PASS: " << p << ::std::endl;
for (t = 0; t < (1 << p); ++t)
::std::cout << t << ": " << ::std::distance(first, partitions[t].first)
<< " - " << ::std::distance(first, partitions[t].second)
<< ::std::endl;*/
}
for (unsigned i = 0; i < pivots.size(); ++i)
if(!used[i])
pvts[i] = pivots[i];
#pragma omp parallel for // default(none) private(t, comp)
// shared(used, ptable, pvts, partitions)
for (int t = 0; t < int(pivots.size()); ++t)
if (!used[t])
ptable[t] = _pivot_range(partitions[t].first,
partitions[t].second,
pvts[t], comp);
for (unsigned i = 0; i < pivots.size(); ++i)
{
if (!used[i])
{
// ::std::cout << "LAST ADD: " << ::std::distance(first, ptable[i])
// << ::std::endl;
borders.push_back(ptable[i]);
}
}
::std::sort(borders.begin(), borders.end());
assert(borders.size() - 1 == P);
for (unsigned i = 0; i < P; ++i)
{
partitions[i].first = borders[i];
partitions[i].second = borders[i + 1];
}
// ::std::cout << "LAST: " << p << ::std::endl;
// for (t = 0; t < P; ++t)
// ::std::cout << t << ": " << ::std::distance(first, partitions[t].first)
// << " - " << ::std::distance(first, partitions[t].second)
// << ::std::endl;
}
template<typename Iterator>
void _partition_range_stable_by_pivots(Iterator first, Iterator last,
const ::std::vector<typename
::std::iterator_traits<Iterator>::value_type> &pivots,
::std::vector< ::std::pair<Iterator, Iterator> > &partitions,
std::less<typename ::std::iterator_traits<Iterator>::value_type> comp,
const unsigned P)
{
typedef ::std::pair<Iterator, Iterator> Partition;
Iterator start = first;
for (unsigned i = 0; i < P - 1; ++i)
{
Iterator low = start;
while (low < last)
{
// Find a value not lower than the pivot.
while( (*low < pivots[i]) && (low < last) )
::std::advance(low, 1);
// Entire range scanned ?
if (low == last) break;
// Find a value lower than the pivot, starting from
// low, working our way up.
Iterator high = low;
::std::advance(high, 1);
while( !(*high < pivots[i]) && (high < last) )
::std::advance(high, 1);
// Entire range scanned ?
if (high == last) break;
// Swap values
assert( !(*low<pivots[i]) && (*high<pivots[i]) );
::std::iter_swap(low, high);
}
partitions[i] = Partition(start, low);
start = low;
}
partitions[P - 1] = Partition(start, last);
}
/*
* The sample ratio is used to sample more data. This way, the pivots can be
* chosen more wisely, which is our only guarantee we can generate partitions
* of equal size.
*/
template<typename RandomAccessIterator>
void _find_pivots(RandomAccessIterator first, RandomAccessIterator last,
::std::vector<typename
::std::iterator_traits<RandomAccessIterator>::value_type> &pivots,
const unsigned P, unsigned SAMPLE_RATIO = 10)
{
assert(SAMPLE_RATIO > 0);
const unsigned N = ::std::distance(first, last);
assert(N >= 2u*P);
// Adjust the constant. Erm.
while (SAMPLE_RATIO * (P + 1) > N)
SAMPLE_RATIO /= 2;
pivots.clear();
pivots.reserve(P - 1);
typedef typename
::std::iterator_traits<RandomAccessIterator>::value_type value_type;
::std::vector<value_type> samples;
const unsigned NSAMPLES = SAMPLE_RATIO * P + SAMPLE_RATIO;
samples.reserve(NSAMPLES);
for (unsigned i = 0; i < NSAMPLES; ++i)
{
const unsigned offset = i * (N-1) / (NSAMPLES - 1);
assert(offset < N);
samples.push_back(*(first + offset));
// std::cout << "offset: " << offset << " sample: " << samples[i] << std::endl;
}
assert(samples.size() == NSAMPLES);
// Sort samples to create relative ordering in data
::std::sort(samples.begin(), samples.end());
// Take pivots from sampled data
for (unsigned i = 1; i < P; ++i)
{
pivots.push_back(samples[i * samples.size() / P]);
/*std::cout << "pivot: " << i << " idx: " << (i * samples.size() / P)
<< " " << pivots[i-1] << std::endl;*/
}
assert(pivots.size() == P - 1);
}
} // namespace omptl
#endif /* OMPTL_TOOLS_H */