CSharp parallel Mergesort

Back to CSharp tips

This class implements a parallel and stable Mergesort for generic arrays, but it can be easily adapted for generic lists.

using System;
using System.Collections.Generic;
using System.Threading.Tasks;

namespace Sto
{
  /// <summary>
  /// Sorts a list of objects using the mergesort algorithm.
  /// This algorithm is stable (equal elements keep their position).
  /// The sorting will run parallel, to take advantage of several processors.
  /// Copyright: 2011 by Martin Stoeckli (www.martinstoeckli.ch/csharp)
  /// This code may be freely used in all kind of software.
  /// </summary>
  /// <remarks>
  /// The mergesort algorithm needs at least 30% less comparisons than a
  /// quicksort. This can be important, because copying an object is a cheap
  /// pointer operation, but the comparison of two objects can be expensive.
  /// </remarks>
  /// <typeparam name="T">Type of the elements of the list to sort.</typeparam>
  public class Sto_ParallelMergeSort<T>
  {
    private int _taskCount;
    private int _elementCountPerTask;
    private int _comparisonCount;
    protected IComparer<T> _comparer;

    /// <summary>
    /// Sorts a list of objects using a parallel mergesort algorithm.
    /// This algorithm is stable (equal elements keep their position).
    /// This overloaded method uses the default comparer.
    /// </summary>
    /// <param name="list">List with elements to sort.</param>
    public void Sort(T[] list)
    {
      Sort(list, Comparer<T>.Default);
    }

    /// <summary>
    /// Sorts a list of objects using a parallel mergesort algorithm.
    /// This algorithm is stable (equal elements keep their position).
    /// </summary>
    /// <param name="list">List with elements to sort.</param>
    /// <param name="comparison">A comparison delegate, which can compare
    /// two elements of the list.</param>
    public void Sort(T[] list, Comparison<T> comparison)
    {
      Sort(list, new ComparerFromComparison<T>(comparison));
    }

    /// <summary>
    /// Sorts a list of objects using a parallel mergesort algorithm.
    /// This algorithm is stable (equal elements keep their position).
    /// </summary>
    /// <param name="list">List with elements to sort.</param>
    /// <param name="comparer">A comparer, which can compare two elements
    /// of the list.</param>
    public void Sort(T[] list, IComparer<T> comparer)
    {
      _comparisonCount = 0;
      _comparer = comparer;
      _taskCount = DetermineTaskCount(list.Length);
      _elementCountPerTask = (list.Length - 1) / _taskCount + 1;

      // Create a buffer of the same size as the list.
      var tempList = new T[list.Length];

      if (_taskCount > 1)
      {
        // create tasks for sorting parts of the list
        Task[] tasks = new Task[_taskCount];
        for (int taskIndex = 0; taskIndex < _taskCount; taskIndex++)
        {
          // calculate range for task
          int bottom = taskIndex * _elementCountPerTask;
          int top = Math.Min(list.Length - 1, bottom + _elementCountPerTask - 1);

          // start sorting a part of the list
          tasks[taskIndex] = Task.Factory.StartNew(
            delegate()
            {
              SortPartOfList(list, tempList, bottom, top, 1);
            });
        }
        Task.WaitAll(tasks);

        // merge all sorted list parts
        SortPartOfList(list, tempList, 0, list.Length - 1, _elementCountPerTask);
      }
      else
      {
        // sort list without creating tasks
        SortPartOfList(list, tempList, 0, list.Length - 1, 1);
      }
    }

    /// <summary>
    /// This method can be called after sorting a list, to get some
    /// statistic information of the sorting.
    /// </summary>
    /// <returns>Formatted statistics information.</returns>
    public string GetStatistics()
    {
      return string.Format(
        "Tasks: {0}\r\nComparisons: {1}\r\nElementsPerTask: {2}\r\n",
        _taskCount, _comparisonCount, _elementCountPerTask);
    }

    /// <summary>
    /// Determines in how many tasks the sorting should be splitted.
    /// </summary>
    /// <param name="listLength">Number of elements in the list.</param>
    /// <returns>Number of tasks the sorting should use.</returns>
    protected int DetermineTaskCount(int listLength)
    {
      // Determines how many parallel tasks may be created maximal per processor.
      // More tasks per processor does not mean more speed, sometimes even less.
      const int MaxTasksPerProcessor = 1;

      // Determines the minimum of elements, one task should get.
      const int MinElementsForTask = 256;

      int maxDesiredTasks = Environment.ProcessorCount * MaxTasksPerProcessor;
      int maxPossibleTasksForList = (listLength - 1) / MinElementsForTask + 1;
      return Math.Min(maxDesiredTasks, maxPossibleTasksForList);
    }

    /// <summary>
    /// Sorts a range of a list, defined by <paramref name="bottom"/> to
    /// <paramref name="top"/>. Sorting this range will not influence elements
    /// outside of the range.
    /// </summary>
    /// <param name="list">List with elements to sort.</param>
    /// <param name="tempList">Buffer of the same size as list.</param>
    /// <param name="bottom">First element of range.</param>
    /// <param name="top">Last element of range.</param>
    /// <param name="blockSize">The block size. Elements in the same
    /// block have to be sorted already.</param>
    private void SortPartOfList(
      T[] list, T[] tempList, int bottom, int top, int blockSize)
    {
      while (blockSize < (top - bottom + 1))
      {
        // Merge blocks from list to tempList
        MergeToTarget(tempList, list, bottom, top, blockSize);
        blockSize = blockSize * 2;

        // Merge blocks back to list.
        // If all elements are already sorted, then only copy back.
        if (blockSize < (top - bottom + 1))
          MergeToTarget(list, tempList, bottom, top, blockSize);
        else
          Array.Copy(tempList, bottom, list, bottom, top - bottom + 1);
        blockSize = blockSize * 2;
      }
    }

    /// <summary>
    /// Performs a single merge of a part of the list.
    /// </summary>
    /// <param name="targetList">Receives sorted elements.</param>
    /// <param name="sourceList">Contains the elements to sort.</param>
    /// <param name="bottom">Index of the first element to merge.</param>
    /// <param name="top">Index of the last element to merge.</param>
    /// <param name="blockSize">The block size. Elements in the same
    /// block have to be sorted already.</param>
    private void MergeToTarget(
      T[] targetList, T[] sourceList, int bottom, int top, int blockSize)
    {
      int beginBlock1 = bottom;
      int beginBlock2 = Math.Min(bottom + blockSize, top);
      for (int i = 1; i <= ((top - bottom + 1) / (blockSize * 2)) + 1; i++)
      {
        int endBlock2 = Math.Min(beginBlock2 + blockSize - 1, top);
        MergeTwoBlocks(targetList, sourceList, beginBlock1, beginBlock2 - 1, beginBlock2, endBlock2);
        beginBlock1 = endBlock2 + 1;
        beginBlock2 = Math.Min(beginBlock1 + blockSize, top);
      }
    }

    /// <summary>
    /// Merges 2 consecutive and already sorted blocks from <paramref name="sourceList"/>
    /// to 1 sorted block in <paramref name="targetList"/>. The block in the target list
    /// will begin at the same index.
    /// </summary>
    /// <param name="targetList">Receives sorted elements.</param>
    /// <param name="sourceList">Contains the two blocks to merge.</param>
    /// <param name="beginBlock1">First index of block 1, this is also the index
    /// where the block in targetList will start.</param>
    /// <param name="endBlock1">Index of last element of block 1.</param>
    /// <param name="beginBlock2">First index of block 2 (always endBlock1 + 1).</param>
    /// <param name="endBlock2">Index of last element of block2.</param>
    private void MergeTwoBlocks(
      T[] targetList, T[] sourceList, int beginBlock1, int endBlock1, int beginBlock2, int endBlock2)
    {
      int targetIndex = beginBlock1;
      for (; targetIndex <= endBlock2; targetIndex++)
      {
        // check if nothing is left from block1
        if (beginBlock1 > endBlock1)
          targetList[targetIndex] = sourceList[beginBlock2++];
        // check if nothing is left from block2
        else if (beginBlock2 > endBlock2)
          targetList[targetIndex] = sourceList[beginBlock1++];
        else
        {
          // check if next element of block1, is less or equal than next element of block2
          _comparisonCount++;
          if (_comparer.Compare(sourceList[beginBlock1], sourceList[beginBlock2]) <= 0)
            targetList[targetIndex] = sourceList[beginBlock1++];
          else
            targetList[targetIndex] = sourceList[beginBlock2++];
        }
      }
    }

    /// <summary>
    /// Private helper class, which creates a Comparer around a Comparison delegate.
    /// </summary>
    /// <typeparam name="TU">Type of the elements of the sortable list.</typeparam>
    private class ComparerFromComparison<TU> : IComparer<TU>
    {
      private readonly Comparison<TU> _comparison;

      public ComparerFromComparison(Comparison<TU> comparison)
      {
        _comparison = comparison;
      }

      public int Compare(TU x, TU y)
      {
        return _comparison(x, y);
      }
    }
  }
}

And this is an example, that shows how to sort an array of strings, with a case in-sensitive comparer:

string[] values = new string[] { "The", "brown", "fox", "jumps", "Fox" };

var sorter = new Sto_ParallelMergeSort<string>();
sorter.Sort(
  values,
  delegate (string value1, string value2)
    {
      return string.Compare(value1, value2, true);
    });

www.martinstoeckli.ch