Run N promises in parallel

Finish M tasks while running N asynchronous tasks at once using Q or ES6 Promises.

I often execute multiple tasks via promises. If the number of tasks is small, I can run all of them in parallel. If the number of tasks is large, I usually build a single chain, otherwise there might be problems. Imagine opening 1000 file descriptors at once - the system will crash.

Simple task

Imagine a simple async task - sleep for a few seconds, then print a message. To better show the elapsed time after the program starts I added a simple console.timestamp method. I typically use Q promise library in my async code.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
var started = new Date();
console.timestamp = function () {
var now = new Date();
var args = Array.prototype.slice.call(arguments, 0);
args.unshift(now - started, 'ms');
console.log(args.join(' '));
}
var Q = require('q')
function delay(ms) {
console.timestamp('start delay', ms);
var defer = Q.defer();
setTimeout(function () {
console.timestamp('finished delay', ms);
defer.resolve(ms);
}, ms);
return defer.promise;
}
delay(1000).done();

prints

13 ms start delay 1000
1016 ms finished delay 1000

Execute all tasks in parallel

We can start all tasks at once, taking 10 seconds to complete (limited by the longest task)

var delays = [10000, 3000, 3000, 3000];
var tasks = delays.map(function (d) {
  return delay(d);
});
Q.all(tasks).done();
// prints
14 ms start delay 10000
15 ms start delay 3000
16 ms start delay 3000
16 ms start delay 3000
3017 ms finished delay 3000
3017 ms finished delay 3000
3017 ms finished delay 3000
10015 ms finished delay 10000

Single sequence

We can chain each task and run them sequentially

1
2
3
4
5
delays.reduce(function (chain, d) {
return chain.then(function () {
return delay(d)
});
}, Q()).done();

The entire run takes 19 seconds

15 ms start delay 10000
10017 ms finished delay 10000
10018 ms start delay 3000
13018 ms finished delay 3000
13019 ms start delay 3000
16018 ms finished delay 3000
16019 ms start delay 3000
19019 ms finished delay 3000

Synchronized groups of N tasks

There is middle ground - finish the overall collection when all M tasks are done, while executing at most N tasks in parallel. A naive approach would chain .all method calls M/N times.

Let us first split delays into groups of N

1
2
3
4
5
6
7
8
9
10
11
12
var delays = [10000, 3000, 3000, 3000];
var N = 2;
var delaysByN = delays.reduce(function (groups, d) {
if (groups[groups.length - 1].length < N) {
groups[groups.length - 1].push(d);
} else {
groups.push([d]);
}
return groups;
}, [[]]);
console.log(delaysByN);
// prints [ [ 10000, 3000 ], [ 3000, 3000 ] ]

Now we can chain several .all tasks

1
2
3
4
5
6
7
8
delaysByN.reduce(function (chain, group) {
return chain.then(function () {
var tasks = group.map(function (d) {
return delay(d);
});
return Q.all(tasks);
});
}, Q()).done();

This prints the following

16 ms start delay 10000
17 ms start delay 3000
3018 ms finished delay 3000
10018 ms finished delay 10000
10018 ms start delay 3000
10018 ms start delay 3000
13018 ms finished delay 3000
13018 ms finished delay 3000

This is inefficient, because .all waits until all promises are fulfilled. Thus some of the N executors will be sitting idle. Notice that first task finishes after 3 seconds, but then sits idle until 10 second delay finishes. Then both executors start again at 10018 ms timestamp.

q-map

Instead of waiting on a group of tasks to finish before kicking off another group, we want to create N chains, and as soon as one chain has been fulfilled, it picks up the next task from the queue.

There is a plugin q-map to run N promises in parallel. It is extremely simple to use and achieves my goal perfectly, it even passes the arguments to the promise-returning function, thus I do not have to prepare the tasks myself

1
2
3
4
Q.map = require('q-map').map;
// Q.map(dataArray, callbackPerItem, N);
var delays = [10000, 3000, 3000, 3000];
Q.map(delays, delay, 2).done();

The output shows 2 executors, picking up tasks as soon as one is done

33 ms start delay 10000
34 ms start delay 3000
3036 ms finished delay 3000
3037 ms start delay 3000
6039 ms finished delay 3000
6039 ms start delay 3000
9040 ms finished delay 3000
10035 ms finished delay 10000

Notice that one executor finishes all 3 second tasks after 9 seconds, while the second executor finishes its single 10 second delay task.

Using ES6 promises

Recently I started using native browser's Promise api, avoiding 3rd party libraries. To polyfill other environments, I recommend es6-promise library. We can easily implement the delay task without Q

1
2
3
4
5
6
7
8
9
function delay(ms) {
console.timestamp('start delay', ms);
return new Promise(function (resolve, reject) {
setTimeout(function () {
console.timestamp('finished delay', ms);
resolve(ms);
}, ms);
});
}

Let us implement N promises in parallel logic ourselves. We will create N chains, each chain will pick up the next available tasks as soon as possible. Finally, we can use Promise.all to wait until all chains are done.

To start, let us start a single chain that will drain the delays one by one. Notice how next is calling pickUpNextTask and then calls itself!

1
2
3
4
5
6
7
8
9
10
11
12
13
var delays = [10000, 3000, 3000, 3000];
function pickUpNextTask() {
if (delays.length) {
return delay(delays.shift());
}
}
function startChain() {
return Promise.resolve().then(function next() {
console.log(delays);
return pickUpNextTask().then(next);
});
}
startChain();

The output is as expected: the array of delays is drained one by one, and the total execution takes 19 seconds

[ 10000, 3000, 3000, 3000 ]
10 ms start delay 10000
10011 ms finished delay 10000
[ 3000, 3000, 3000 ]
10012 ms start delay 3000
13013 ms finished delay 3000
[ 3000, 3000 ]
13014 ms start delay 3000
16014 ms finished delay 3000
[ 3000 ]
16015 ms start delay 3000
19016 ms finished delay 3000
[]

We can start N chains building on top of this logic. Once we start N chains, we wait for all of them to finish

1
2
3
4
5
6
var N = 2, k;
var chains = [];
for (k = 0; k < N; k += 1) {
chains.push(startChain());
}
Promise.all(chains);

The output confirms 2 executors draining the delays array

[ 10000, 3000, 3000, 3000 ]
11 ms start delay 10000
[ 3000, 3000, 3000 ]
12 ms start delay 3000
3013 ms finished delay 3000
[ 3000, 3000 ]
3013 ms start delay 3000
6015 ms finished delay 3000
[ 3000 ]
6015 ms start delay 3000
9016 ms finished delay 3000
[]
10012 ms finished delay 10000
[]

Simple.