Summing an array turned out to be about a third faster when I split the work across coroutines. I did not expect this result.
I'm adding up values from a decoded image: lots of interleaved pixel RGB values stuffed in a large IntArray. I started to wonder whether it would be faster to handle each "channel" (every third element, i.e. the step-3 loop below) in a separate coroutine so the summing runs in parallel.
Much to my surprise, it was. Unless I'm measuring it wrong, which is very possible. But if this is correct, it makes me wonder whether there is a better way of doing math on an array that automatically splits the work across the optimal number of coroutines (I've sketched one possible chunk-per-core approach after the code below).
import kotlinx.coroutines.*
import kotlin.random.Random
import kotlin.system.measureNanoTime

const val NUM_TO_AVG = 500          // timed samples per test
const val LOOPS = 10                // accumulation passes per timed sample
const val MAX_INT = 5_000
const val ARR_SIZE = 1080 * 720 * 3 // An image worth of pixels (interleaved RGB)
const val IGNORE_FIRST = 3          // warm-up samples dropped from the average

fun main() {
    runBlocking(Dispatchers.Default) {
        // Both tests are launched concurrently on Dispatchers.Default
        launch {
            val linearTime = arraySumTest(true)
            println("Linear: $linearTime")
        }
        launch {
            val cTime = arraySumTest(false)
            println("CoroutineTime: $cTime")
        }
    }
}

suspend fun arraySumTest(isLinear: Boolean): Int {
    val arr = IntArray(ARR_SIZE) {
        Random.nextInt(MAX_INT)
    }
    val target = IntArray(arr.size)
    val times = mutableListOf<Long>()
    repeat(NUM_TO_AVG) {
        val time = measureNanoTime {
            repeat(LOOPS) {
                if (isLinear) {
                    // Single pass over the whole array, three channels per step
                    for (i in target.indices step 3) {
                        target[i] += arr[i]
                        target[i + 1] += arr[i + 1]
                        target[i + 2] += arr[i + 2]
                    }
                } else {
                    // One coroutine per channel; each strides through the array by 3
                    coroutineScope {
                        launch {
                            for (i in target.indices step 3) {
                                target[i] += arr[i]
                            }
                        }
                        launch {
                            for (i in target.indices step 3) {
                                target[i + 1] += arr[i + 1]
                            }
                        }
                        launch {
                            for (i in target.indices step 3) {
                                target[i + 2] += arr[i + 2]
                            }
                        }
                    }
                }
            }
        }
        times.add(time)
    }
    // Average in microseconds, dropping the first few warm-up samples
    return (times.drop(IGNORE_FIRST).average() / 1_000).toInt()
}