Property/inline performance

Hi there!

This is just a query of interest rather than an actual problem I’m having. I’ve been experimenting with Data Orientated Design and have a created a (very) rudimentary benchmark app for timing some different approaches to normalizing an array of 3D vectors (sorry its fairly long):

import java.util.concurrent.TimeUnit
import kotlin.random.Random
import kotlin.reflect.KProperty
import kotlin.time.Duration
import kotlin.time.DurationUnit
import kotlin.time.measureTime
import kotlin.time.toDuration

val rnd = Random(1234)

data class Vector(var x: Float, var y: Float, var z: Float) {
    fun normalize() {
        val l = x * x + y * y + z * z
        x /= l
        y /= l
        z /= l
    }
}

fun timeOop(count: Int): Duration {
    val vectors = Array(count) { Vector(rnd.nextFloat(), rnd.nextFloat(), rnd.nextFloat()) }
    return measureTime {
        for (vector in vectors) {
            vector.normalize()
        }
    }
}

private fun normalizeInArray(xs: FloatArray, ys: FloatArray, zs: FloatArray, i: Int) {
    val x = xs[i]
    val y = ys[i]
    val z = zs[i]
    val l = x * x + y * y + z * z
    xs[i] /= l
    ys[i] /= l
    zs[i] /= l
}

fun timeDod(count: Int): Duration {
    val xs = FloatArray(count) { rnd.nextFloat() }
    val ys = FloatArray(count) { rnd.nextFloat() }
    val zs = FloatArray(count) { rnd.nextFloat() }

    return measureTime {
        for (i in xs.indices) {
            normalizeInArray(xs, ys, zs, i)
        }
    }
}

class IntoArrayDelegate(val array: FloatArray, val index: Int) {
    operator fun getValue(thisRef: Any?, property: KProperty<*>): Float {
        return array[index]
    }

    operator fun setValue(thisRef: Any?, property: KProperty<*>, value: Float) {
        array[index] = value
    }
}

class DelegatedVector(xs: FloatArray, ys: FloatArray, zs: FloatArray, index: Int) {
    var x: Float by IntoArrayDelegate(xs, index)
    var y: Float by IntoArrayDelegate(ys, index)
    var z: Float by IntoArrayDelegate(zs, index)

    fun normalize() {
        val l = x * x + y * y + z * z
        x /= l
        y /= l
        z /= l
    }
}

fun timeDodDelegated(count: Int): Duration {
    val xs = FloatArray(count) { rnd.nextFloat() }
    val ys = FloatArray(count) { rnd.nextFloat() }
    val zs = FloatArray(count) { rnd.nextFloat() }

    val vectors = Array(count) { i -> DelegatedVector(xs, ys, zs, i) }

    return measureTime {
        for (vector in vectors) {
            vector.normalize()
        }
    }
}

class PropertyVector(
    val xs: FloatArray,
    val ys: FloatArray,
    val zs: FloatArray,
    val index: Int
) {
    var x: Float
        get() = xs[index]
        set(value) {
            xs[index] = value
        }

    var y: Float
        get() = ys[index]
        set(value) {
            ys[index] = value
        }

    var z: Float
        get() = zs[index]
        set(value) {
            zs[index] = value
        }

    fun normalize() {
        val l = x * x + y * y + z * z
        x /= l
        y /= l
        z /= l
    }
}

class InlinedPropertyVector(
    val xs: FloatArray,
    val ys: FloatArray,
    val zs: FloatArray,
    val index: Int
) {
    var x: Float
        inline get() = xs[index]
        inline set(value) {
            xs[index] = value
        }

    var y: Float
        inline get() = ys[index]
        inline set(value) {
            ys[index] = value
        }

    var z: Float
        inline get() = zs[index]
        inline set(value) {
            zs[index] = value
        }

    inline fun normalize() {
        val l = x * x + y * y + z * z
        x /= l
        y /= l
        z /= l
    }
}


fun timeDodProps(count: Int): Duration {
    val xs = FloatArray(count) { rnd.nextFloat() }
    val ys = FloatArray(count) { rnd.nextFloat() }
    val zs = FloatArray(count) { rnd.nextFloat() }

    val vectors = Array(count) { i -> PropertyVector(xs, ys, zs, i) }

    return measureTime {
        for (vector in vectors) {
            vector.normalize()
        }
    }
}

fun timeDodPropsInlined(count: Int): Duration {
    val xs = FloatArray(count) { rnd.nextFloat() }
    val ys = FloatArray(count) { rnd.nextFloat() }
    val zs = FloatArray(count) { rnd.nextFloat() }

    val vectors = Array(count) { i -> InlinedPropertyVector(xs, ys, zs, i) }

    return measureTime {
        for (vector in vectors) {
            vector.normalize()
        }
    }
}

fun main() {
    println("%10s%10s%10s%10s%10s%10s".format("Count", "OOP", "DOD", "DODd", "DODp", "iDODp"))
    for (count in 1_000_000..10_000_000 step 1_000_000) {

        val oopDurations = ArrayList<Long>()
        val dodDurations = ArrayList<Long>()
        val dodDelegationDurations = ArrayList<Long>()
        val dodPropsDurations = ArrayList<Long>()
        val dodPropsInlinedDurations = ArrayList<Long>()

        for (iteration in 0..10) {
            System.gc()
            oopDurations += timeOop(count).toLong(DurationUnit.NANOSECONDS)
            dodDurations += timeDod(count).toLong(DurationUnit.NANOSECONDS)
            dodDelegationDurations += timeDodDelegated(count).toLong(TimeUnit.NANOSECONDS)
            dodPropsDurations += timeDodProps(count).toLong(DurationUnit.NANOSECONDS)
            dodPropsInlinedDurations += timeDodPropsInlined(count).toLong(DurationUnit.NANOSECONDS)
        }

        val oopDuration = oopDurations.average().toDuration(DurationUnit.NANOSECONDS)
        val dodDuration = dodDurations.average().toDuration(DurationUnit.NANOSECONDS)
        val dodDelegationDuration = dodDelegationDurations.average().toDuration(DurationUnit.NANOSECONDS)
        val dodPropsDuration = dodPropsDurations.average().toDuration(DurationUnit.NANOSECONDS)
        val dodPropsInlinedDuration = dodPropsInlinedDurations.average().toDuration(DurationUnit.NANOSECONDS)

        println("%10s%10s%10s%10s%10s%10s".format(count, oopDuration, dodDuration, dodDelegationDuration, dodPropsDuration, dodPropsInlinedDuration))
    }
}

The output from this code on my machine is:

   Count       OOP       DOD      DODd      DODp     iDODp
   1000000    3.01ms    1.60ms    10.9ms    6.46ms    5.81ms
   2000000    5.17ms    1.12ms    16.9ms    8.85ms    10.1ms
   3000000    7.73ms    1.86ms    26.4ms    14.3ms    15.1ms
   4000000    10.8ms    2.81ms    35.1ms    21.9ms    18.5ms
   5000000    13.7ms    3.82ms    43.6ms    23.1ms    25.2ms
   6000000    16.6ms    4.80ms    51.3ms    26.6ms    30.6ms
   7000000    19.3ms    5.19ms    61.0ms    32.8ms    34.1ms
   8000000    22.0ms    5.96ms    76.7ms    37.3ms    40.6ms
   9000000    24.8ms    6.62ms    79.2ms    45.9ms    40.7ms
  10000000    27.0ms    7.30ms    91.7ms    47.3ms    49.4ms

In summary I’m normalizing vectors:

  • [OOP] As an array of Vector (OOP data class) instances - timeOpp
  • [DOD] As X,Y and Z component float arrays - timeDod (4-5x faster than OOP :smiley: )
  • [DODd] As component arrays with a Vector class facade using delegates - timeDodDelegated/IntoArrayDelegate/DelegatedVector
  • [DODp] As component arrays with a Vector class facade using property getters/setters - timeDodProps/PropertyVector
  • [iDODp] As component arrays with a Vector class facade using inline property getters/setters - timeDodPropsInlined/InlinedPropertyVector

My question is, why are the timings for the property and especially the inlined property facades so far off the vanilla DOD timings? I would have expected the inlined facade to give very similar results…

1 Like

Any benchmarking results that come out of a main function instead of a microbenchmark harness should be ignored. I strongly suggest you to set up your tests with JMH or other equivalent microbenchmarking tools and share the code and results here again.

3 Likes

My first play with micro bench-marking so I’ve no idea if this code is any good :sweat_smile:

package vectors

import org.openjdk.jmh.annotations.*
import java.util.concurrent.TimeUnit
import kotlin.random.Random
import kotlin.reflect.KProperty

val rnd = Random(1234)

data class Vector(var x: Float, var y: Float, var z: Float) {
    fun normalize() {
        val l = x * x + y * y + z * z
        x /= l
        y /= l
        z /= l
    }
}

class IntoArrayDelegate(val array: FloatArray, val index: Int) {
    operator fun getValue(thisRef: Any?, property: KProperty<*>): Float {
        return array[index]
    }

    operator fun setValue(thisRef: Any?, property: KProperty<*>, value: Float) {
        array[index] = value
    }
}

class DelegatedVector(xs: FloatArray, ys: FloatArray, zs: FloatArray, index: Int) {
    var x: Float by IntoArrayDelegate(xs, index)
    var y: Float by IntoArrayDelegate(ys, index)
    var z: Float by IntoArrayDelegate(zs, index)

    fun normalize() {
        val l = x * x + y * y + z * z
        x /= l
        y /= l
        z /= l
    }
}

class PropertyVector(
    val xs: FloatArray,
    val ys: FloatArray,
    val zs: FloatArray,
    val index: Int
) {
    var x: Float
        get() = xs[index]
        set(value) {
            xs[index] = value
        }

    var y: Float
        get() = ys[index]
        set(value) {
            ys[index] = value
        }

    var z: Float
        get() = zs[index]
        set(value) {
            zs[index] = value
        }

    fun normalize() {
        val l = x * x + y * y + z * z
        x /= l
        y /= l
        z /= l
    }
}

class InlinedPropertyVector(
    val xs: FloatArray,
    val ys: FloatArray,
    val zs: FloatArray,
    val index: Int
) {
    var x: Float
        inline get() = xs[index]
        inline set(value) {
            xs[index] = value
        }

    var y: Float
        inline get() = ys[index]
        inline set(value) {
            ys[index] = value
        }

    var z: Float
        inline get() = zs[index]
        inline set(value) {
            zs[index] = value
        }

    inline fun normalize() {
        val l = x * x + y * y + z * z
        x /= l
        y /= l
        z /= l
    }
}

@State(Scope.Benchmark)
open class OopState {
    @Param("100000")
    var count: Int = 0

    var vectors: Array<Vector> = arrayOf()

    @Setup(Level.Invocation)
    fun init() {
        vectors = Array(count) { Vector(rnd.nextFloat(), rnd.nextFloat(), rnd.nextFloat()) }
    }
}

@State(Scope.Benchmark)
open class DodState {
    @Param("100000")
    var count: Int = 0

    val xs = FloatArray(count) { rnd.nextFloat() }
    val ys = FloatArray(count) { rnd.nextFloat() }
    val zs = FloatArray(count) { rnd.nextFloat() }

    @Setup(Level.Invocation)
    fun init() {

    }
}

@State(Scope.Benchmark)
open class DodDelegateState {
    @Param("100000")
    var count: Int = 0

    val xs = FloatArray(count) { rnd.nextFloat() }
    val ys = FloatArray(count) { rnd.nextFloat() }
    val zs = FloatArray(count) { rnd.nextFloat() }

    val vectors = Array(count) { i -> DelegatedVector(xs, ys, zs, i) }
}

@State(Scope.Benchmark)
open class DodPropertyState {
    @Param("10000000")
    var count: Int = 0

    val xs = FloatArray(count) { rnd.nextFloat() }
    val ys = FloatArray(count) { rnd.nextFloat() }
    val zs = FloatArray(count) { rnd.nextFloat() }

    val vectors = Array(count) { i -> PropertyVector(xs, ys, zs, i) }
}

@State(Scope.Benchmark)
open class DodInlinedPropertyState {
    @Param("10000000")
    var count: Int = 0

    val xs = FloatArray(count) { rnd.nextFloat() }
    val ys = FloatArray(count) { rnd.nextFloat() }
    val zs = FloatArray(count) { rnd.nextFloat() }

    val vectors = Array(count) { i -> InlinedPropertyVector(xs, ys, zs, i) }
}

@BenchmarkMode(Mode.All)
@Warmup(iterations = 3)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Measurement(iterations = 1, batchSize = 5)
@Fork(value = 1, warmups = 1)
open class Benchmarks {

    @Benchmark
    fun oopVectors(state: OopState) {
        for(vector in state.vectors) {
            vector.normalize()
        }
    }

    @Benchmark
    fun dodArrays(state: DodState) {
        for (i in state.xs.indices) {
            val x = state.xs[i]
            val y = state.ys[i]
            val z = state.zs[i]
            val l = x * x + y * y + z * z
            state.xs[i] /= l
            state.ys[i] /= l
            state.zs[i] /= l
        }
    }

    @Benchmark
    fun dodVectorDelegates(state: DodDelegateState) {
        for(vector in state.vectors) {
            vector.normalize()
        }
    }

    @Benchmark
    fun dodVectorProperties(state: DodPropertyState) {
        for(vector in state.vectors) {
            vector.normalize()
        }
    }

    @Benchmark
    fun dodVectorInlinedProperties(state: DodInlinedPropertyState) {
        for(vector in state.vectors) {
            vector.normalize()
        }
    }

}

Gives me…

Benchmark                                                                 (count)    Mode     Cnt        Score      Error   Units
Benchmarks.dodArrays                                                       100000   thrpt                0.014             ops/ns
Benchmarks.dodVectorDelegates                                              100000   thrpt                0.365             ops/ns
Benchmarks.dodVectorInlinedProperties                                      100000   thrpt                0.364             ops/ns
Benchmarks.dodVectorProperties                                             100000   thrpt                0.362             ops/ns
Benchmarks.oopVectors                                                      100000   thrpt               ≈ 10⁻⁶             ops/ns
Benchmarks.dodArrays                                                       100000    avgt               72.730              ns/op
Benchmarks.dodVectorDelegates                                              100000    avgt                2.765              ns/op
Benchmarks.dodVectorInlinedProperties                                      100000    avgt                2.714              ns/op
Benchmarks.dodVectorProperties                                             100000    avgt                2.714              ns/op
Benchmarks.oopVectors                                                      100000    avgt          1189224.866              ns/op
Benchmarks.dodArrays                                                       100000  sample  355407       19.277 ±    0.727   ns/op
Benchmarks.dodArrays:dodArrays·p0.00                                       100000  sample               15.000              ns/op
Benchmarks.dodArrays:dodArrays·p0.50                                       100000  sample               18.000              ns/op
Benchmarks.dodArrays:dodArrays·p0.90                                       100000  sample               19.000              ns/op
Benchmarks.dodArrays:dodArrays·p0.95                                       100000  sample               19.000              ns/op
Benchmarks.dodArrays:dodArrays·p0.99                                       100000  sample               20.000              ns/op
Benchmarks.dodArrays:dodArrays·p0.999                                      100000  sample               26.000              ns/op
Benchmarks.dodArrays:dodArrays·p0.9999                                     100000  sample             1325.613              ns/op
Benchmarks.dodArrays:dodArrays·p1.00                                       100000  sample            25792.000              ns/op
Benchmarks.dodVectorDelegates                                              100000  sample  251133       30.107 ±    1.280   ns/op
Benchmarks.dodVectorDelegates:dodVectorDelegates·p0.00                     100000  sample               16.000              ns/op
Benchmarks.dodVectorDelegates:dodVectorDelegates·p0.50                     100000  sample               29.000              ns/op
Benchmarks.dodVectorDelegates:dodVectorDelegates·p0.90                     100000  sample               34.000              ns/op
Benchmarks.dodVectorDelegates:dodVectorDelegates·p0.95                     100000  sample               35.000              ns/op
Benchmarks.dodVectorDelegates:dodVectorDelegates·p0.99                     100000  sample               40.000              ns/op
Benchmarks.dodVectorDelegates:dodVectorDelegates·p0.999                    100000  sample               57.000              ns/op
Benchmarks.dodVectorDelegates:dodVectorDelegates·p0.9999                   100000  sample            12854.928              ns/op
Benchmarks.dodVectorDelegates:dodVectorDelegates·p1.00                     100000  sample            27200.000              ns/op
Benchmarks.dodVectorInlinedProperties                                      100000  sample  227080       29.479 ±    1.721   ns/op
Benchmarks.dodVectorInlinedProperties:dodVectorInlinedProperties·p0.00     100000  sample               15.000              ns/op
Benchmarks.dodVectorInlinedProperties:dodVectorInlinedProperties·p0.50     100000  sample               27.000              ns/op
Benchmarks.dodVectorInlinedProperties:dodVectorInlinedProperties·p0.90     100000  sample               30.000              ns/op
Benchmarks.dodVectorInlinedProperties:dodVectorInlinedProperties·p0.95     100000  sample               35.000              ns/op
Benchmarks.dodVectorInlinedProperties:dodVectorInlinedProperties·p0.99     100000  sample               37.000              ns/op
Benchmarks.dodVectorInlinedProperties:dodVectorInlinedProperties·p0.999    100000  sample               58.000              ns/op
Benchmarks.dodVectorInlinedProperties:dodVectorInlinedProperties·p0.9999   100000  sample            14900.670              ns/op
Benchmarks.dodVectorInlinedProperties:dodVectorInlinedProperties·p1.00     100000  sample            24608.000              ns/op
Benchmarks.dodVectorProperties                                             100000  sample  252660       29.027 ±    1.092   ns/op
Benchmarks.dodVectorProperties:dodVectorProperties·p0.00                   100000  sample               15.000              ns/op
Benchmarks.dodVectorProperties:dodVectorProperties·p0.50                   100000  sample               29.000              ns/op
Benchmarks.dodVectorProperties:dodVectorProperties·p0.90                   100000  sample               33.000              ns/op
Benchmarks.dodVectorProperties:dodVectorProperties·p0.95                   100000  sample               34.000              ns/op
Benchmarks.dodVectorProperties:dodVectorProperties·p0.99                   100000  sample               38.000              ns/op
Benchmarks.dodVectorProperties:dodVectorProperties·p0.999                  100000  sample               48.000              ns/op
Benchmarks.dodVectorProperties:dodVectorProperties·p0.9999                 100000  sample             2241.648              ns/op
Benchmarks.dodVectorProperties:dodVectorProperties·p1.00                   100000  sample            25728.000              ns/op
Benchmarks.oopVectors                                                      100000  sample    5881  1056902.769 ± 1361.110   ns/op
Benchmarks.oopVectors:oopVectors·p0.00                                     100000  sample           991232.000              ns/op
Benchmarks.oopVectors:oopVectors·p0.50                                     100000  sample          1062912.000              ns/op
Benchmarks.oopVectors:oopVectors·p0.90                                     100000  sample          1083392.000              ns/op
Benchmarks.oopVectors:oopVectors·p0.95                                     100000  sample          1103872.000              ns/op
Benchmarks.oopVectors:oopVectors·p0.99                                     100000  sample          1138688.000              ns/op
Benchmarks.oopVectors:oopVectors·p0.999                                    100000  sample          1213267.968              ns/op
Benchmarks.oopVectors:oopVectors·p0.9999                                   100000  sample          1273856.000              ns/op
Benchmarks.oopVectors:oopVectors·p1.00                                     100000  sample          1273856.000              ns/op
Benchmarks.dodArrays                                                       100000      ss             1095.000              ns/op
Benchmarks.dodVectorDelegates                                              100000      ss            14374.000              ns/op
Benchmarks.dodVectorInlinedProperties                                      100000      ss             1736.000              ns/op
Benchmarks.dodVectorProperties                                             100000      ss             1674.000              ns/op
Benchmarks.oopVectors                                                      100000      ss          1186483.000              ns/op