Fork me on GitHub

src/arraymancer/laser/primitives/matrix_multiplication/gemm_tiling

  Source Edit

Types

CPUFeatureX86 = enum
  x86_Generic, x86_SSE, x86_SSE2, x86_SSE4_1, x86_AVX, x86_AVX_FMA, x86_AVX2,
  x86_AVX512
  Source Edit
MicroKernel = object
  mr*, nr*: int
  cpu_simd*: CPUFeatureX86
  nb_scalars*: int
  nb_vecs_nr*: int
  c_unit_stride*: bool
  pt*: int
  Source Edit
Tiles[T] = ref object
  a*: ptr UncheckedArray[T]
  b*: ptr UncheckedArray[T]
  mc*, nc*, kc*: int
  ic_num_tasks*: int
  upanelA_size*: int
  Source Edit

Procs

func get_num_tiles(dim_size, tile_size: int): int {.inline, ...raises: [],
    tags: [], forbids: [].}
Get the number of tiles along a dimension of size `dim_size`, given the tile size `tile_size`.
  Source Edit
proc newTiles(ukernel: static MicroKernel; T: typedesc; M, N, K: Natural): Tiles[
    T]
  Source Edit
func partitionMNK(ukernel: static MicroKernel; T: typedesc; M, N, K: Natural): tuple[
    mc, nc, kc: int]
  Source Edit
func x86_ukernel(cpu: CPUFeatureX86; T: typedesc; c_unit_stride: bool): MicroKernel
  Source Edit

Macros

macro extract_c_unit_stride(ukernel: static MicroKernel): untyped
  Source Edit
macro extract_cpu_simd(ukernel: static MicroKernel): untyped
  Source Edit
macro extract_mr(ukernel: static MicroKernel): untyped
  Source Edit
macro extract_nb_scalars(ukernel: static MicroKernel): untyped
  Source Edit
macro extract_nb_vecs_nr(ukernel: static MicroKernel): untyped
  Source Edit
macro extract_nr(ukernel: static MicroKernel): untyped
  Source Edit
macro extract_pt(ukernel: static MicroKernel): untyped
  Source Edit
Arraymancer Technical reference Tutorial Spellbook (How-To's) Under the hood