io-chess
UCI chess engine
Loading...
Searching...
No Matches
SharedMoEWeights Struct Reference

Contains the globally shared, read-only weights for the Factorized MoE network. More...

#include <MoECacheModel.hpp>

Public Member Functions

void init_architecture (int branchConvLayers)

Public Attributes

std::array< Branch, 12 > branches {}
std::array< float,(size_t) 12 *NET_MIXER_OUT *NET_BRANCH_DIM > mixerWBr {}
std::array< float,(size_t) NET_BYPASS *NET_MIXER_OUT > mixerWBp {}
std::array< float, NET_MIXER_OUT > mixerB {}
std::array< float,(size_t) NET_MIXER_OUT *NET_GLOBALS > globalW {}
std::array< float, NET_MIXER_OUT > globalB {}
std::array< float,(size_t) NET_EXPERTS *NET_GLOBALS > gateW {}
std::array< float, NET_EXPERTS > gateB {}
std::array< Expert, NET_EXPERTS > experts {}

Static Public Attributes

static constexpr int bd = NET_BRANCH_DIM
static constexpr int nf = NET_MIXER_OUT
static constexpr int nBypass = NET_BYPASS
static constexpr int nGlobals = NET_GLOBALS
static constexpr int nExperts = NET_EXPERTS
static constexpr int ebo = NET_EXPERT_BOTTLENECK
static constexpr int eh = NET_EXPERT_HIDDEN

Detailed Description

Contains the globally shared, read-only weights for the Factorized MoE network.

This struct stores the pre-trained weights for all convolution branches, the mixer layer, the router gate, and the individual expert networks. In the engine, these weights are loaded once into memory and shared across all search threads.

Note
Includes forward(), a full-batch non-incremental evaluation function strictly used by benchmark tools. The engine does not use forward().

Member Function Documentation

◆ init_architecture()

void SharedMoEWeights::init_architecture ( int branchConvLayers)
inline
Here is the call graph for this function:
Here is the caller graph for this function:

Member Data Documentation

◆ bd

int SharedMoEWeights::bd = NET_BRANCH_DIM
static constexpr

◆ branches

std::array<Branch, 12> SharedMoEWeights::branches {}

◆ ebo

int SharedMoEWeights::ebo = NET_EXPERT_BOTTLENECK
static constexpr

◆ eh

int SharedMoEWeights::eh = NET_EXPERT_HIDDEN
static constexpr

◆ experts

std::array<Expert, NET_EXPERTS> SharedMoEWeights::experts {}

◆ gateB

std::array<float, NET_EXPERTS> SharedMoEWeights::gateB {}

◆ gateW

std::array< float, (size_t)NET_EXPERTS * NET_GLOBALS> SharedMoEWeights::gateW {}

◆ globalB

std::array<float, NET_MIXER_OUT> SharedMoEWeights::globalB {}

◆ globalW

std::array<float, (size_t)NET_MIXER_OUT * NET_GLOBALS> SharedMoEWeights::globalW {}

◆ mixerB

std::array<float, NET_MIXER_OUT> SharedMoEWeights::mixerB {}

◆ mixerWBp

std::array<float, (size_t)NET_BYPASS * NET_MIXER_OUT> SharedMoEWeights::mixerWBp {}

◆ mixerWBr

std::array<float, (size_t)12 * NET_MIXER_OUT * NET_BRANCH_DIM> SharedMoEWeights::mixerWBr {}

◆ nBypass

int SharedMoEWeights::nBypass = NET_BYPASS
static constexpr

◆ nExperts

int SharedMoEWeights::nExperts = NET_EXPERTS
static constexpr

◆ nf

int SharedMoEWeights::nf = NET_MIXER_OUT
static constexpr

◆ nGlobals

int SharedMoEWeights::nGlobals = NET_GLOBALS
static constexpr

The documentation for this struct was generated from the following file: