2using System.Collections.Generic;
14 [TypeConverter(typeof(ExpandableObjectConverter))]
19 double m_dfAttnDropout;
20 double m_dfResidDropout;
21 uint m_nBlockSize = 128;
48 [Description(
"Specifies number of layers (transformer blocks) used.")]
51 get {
return m_nLayers; }
52 set { m_nLayers = value; }
58 [Description(
"Specifies number of heads used.")]
61 get {
return m_nHeads; }
62 set { m_nHeads = value; }
70 get {
return m_nEmbed; }
71 set { m_nEmbed = value; }
79 get {
return m_nBlockSize; }
80 set { m_nBlockSize = value; }
88 get {
return m_dfAttnDropout; }
89 set { m_dfAttnDropout = value; }
97 get {
return m_dfResidDropout; }
98 set { m_dfResidDropout = value; }
106 get {
return m_weightInit; }
107 set { m_weightInit = value; }
111 public override object Load(
System.IO.BinaryReader br,
bool bNewInstance =
true)
153 rgChildren.
Add(
"layers",
layers.ToString());
154 rgChildren.
Add(
"heads",
heads.ToString());
155 rgChildren.
Add(
"embed",
embed.ToString());
161 return new RawProto(strName,
"", rgChildren);
174 if ((strVal = rp.
FindValue(
"layers")) !=
null)
175 p.
layers = uint.Parse(strVal);
177 if ((strVal = rp.
FindValue(
"heads")) !=
null)
178 p.
heads = uint.Parse(strVal);
180 if ((strVal = rp.
FindValue(
"embed")) !=
null)
181 p.
embed = uint.Parse(strVal);
183 if ((strVal = rp.
FindValue(
"block_size")) !=
null)
186 if ((strVal = rp.
FindValue(
"attn_dropout")) !=
null)
189 if ((strVal = rp.
FindValue(
"resid_dropout")) !=
null)
192 if ((strVal = rp.
FindValue(
"weight_init")) !=
null)
196 else if (strVal ==
WEIGHT_INIT.ENCODER_DECODER.ToString())
199 throw new Exception(
"Unknown weight init strategy '" + strVal +
"'!");
The RawProtoCollection class is a list of RawProto objects.
void Add(RawProto p)
Adds a RawProto to the collection.
The RawProto class is used to parse and output Google prototxt file data.
static RawProto Parse(string str)
Parses a prototxt and places it in a new RawProto.
string FindValue(string strName)
Searches for a value of a node within this node's children.
The LayerParameterBase is the base class for all other layer specific parameters.
Specifies the parameters for the MultiheadAttentionLayer.
WEIGHT_INIT
Defines the weight initialization strategy.
double attn_dropout
Specifies dropout probability used on the attention weights.
override RawProto ToProto(string strName)
Convert the parameter into a RawProto.
uint block_size
Specifies size of the block.
override object Load(System.IO.BinaryReader br, bool bNewInstance=true)
Load the parameter from a binary reader.
uint heads
The number of heads used.
uint layers
The number of layers (transformer blocks) used.
double resid_dropout
Specifies dropout probability used on the residual weights.
override LayerParameterBase Clone()
Creates a new copy of this instance of the parameter.
MultiheadAttentionParameter()
Constructor for the parameter.
override void Copy(LayerParameterBase src)
Copy one parameter to another.
uint embed
Specifies size of the embed.
static MultiheadAttentionParameter FromProto(RawProto rp)
Parses the parameter from a RawProto.
WEIGHT_INIT weight_init
Specifies the weight initialization strategy (default = ENCODER_DECODER).
The MyCaffe.basecode namespace contains all generic types used throughout MyCaffe.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...