2using System.Collections.Generic;
14 [TypeConverter(typeof(ExpandableObjectConverter))]
19 double m_dfAttnDropout;
20 double m_dfResidDropout;
21 uint m_nBlockSize = 128;
32 [Description(
"Specifies number of layers (transformer blocks) used.")]
35 get {
return m_nLayers; }
36 set { m_nLayers = value; }
42 [Description(
"Specifies number of heads used.")]
45 get {
return m_nHeads; }
46 set { m_nHeads = value; }
54 get {
return m_nEmbed; }
55 set { m_nEmbed = value; }
63 get {
return m_nBlockSize; }
64 set { m_nBlockSize = value; }
72 get {
return m_dfAttnDropout; }
73 set { m_dfAttnDropout = value; }
81 get {
return m_dfResidDropout; }
82 set { m_dfResidDropout = value; }
86 public override object Load(
System.IO.BinaryReader br,
bool bNewInstance =
true)
127 rgChildren.
Add(
"layers",
layers.ToString());
128 rgChildren.
Add(
"heads",
heads.ToString());
129 rgChildren.
Add(
"embed",
embed.ToString());
134 return new RawProto(strName,
"", rgChildren);
147 if ((strVal = rp.
FindValue(
"layers")) !=
null)
148 p.
layers = uint.Parse(strVal);
150 if ((strVal = rp.
FindValue(
"heads")) !=
null)
151 p.
heads = uint.Parse(strVal);
153 if ((strVal = rp.
FindValue(
"embed")) !=
null)
154 p.
embed = uint.Parse(strVal);
156 if ((strVal = rp.
FindValue(
"block_size")) !=
null)
159 if ((strVal = rp.
FindValue(
"attn_dropout")) !=
null)
162 if ((strVal = rp.
FindValue(
"resid_dropout")) !=
null)
The RawProtoCollection class is a list of RawProto objects.
void Add(RawProto p)
Adds a RawProto to the collection.
The RawProto class is used to parse and output Google prototxt file data.
static RawProto Parse(string str)
Parses a prototxt and places it in a new RawProto.
string FindValue(string strName)
Searches for a value of a node within this node's children.
The LayerParameterBase is the base class for all other layer specific parameters.
Specifies the parameters for the CausalSelfAttentionLayer.
override RawProto ToProto(string strName)
Convert the parameter into a RawProto.
uint embed
Specifies size of the embed.
static CausalSelfAttentionParameter FromProto(RawProto rp)
Parses the parameter from a RawProto.
override void Copy(LayerParameterBase src)
Copy one parameter to another.
override object Load(System.IO.BinaryReader br, bool bNewInstance=true)
Load the parameter from a binary reader.
uint heads
The number of heads used.
uint block_size
Specifies size of the block.
double resid_dropout
Specifies dropout probability used on the residual weights.
override LayerParameterBase Clone()
Creates a new copy of this instance of the parameter.
uint layers
The number of layers (transformer blocks) used.
double attn_dropout
Specifies dropout probability used on the attention weights.
CausalSelfAttentionParameter()
Constructor for the parameter.
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...