mycaffe/html/_transformer_block_parameter_8cs_source.html

using System;

using System.Collections.Generic;

using System.Linq;

using System.Text;

using System.ComponentModel;

using MyCaffe.basecode;


namespace MyCaffe.param.gpt

{

    /// <summary>
    /// Specifies the parameters for the TransformerBlockLayer.
    /// </summary>
    /// <remarks>
    /// Numeric values are written to, and read from, the RawProto using the invariant culture so that
    /// the serialized text does not depend on the regional settings of the machine.
    /// </remarks>
    [Serializable]
    [TypeConverter(typeof(ExpandableObjectConverter))]
    public class TransformerBlockParameter : LayerParameterBase
    {
        uint m_nHeads = 6;
        uint m_nEmbed = 192;
        double m_dfAttnDropout = 0.1;
        double m_dfResidDropout = 0.1;
        uint m_nBlockSize = 128;
        uint m_nLayers = 6;
        ACTIVATION m_activation = ACTIVATION.RELU;
        BLOCK_TYPE m_type = BLOCK_TYPE.CAUSAL_SELF_ATTENTION;
        bool m_bEnableLayerNormCudaImplementation = false;

        /// <summary>
        /// Defines the type of transformer block.
        /// </summary>
        public enum BLOCK_TYPE
        {
            /// <summary>
            /// Specifies a causal self attention block (default).
            /// </summary>
            CAUSAL_SELF_ATTENTION = 0,
            /// <summary>
            /// Specifies an encoder block.
            /// </summary>
            ENCODER,
            /// <summary>
            /// Specifies a decoder block.
            /// </summary>
            DECODER
        }

        /// <summary>
        /// Defines the various activations supported by the TransformerBlock.
        /// </summary>
        public enum ACTIVATION
        {
            /// <summary>
            /// Specifies the RELU activation (default).
            /// </summary>
            RELU = 0,
            /// <summary>
            /// Specifies the GELU activation.
            /// </summary>
            GELU = 1,
            /// <summary>
            /// Specifies the GELU_BERT activation.
            /// </summary>
            GELU_BERT = 2
        }

        /// <summary>
        /// Constructor for the parameter.
        /// </summary>
        public TransformerBlockParameter()
        {
        }

        /// <summary>
        /// Specifies to use the low-level full cuda implementation of LayerNorm (default = false).
        /// </summary>
        [Description("Specifies to use the low-level full cuda implementation of LayerNorm (default = false).")]
        public bool enable_layernorm_cuda_impl
        {
            get { return m_bEnableLayerNormCudaImplementation; }
            set { m_bEnableLayerNormCudaImplementation = value; }
        }

        /// <summary>
        /// Specifies the activation type to use (default = RELU).
        /// </summary>
        [Description("Specifies the activation type to use (default = RELU).")]
        public ACTIVATION activation
        {
            get { return m_activation; }
            set { m_activation = value; }
        }

        /// <summary>
        /// Specifies the type of transformer block to configure.
        /// </summary>
        [Description("Specifies the type of transformer block to configure.")]
        public BLOCK_TYPE block_type
        {
            get { return m_type; }
            set { m_type = value; }
        }

        /// <summary>
        /// The number of layers (transformer blocks) used.
        /// </summary>
        [Description("Specifies number of layers (transformer blocks) used.")]
        public uint layers
        {
            get { return m_nLayers; }
            set { m_nLayers = value; }
        }

        /// <summary>
        /// The number of heads used.
        /// </summary>
        [Description("Specifies number of heads used.")]
        public uint heads
        {
            get { return m_nHeads; }
            set { m_nHeads = value; }
        }

        /// <summary>
        /// Specifies size of the embed.
        /// </summary>
        [Description("Specifies size of the embed.")]
        public uint embed
        {
            get { return m_nEmbed; }
            set { m_nEmbed = value; }
        }

        /// <summary>
        /// Specifies size of the block.
        /// </summary>
        [Description("Specifies size of the block.")]
        public uint block_size
        {
            get { return m_nBlockSize; }
            set { m_nBlockSize = value; }
        }

        /// <summary>
        /// Specifies dropout probability used on the attention weights.
        /// </summary>
        [Description("Specifies dropout probability used on the attention weights.")]
        public double attn_dropout
        {
            get { return m_dfAttnDropout; }
            set { m_dfAttnDropout = value; }
        }

        /// <summary>
        /// Specifies dropout probability used on the residual weights.
        /// </summary>
        [Description("Specifies dropout probability used on the residual weights.")]
        public double resid_dropout
        {
            get { return m_dfResidDropout; }
            set { m_dfResidDropout = value; }
        }

        /// <summary>
        /// Load the parameter from a binary reader.
        /// </summary>
        /// <param name="br">Specifies the binary reader; a single string holding the prototxt is read from it.</param>
        /// <param name="bNewInstance">When <i>false</i>, the loaded values are also copied into this instance.</param>
        /// <returns>The newly parsed instance is returned (regardless of <i>bNewInstance</i>).</returns>
        public override object Load(System.IO.BinaryReader br, bool bNewInstance = true)
        {
            RawProto proto = RawProto.Parse(br.ReadString());
            TransformerBlockParameter p = FromProto(proto);

            if (!bNewInstance)
                Copy(p);

            return p;
        }

        /// <summary>
        /// Copy one parameter to another.
        /// </summary>
        /// <param name="src">Specifies the parameter to copy; it is cast directly to a TransformerBlockParameter,
        /// so any other type results in an InvalidCastException.</param>
        public override void Copy(LayerParameterBase src)
        {
            TransformerBlockParameter p = (TransformerBlockParameter)src;

            m_nLayers = p.layers;
            m_nHeads = p.heads;
            m_nEmbed = p.embed;
            m_nBlockSize = p.block_size;
            m_dfAttnDropout = p.attn_dropout;
            m_dfResidDropout = p.resid_dropout;
            m_activation = p.activation;
            m_type = p.block_type;
            m_bEnableLayerNormCudaImplementation = p.enable_layernorm_cuda_impl;
        }

        /// <summary>
        /// Creates a new copy of this instance of the parameter.
        /// </summary>
        /// <returns>A new instance holding the same values as this parameter is returned.</returns>
        public override LayerParameterBase Clone()
        {
            TransformerBlockParameter p = new TransformerBlockParameter();
            p.Copy(this);
            return p;
        }

        /// <summary>
        /// Convert the parameter into a RawProto.
        /// </summary>
        /// <param name="strName">Specifies the name to associate with the RawProto.</param>
        /// <returns>The new RawProto is returned.</returns>
        public override RawProto ToProto(string strName)
        {
            // Always write numbers with the invariant culture so that a '.' decimal separator is emitted
            // no matter which regional settings are active on the machine.
            System.Globalization.CultureInfo ci = System.Globalization.CultureInfo.InvariantCulture;
            RawProtoCollection rgChildren = new RawProtoCollection();

            rgChildren.Add("layers", layers.ToString(ci));
            rgChildren.Add("heads", heads.ToString(ci));
            rgChildren.Add("embed", embed.ToString(ci));
            rgChildren.Add("block_size", block_size.ToString(ci));
            rgChildren.Add("attn_dropout", attn_dropout.ToString(ci));
            rgChildren.Add("resid_dropout", resid_dropout.ToString(ci));
            rgChildren.Add("activation", activation.ToString());
            rgChildren.Add("block_type", block_type.ToString());
            rgChildren.Add("enable_ln_cuda_impl", enable_layernorm_cuda_impl.ToString());

            return new RawProto(strName, "", rgChildren);
        }

        /// <summary>
        /// Parses the parameter from a RawProto.
        /// </summary>
        /// <remarks>
        /// Values missing from the RawProto keep their defaults.  An unrecognized 'activation' value
        /// falls back to RELU, and an unrecognized 'block_type' value leaves the default block type in place.
        /// </remarks>
        /// <param name="rp">Specifies the RawProto to parse.</param>
        /// <returns>A new instance of the parameter is returned.</returns>
        public static TransformerBlockParameter FromProto(RawProto rp)
        {
            System.Globalization.CultureInfo ci = System.Globalization.CultureInfo.InvariantCulture;
            string strVal;
            TransformerBlockParameter p = new TransformerBlockParameter();

            if ((strVal = rp.FindValue("layers")) != null)
                p.layers = uint.Parse(strVal, ci);

            if ((strVal = rp.FindValue("heads")) != null)
                p.heads = uint.Parse(strVal, ci);

            if ((strVal = rp.FindValue("embed")) != null)
                p.embed = uint.Parse(strVal, ci);

            if ((strVal = rp.FindValue("block_size")) != null)
                p.block_size = uint.Parse(strVal, ci);

            if ((strVal = rp.FindValue("attn_dropout")) != null)
                p.attn_dropout = parseInvariantDouble(strVal);

            if ((strVal = rp.FindValue("resid_dropout")) != null)
                p.resid_dropout = parseInvariantDouble(strVal);

            if ((strVal = rp.FindValue("activation")) != null)
            {
                if (strVal == ACTIVATION.GELU.ToString())
                    p.activation = ACTIVATION.GELU;
                else if (strVal == ACTIVATION.GELU_BERT.ToString())
                    p.activation = ACTIVATION.GELU_BERT;
                else
                    p.activation = ACTIVATION.RELU;
            }

            if ((strVal = rp.FindValue("block_type")) != null)
            {
                if (strVal == BLOCK_TYPE.CAUSAL_SELF_ATTENTION.ToString())
                    p.block_type = BLOCK_TYPE.CAUSAL_SELF_ATTENTION;
                else if (strVal == BLOCK_TYPE.ENCODER.ToString())
                    p.block_type = BLOCK_TYPE.ENCODER;
                else if (strVal == BLOCK_TYPE.DECODER.ToString())
                    p.block_type = BLOCK_TYPE.DECODER;
            }

            if ((strVal = rp.FindValue("enable_ln_cuda_impl")) != null)
                p.enable_layernorm_cuda_impl = bool.Parse(strVal);

            return p;
        }

        /// <summary>
        /// Parses a floating point value using the invariant culture ('.' decimal separator).
        /// </summary>
        /// <param name="strVal">Specifies the text to parse.</param>
        /// <returns>The parsed value is returned.</returns>
        private static double parseInvariantDouble(string strVal)
        {
            double dfVal;

            // NumberStyles.Float intentionally excludes thousands separators so that text such as '0,1'
            // is rejected here instead of being silently read as 1.
            if (double.TryParse(strVal, System.Globalization.NumberStyles.Float, System.Globalization.CultureInfo.InvariantCulture, out dfVal))
                return dfVal;

            // Fall back to the previous, culture-sensitive parse so that text written earlier on a machine
            // using a comma decimal separator can still be read (and invalid text still throws as before).
            return double.Parse(strVal);
        }
    }

}

MyCaffe.basecode.RawProtoCollection
The RawProtoCollection class is a list of RawProto objects.
Definition: RawProtoCollection.cs:12

MyCaffe.basecode.RawProtoCollection.Add
void Add(RawProto p)
Adds a RawProto to the collection.
Definition: RawProtoCollection.cs:55

MyCaffe.basecode.RawProto
The RawProto class is used to parse and output Google prototxt file data.
Definition: RawProto.cs:17

MyCaffe.basecode.RawProto.Parse
static RawProto Parse(string str)
Parses a prototxt and places it in a new RawProto.
Definition: RawProto.cs:306

MyCaffe.basecode.RawProto.FindValue
string FindValue(string strName)
Searches for a value of a node within this node's children.
Definition: RawProto.cs:105

MyCaffe.param.LayerParameterBase
The LayerParameterBase is the base class for all other layer specific parameters.
Definition: LayerParameterBase.cs:18

MyCaffe.param.gpt.TransformerBlockParameter
Specifies the parameters for the TransformerBlockLayer.
Definition: TransformerBlockParameter.cs:16

MyCaffe.param.gpt.TransformerBlockParameter.Load
override object Load(System.IO.BinaryReader br, bool bNewInstance=true)
Load the parameter from a binary reader.
Definition: TransformerBlockParameter.cs:158

MyCaffe.param.gpt.TransformerBlockParameter.resid_dropout
double resid_dropout
Specifies dropout probability used on the residual weights.
Definition: TransformerBlockParameter.cs:152

MyCaffe.param.gpt.TransformerBlockParameter.ACTIVATION
ACTIVATION
Defines the various activations supported by the TransformerBlock.
Definition: TransformerBlockParameter.cs:50

MyCaffe.param.gpt.TransformerBlockParameter.layers
uint layers
The number of layers (transformer blocks) used.
Definition: TransformerBlockParameter.cs:106

MyCaffe.param.gpt.TransformerBlockParameter.heads
uint heads
The number of heads used.
Definition: TransformerBlockParameter.cs:116

MyCaffe.param.gpt.TransformerBlockParameter.FromProto
static TransformerBlockParameter FromProto(RawProto rp)
Parses the parameter from a RawProto.
Definition: TransformerBlockParameter.cs:220

MyCaffe.param.gpt.TransformerBlockParameter.ToProto
override RawProto ToProto(string strName)
Convert the parameter into a RawProto.
Definition: TransformerBlockParameter.cs:198

MyCaffe.param.gpt.TransformerBlockParameter.Copy
override void Copy(LayerParameterBase src)
Copy one parameter to another.
Definition: TransformerBlockParameter.cs:170

MyCaffe.param.gpt.TransformerBlockParameter.block_type
BLOCK_TYPE block_type
Specifies the type of transformer block to configure.
Definition: TransformerBlockParameter.cs:96

MyCaffe.param.gpt.TransformerBlockParameter.activation
ACTIVATION activation
Specifies the activation type to use (default = RELU)
Definition: TransformerBlockParameter.cs:87

MyCaffe.param.gpt.TransformerBlockParameter.enable_layernorm_cuda_impl
bool enable_layernorm_cuda_impl
Specifies to use the low-level full cuda implementation of LayerNorm (default = false).
Definition: TransformerBlockParameter.cs:78

MyCaffe.param.gpt.TransformerBlockParameter.TransformerBlockParameter
TransformerBlockParameter()
Constructor for the parameter.
Definition: TransformerBlockParameter.cs:66

MyCaffe.param.gpt.TransformerBlockParameter.attn_dropout
double attn_dropout
Specifies dropout probability used on the attention weights.
Definition: TransformerBlockParameter.cs:143

MyCaffe.param.gpt.TransformerBlockParameter.BLOCK_TYPE
BLOCK_TYPE
Defines the type of transformer block
Definition: TransformerBlockParameter.cs:31

MyCaffe.param.gpt.TransformerBlockParameter.embed
uint embed
Specifies size of the embed.
Definition: TransformerBlockParameter.cs:125

MyCaffe.param.gpt.TransformerBlockParameter.block_size
uint block_size
Specifies size of the block.
Definition: TransformerBlockParameter.cs:134

MyCaffe.param.gpt.TransformerBlockParameter.Clone
override LayerParameterBase Clone()
Creates a new copy of this instance of the parameter.
Definition: TransformerBlockParameter.cs:186

MyCaffe.basecode
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Definition: Annotation.cs:12

MyCaffe.param.gpt
Definition: CausalSelfAttentionParameter.cs:9

MyCaffe
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...
Definition: Annotation.cs:12

System.ComponentModel
Definition: Component.cs:11

System
Definition: Component.cs:11