# fc_net.py (cs231n)

# If you find any mistakes, please point them out -- it is much appreciated.
import numpy as np

from cs231n.layers import *

from cs231n.layer_utils import *

class TwoLayerNet(object):
  """
  Fully-connected network with a single hidden layer.

  The forward pass is affine - relu - affine - softmax, assembled from the
  modular layer functions. Inputs have dimension D, the hidden layer has H
  units and scores are produced for C classes.

  No optimization happens here: the class only exposes its parameters and a
  loss/gradient computation, and is meant to be driven by a separate Solver
  object.

  All learnable parameters live in self.params, a dictionary mapping the
  names 'W1', 'b1', 'W2', 'b2' to numpy arrays.
  """

  def __init__(self, input_dim=3*32*32, hidden_dim=100, num_classes=10,
               weight_scale=1e-3, reg=0.0):
    """
    Set up the parameters of the network.

    Inputs:
    - input_dim: Integer size of each (flattened) input.
    - hidden_dim: Integer number of units in the hidden layer.
    - num_classes: Integer number of output classes.
    - weight_scale: Scalar multiplied onto standard-normal samples to
      initialize the weight matrices.
    - reg: Scalar L2 regularization strength.
    """
    self.reg = reg
    # The dict literal is evaluated left to right, so W1 is sampled before W2
    # and the keys are inserted as W1, b1, W2, b2. Biases start at zero.
    self.params = {
      'W1': np.random.randn(input_dim, hidden_dim) * weight_scale,
      'b1': np.zeros((hidden_dim,)),
      'W2': np.random.randn(hidden_dim, num_classes) * weight_scale,
      'b2': np.zeros((num_classes,)),
    }

  def loss(self, X, y=None):
    """
    Evaluate the network on a minibatch, optionally with gradients.

    Inputs:
    - X: Array of input data of shape (N, d_1, ..., d_k)
    - y: Array of labels of shape (N,), where y[i] is the label of X[i].
      Pass None to only compute scores.

    Returns:
    If y is None, the output of the second affine layer:
    - scores: Array of shape (N, C); scores[i, c] is the score of class c for
      X[i].

    Otherwise a tuple of:
    - loss: Scalar softmax loss plus the L2 penalty on W1 and W2.
    - grads: Dictionary with the same keys as self.params holding the
      gradient of the loss with respect to each parameter. Biases are not
      regularized.
    """
    W1, b1 = self.params['W1'], self.params['b1']
    W2, b2 = self.params['W2'], self.params['b2']

    # Forward pass: affine - relu, then the final affine layer.
    hidden, hidden_cache = affine_relu_forward(X, W1, b1)
    scores, scores_cache = affine_forward(hidden, W2, b2)

    # Test mode: no labels, so only the scores are needed.
    if y is None:
      return scores

    data_loss, dscores = softmax_loss(scores, y)
    loss = data_loss + 0.5*self.reg*(np.sum(W2*W2) + np.sum(W1*W1))

    # Backward pass, walking the layers in reverse order.
    dhidden, dW2, db2 = affine_backward(dscores, scores_cache)
    _, dW1, db1 = affine_relu_backward(dhidden, hidden_cache)

    # Gradient of the L2 penalty, added in place to the weight gradients.
    dW2 += W2*self.reg
    dW1 += W1*self.reg

    grads = {'W2': dW2, 'b2': db2, 'W1': dW1, 'b1': db1}
    return loss, grads

class FullyConnectedNet(object):
  """
  A fully-connected neural network with an arbitrary number of hidden layers,
  ReLU nonlinearities, and a softmax loss function. Dropout and batch
  normalization are available as options. For a network with L layers,
  the architecture is

  {affine - [batch norm] - relu - [dropout]} x (L - 1) - affine - softmax

  where batch normalization and dropout are optional, and the {...} block is
  repeated L - 1 times.

  Learnable parameters are stored in the self.params dictionary under the
  names 'W<i>' and 'b<i>' (plus 'gamma<i>' and 'beta<i>' for hidden layers
  when batch normalization is enabled), with i starting at 1. They are meant
  to be learned using the Solver class.

  NOTE: self.num_layers holds the number of *hidden* layers, i.e.
  len(hidden_dims), not the total layer count L.
  """

  def __init__(self, hidden_dims=[100], input_dim=3*32*32, num_classes=10,
               dropout=0, use_batchnorm=False, reg=0.0,
               weight_scale=1e-2, dtype=np.float32, seed=None):
    """
    Initialize a new FullyConnectedNet.

    Inputs:
    - hidden_dims: A list of integers giving the size of each hidden layer.
      The default list is never mutated.
    - input_dim: An integer giving the size of the input.
    - num_classes: An integer giving the number of classes to classify.
    - dropout: Scalar between 0 and 1 giving dropout strength. If dropout=0
      then the network does not use dropout at all.
    - use_batchnorm: Whether or not the network should use batch normalization.
    - reg: Scalar giving L2 regularization strength.
    - weight_scale: Scalar multiplied onto standard-normal samples to
      initialize the weights.
    - dtype: A numpy datatype object; all parameters are cast to this
      datatype, as is the input of loss(). float32 is faster but less
      accurate, so use float64 for numeric gradient checking.
    - seed: If not None, this random seed is stored in the dropout parameters
      so the dropout layers can be made deterministic for gradient checking.
    """
    self.use_batchnorm = use_batchnorm
    self.use_dropout = dropout > 0
    self.reg = reg
    self.dtype = dtype
    self.params = {}

    # Number of hidden layers; the output affine layer is index num_layers + 1.
    self.num_layers = len(hidden_dims)
    num_layers = self.num_layers

    # Hidden layers: weights, zero biases and, optionally, the batch norm
    # shift (beta, zeros) and scale (gamma, ones).
    last_dim = input_dim
    for i, hidden_dim in enumerate(hidden_dims):
      idx = i + 1
      self.params['W%d' % idx] = np.random.randn(last_dim, hidden_dim) * weight_scale
      self.params['b%d' % idx] = np.zeros(hidden_dim)
      if self.use_batchnorm:
        self.params['beta%d' % idx] = np.zeros(hidden_dim)
        self.params['gamma%d' % idx] = np.ones(hidden_dim)
      last_dim = hidden_dim

    # Output layer: never followed by batch norm, relu or dropout.
    self.params['W%d' % (num_layers + 1)] = np.random.randn(last_dim, num_classes) * weight_scale
    self.params['b%d' % (num_layers + 1)] = np.zeros(num_classes)

    # A single dropout_param dictionary is shared by every dropout layer; it
    # carries the dropout probability and the mode (train / test).
    self.dropout_param = {}
    if self.use_dropout:
      self.dropout_param = {'mode': 'train', 'p': dropout}
      if seed is not None:
        self.dropout_param['seed'] = seed

    # Each batch normalization layer gets its own bn_param dictionary:
    # self.bn_params[0] goes to the first batch norm layer, self.bn_params[1]
    # to the second, and so on (one per hidden layer).
    self.bn_params = []
    if self.use_batchnorm:
      self.bn_params = [{'mode': 'train'} for _ in range(num_layers)]

    # Cast all parameters to the requested datatype. Only the values of
    # existing keys are replaced, so iterating while assigning is safe.
    for name, value in self.params.items():
      self.params[name] = value.astype(dtype)

  def loss(self, X, y=None):
    """
    Compute loss and gradient for the fully-connected net.

    Inputs:
    - X: Array of input data of shape (N, d_1, ..., d_k); it is cast to
      self.dtype before use.
    - y: Array of labels of shape (N,), or None to run in test mode.

    Returns:
    If y is None, the output of the final affine layer:
    - scores: Array of shape (N, C) of classification scores.

    Otherwise a tuple of:
    - loss: Scalar softmax loss plus the L2 penalty on every weight matrix.
    - grads: Dictionary with the same keys as self.params, mapping parameter
      names to gradients of the loss with respect to those parameters.
      Only the weight matrices are regularized.
    """
    X = X.astype(self.dtype)
    mode = 'test' if y is None else 'train'

    # Dropout and batch norm behave differently during training and testing,
    # so the mode has to be refreshed under the literal 'mode' key on every
    # call.
    if self.use_dropout:
      self.dropout_param['mode'] = mode
    if self.use_batchnorm:
      for bn_param in self.bn_params:
        bn_param['mode'] = mode

    params = self.params
    hidden_count = self.num_layers
    out_idx = hidden_count + 1

    # Forward pass through the hidden blocks, keeping every layer's cache
    # under a (layer kind, index) key for the backward pass.
    caches = {}
    out = X
    for i in range(1, hidden_count + 1):
      out, caches['affine', i] = affine_forward(out, params['W%d' % i], params['b%d' % i])
      if self.use_batchnorm:
        out, caches['bn', i] = batchnorm_forward(out, params['gamma%d' % i],
                                                 params['beta%d' % i],
                                                 self.bn_params[i - 1])
      out, caches['relu', i] = relu_forward(out)
      if self.use_dropout:
        out, caches['dropout', i] = dropout_forward(out, self.dropout_param)

    scores, caches['affine', out_idx] = affine_forward(
        out, params['W%d' % out_idx], params['b%d' % out_idx])

    # If test mode return early
    if mode == 'test':
      return scores

    grads = {}
    loss, dout = softmax_loss(scores, y)

    # L2 penalty over every weight matrix, output layer included.
    for i in range(1, out_idx + 1):
      loss += np.sum(params['W%d' % i]**2)*0.5*self.reg

    # Backward pass: output affine layer first ...
    w_key, b_key = 'W%d' % out_idx, 'b%d' % out_idx
    dout, grads[w_key], grads[b_key] = affine_backward(dout, caches['affine', out_idx])
    grads[w_key] += params[w_key]*self.reg

    # ... then each hidden block in reverse, undoing its layers in the
    # opposite order of the forward pass.
    for i in range(hidden_count, 0, -1):
      if self.use_dropout:
        dout = dropout_backward(dout, caches['dropout', i])
      dout = relu_backward(dout, caches['relu', i])
      if self.use_batchnorm:
        dout, grads['gamma%d' % i], grads['beta%d' % i] = batchnorm_backward_alt(
            dout, caches['bn', i])
      w_key, b_key = 'W%d' % i, 'b%d' % i
      dout, grads[w_key], grads[b_key] = affine_backward(dout, caches['affine', i])
      grads[w_key] += params[w_key]*self.reg

    return loss, grads
# 巴特西

# fc_net.py cs231n

# 最新文章 (Latest articles)

# 热门文章 (Popular articles)