import numpy as np


def softmax_loss_naive(W, X, y, reg):
    """
    Softmax loss function, naive implementation (with loops).

    Inputs have dimension D, there are C classes, and we operate on minibatches
    of N examples.

    Inputs:
    - W: A numpy array of shape (D, C) containing weights.
    - X: A numpy array of shape (N, D) containing a minibatch of data.
    - y: A numpy array of shape (N,) containing training labels; y[i] = c means
      that X[i] has label c, where 0 <= c < C.
    - reg: (float) regularization strength

    Returns a tuple of:
    - loss as single float
    - gradient with respect to weights W; an array of same shape as W
    """
    # Initialize the loss and gradient to zero.
    loss = 0.0
    dW = np.zeros_like(W)
    num_classes = W.shape[1]
    num_train = X.shape[0]
    for i in range(num_train):
        scores = X[i].dot(W)
        scores -= np.max(scores)  # shift the scores for numerical stability before exponentiating
        scores = np.exp(scores)

        sum_scores = np.sum(scores)  # this could be written more compactly to save memory; left as-is
        P = scores / sum_scores
        L = -np.log(P)

        loss += L[y[i]]

        for j in range(num_classes):  # compute the gradient, splitting by case
            if j == y[i]:
                dW[:, j] += (-1 + P[y[i]]) * X[i].T
            else:
                dW[:, j] += P[j] * X[i].T
    dW /= num_train
    dW += reg * W
    loss /= num_train
    loss += 0.5 * reg * np.sum(W * W)

    return loss, dW
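

# A quick sanity check (a sketch, not part of the original assignment code):
# with near-zero weights every class gets probability ~1/C, so the data loss of
# softmax_loss_naive should be close to -log(1/C). The shapes below are made up.
def _check_softmax_naive():
    rng = np.random.RandomState(0)
    W = rng.randn(10, 3) * 1e-4   # D = 10, C = 3
    X = rng.randn(5, 10)          # N = 5
    y = rng.randint(3, size=5)
    loss, dW = softmax_loss_naive(W, X, y, reg=0.0)
    print(loss, -np.log(1.0 / 3))  # the two numbers should nearly match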


def softmax_loss_vectorized(W, X, y, reg):
    """
    Softmax loss function, vectorized version.

    Inputs and outputs are the same as softmax_loss_naive.
    """
    # Initialize the loss and gradient to zero.
    loss = 0.0
    dW = np.zeros_like(W)
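    # The body of this function was not part of the excerpt above; what follows
    # is one possible vectorized sketch that mirrors the per-example math of
    # softmax_loss_naive: shifted scores, softmax probabilities P, and the
    # gradient X^T (P - Y_onehot) / N + reg * W.
    num_train = X.shape[0]

    scores = X.dot(W)                                 # (N, C)
    scores -= np.max(scores, axis=1, keepdims=True)   # per-row stability shift
    P = np.exp(scores)
    P /= np.sum(P, axis=1, keepdims=True)             # softmax probabilities

    loss = -np.sum(np.log(P[np.arange(num_train), y]))
    loss /= num_train
    loss += 0.5 * reg * np.sum(W * W)

    dP = P.copy()
    dP[np.arange(num_train), y] -= 1                  # subtract 1 at the correct class
    dW = X.T.dot(dP) / num_train
    dW += reg * W

    return loss, dW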


def loss(self, X, y=None, reg=0.0):
    """
    Compute the loss and gradients for a two layer fully connected neural
    network.

    Inputs:
    - X: Input data of shape (N, D). Each X[i] is a training sample.
    - y: Vector of training labels. y[i] is the label for X[i], and each y[i]
      is an integer in the range 0 <= y[i] < C. This parameter is optional; if
      it is not passed then we only return scores, and if it is passed then we
      instead return the loss and gradients.
    - reg: Regularization strength.

    Returns:
    If y is None, return a matrix scores of shape (N, C) where scores[i, c] is
    the score for class c on input X[i].

    If y is not None, instead return a tuple of:
    - loss: Loss (data loss and regularization loss) for this batch of training
      samples.
    - grads: Dictionary mapping parameter names to gradients of those
      parameters with respect to the loss function; has the same keys as
      self.params.
    """
    # Unpack variables from the params dictionary
    W1, b1 = self.params['W1'], self.params['b1']
    W2, b2 = self.params['W2'], self.params['b2']
    N, D = X.shape
    # Compute the forward pass
    scores = None
    h = np.maximum(X @ W1 + b1, 0)  # hidden layer with ReLU
    scores = h @ W2 + b2            # class scores, shape (N, C)
    # If the targets are not given then jump out, we're done
    if y is None:
        return scores
    # Compute the loss
    loss = None
    scores -= np.max(scores, axis=1, keepdims=True)  # same stability shift as in the naive version
    scores = np.exp(scores)
    sum_scores = np.sum(scores, axis=1, keepdims=True)

    P = scores / sum_scores
    L = -np.log(P)
    loss = np.sum(L[np.arange(N), y])
    loss /= N
    loss += reg * (np.sum(W1 * W1) + np.sum(W2 * W2))
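
    # The excerpt stops before the backward pass, but the docstring promises a
    # grads dictionary. A sketch of that backward pass, consistent with the
    # forward pass and loss above (softmax gradient P - Y_onehot, ReLU mask,
    # and the gradient 2 * reg * W of the regularization term), would be:
    grads = {}

    dscores = P.copy()            # (N, C) gradient of the data loss w.r.t. scores
    dscores[np.arange(N), y] -= 1
    dscores /= N

    grads['W2'] = h.T.dot(dscores) + 2 * reg * W2
    grads['b2'] = np.sum(dscores, axis=0)

    dh = dscores.dot(W2.T)
    dh[h <= 0] = 0                # backprop through the ReLU

    grads['W1'] = X.T.dot(dh) + 2 * reg * W1
    grads['b1'] = np.sum(dh, axis=0)

    return loss, grads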