Skip to content

力抠 #20

@testpppppp

Description

@testpppppp

CLIP (Contrastive Language–Image Pre-training)

https://www.cnblogs.com/chester-cs/p/17478159.html
https://github.com/openai/CLIP/blob/main/clip/model.py
https://github.com/moein-shariatnia/OpenAI-CLIP/blob/master/CLIP.py


    def forward(self, batch):
        """Compute the symmetric CLIP-style contrastive loss for one batch.

        batch is expected to carry "image", "input_ids" and "attention_mask"
        entries (schema inferred from the keys read below — confirm against
        the dataloader). Returns a scalar loss tensor.
        """
        # Encode both modalities.
        img_feats = self.image_encoder(batch["image"])
        txt_feats = self.text_encoder(
            input_ids=batch["input_ids"], attention_mask=batch["attention_mask"]
        )

        # Project into the shared embedding space (same dimensionality).
        img_emb = self.image_projection(img_feats)
        txt_emb = self.text_projection(txt_feats)

        # Pairwise text-vs-image similarity, temperature-scaled.
        logits = (txt_emb @ img_emb.T) / self.temperature
        img_sim = img_emb @ img_emb.T
        txt_sim = txt_emb @ txt_emb.T
        # Soft targets from the averaged self-similarities.
        # NOTE(review): this multiplies by temperature while the logits divide
        # by it — matches the referenced OpenAI-CLIP repo, but verify intent.
        targets = F.softmax((img_sim + txt_sim) / 2 * self.temperature, dim=-1)

        # Symmetric cross-entropy over both axes of the logit matrix.
        loss_txt = cross_entropy(logits, targets, reduction='none')
        loss_img = cross_entropy(logits.T, targets.T, reduction='none')
        per_sample = (loss_img + loss_txt) / 2.0  # shape: (batch_size,)
        return per_sample.mean()


def cross_entropy(preds, targets, reduction='none'):
    """Cross-entropy against soft (probability-distribution) targets.

    Args:
        preds: raw logits, shape (batch, classes).
        targets: target distribution per row, same shape as preds.
        reduction: "none" returns the per-sample loss vector; "mean" returns
            its scalar mean.

    Returns:
        Tensor of shape (batch,) or a scalar, depending on `reduction`.

    Raises:
        ValueError: for any other `reduction` value. (The original fell
        through the if/elif chain and silently returned None.)
    """
    log_softmax = nn.LogSoftmax(dim=-1)
    loss = (-targets * log_softmax(preds)).sum(1)
    if reduction == "none":
        return loss
    if reduction == "mean":
        return loss.mean()
    raise ValueError(f"unsupported reduction: {reduction!r}")


# NOTE(review): fragment of a CLIP forward pass in the Hugging Face
# transformers CLIPModel style; `self`, `vision_outputs`, `text_outputs`
# and `return_loss` are defined by the enclosing method, which is not
# visible in this snippet.
image_embeds = vision_outputs[1]  # presumably the pooled output — confirm upstream
image_embeds = self.visual_projection(image_embeds)

text_embeds = text_outputs[1]  # presumably the pooled output — confirm upstream
text_embeds = self.text_projection(text_embeds)

# normalized features: L2-normalize so the dot product below is cosine similarity
image_embeds = image_embeds / image_embeds.norm(p=2, dim=-1, keepdim=True)
text_embeds = text_embeds / text_embeds.norm(p=2, dim=-1, keepdim=True)

# cosine similarity as logits, scaled by the learned (exponentiated) temperature
logit_scale = self.logit_scale.exp()
logits_per_text = torch.matmul(text_embeds, image_embeds.t()) * logit_scale
logits_per_image = logits_per_text.t()  # unused in this fragment; presumably returned later

loss = None
if return_loss:
    # clip_loss is defined elsewhere (symmetric cross-entropy over both axes)
    loss = clip_loss(logits_per_text)

剑指offer 经典

https://zhuanlan.zhihu.com/p/453204032

二分

https://leetcode.cn/problems/find-first-and-last-position-of-element-in-sorted-array
在排序数组中查找元素的第一个和最后一个位置 left_bound right_bound
https://leetcode.cn/problems/search-a-2d-matrix-ii/solutions/ #搜索二维矩阵

def searchMatrix(self, matrix: List[List[int]], target: int) -> bool:
    """Return True iff `target` occurs in `matrix`.

    Each row is assumed sorted ascending; binary-search every row with
    bisect and stop at the first hit.
    """
    return any(
        (pos := bisect.bisect_left(row, target)) < len(row) and row[pos] == target
        for row in matrix
    )

 def searchMatrix(self, matrix: List[List[int]], target: int) -> bool:
        i, j = len(matrix) - 1, 0
        while i >= 0 and j < len(matrix[0]):
            if matrix[i][j] > target: i -= 1
            elif matrix[i][j] < target: j += 1
            else: return True
        return False

https://leetcode.cn/problems/search-in-rotated-sorted-array/ # 搜索旋转排序数组(关键点在和开头结尾的比一下)

def search(self, nums, target):
    """Find `target` in a rotated sorted array of distinct ints.

    :type nums: List[int]
    :type target: int
    :rtype: int -- index of target, or -1 if absent
    """
    if not nums:
        return -1
    lo, hi = 0, len(nums) - 1
    while lo <= hi:
        mid = (lo + hi) // 2
        # Direct hits at the midpoint and at both boundaries.
        if nums[mid] == target:
            return mid
        if nums[hi] == target:
            return hi
        if nums[lo] == target:
            return lo
        if nums[mid] > nums[lo]:
            # Left half [lo, mid] is sorted; keep it only if target lies inside.
            if nums[lo] < target < nums[mid]:
                hi = mid - 1
            else:
                lo = mid + 1
        else:
            # Right half [mid, hi] is sorted; keep it only if target lies inside.
            if nums[mid] < target <= nums[hi]:
                lo = mid + 1
            else:
                hi = mid - 1
    return lo if nums[lo] == target else -1

- 欧氏距离（Euclidean distance；常被误写作“欧拉距离”）

# coding
https://juejin.cn/s/pytorch%20multiheadattention%E4%BD%BF%E7%94%A8
import torch
import torch.nn as nn

class MultiHeadAttention(nn.Module):
    """Multi-head scaled dot-product attention (Vaswani et al., 2017).

    Args:
        d_model: total embedding dimension; must be divisible by num_heads.
        num_heads: number of parallel attention heads.
    """

    def __init__(self, d_model, num_heads):
        super(MultiHeadAttention, self).__init__()
        # Fail fast instead of silently truncating the head dimension.
        if d_model % num_heads != 0:
            raise ValueError("d_model must be divisible by num_heads")

        self.num_heads = num_heads
        self.d_k = d_model // num_heads

        self.q_linear = nn.Linear(d_model, d_model)
        self.v_linear = nn.Linear(d_model, d_model)
        self.k_linear = nn.Linear(d_model, d_model)
        self.out = nn.Linear(d_model, d_model)

    def forward(self, q, k, v, mask=None):
        """Apply attention.

        Args:
            q, k, v: tensors of shape (batch, seq_len, d_model).
            mask: optional; positions where mask == 0 are suppressed. Must be
                broadcastable to (batch, 1, q_len, k_len) after the head axis
                is inserted.

        Returns:
            Tensor of shape (batch, q_len, d_model).
        """
        bs = q.size(0)

        # Project, split into heads, and move the head axis forward:
        # (bs, seq, d_model) -> (bs, num_heads, seq, d_k)
        k = self.k_linear(k).view(bs, -1, self.num_heads, self.d_k).transpose(1, 2)
        q = self.q_linear(q).view(bs, -1, self.num_heads, self.d_k).transpose(1, 2)
        v = self.v_linear(v).view(bs, -1, self.num_heads, self.d_k).transpose(1, 2)

        # Scaled dot-product attention.
        # BUG FIX: the original called math.sqrt(), but `math` is never
        # imported in this file, so forward() raised NameError at runtime.
        # Computing the scale as d_k ** 0.5 needs no extra import.
        scores = torch.matmul(q, k.transpose(-2, -1)) / (self.d_k ** 0.5)
        if mask is not None:
            mask = mask.unsqueeze(1)  # insert head axis for broadcasting
            scores = scores.masked_fill(mask == 0, -1e9)
        attn = nn.functional.softmax(scores, dim=-1)
        output = torch.matmul(attn, v)

        # Merge heads back: (bs, num_heads, seq, d_k) -> (bs, seq, d_model)
        output = output.transpose(1, 2).contiguous().view(bs, -1, self.num_heads * self.d_k)

        # Final output projection.
        return self.out(output)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions