-
Notifications
You must be signed in to change notification settings - Fork 0
Open
Description
CLIP (Contrastive Language-Image Pre-training)
https://www.cnblogs.com/chester-cs/p/17478159.html
https://github.com/openai/CLIP/blob/main/clip/model.py
https://github.com/moein-shariatnia/OpenAI-CLIP/blob/master/CLIP.py
def forward(self, batch):
    """Compute the symmetric CLIP-style contrastive loss for one batch.

    `batch` is expected to provide "image", "input_ids" and
    "attention_mask" entries (TODO confirm against the dataloader).
    Returns a scalar loss tensor.
    """
    # Encode each modality, then project into the shared embedding space.
    img_feats = self.image_encoder(batch["image"])
    txt_feats = self.text_encoder(
        input_ids=batch["input_ids"], attention_mask=batch["attention_mask"]
    )
    img_emb = self.image_projection(img_feats)
    txt_emb = self.text_projection(txt_feats)

    # Text-to-image similarity matrix, temperature-scaled.
    logits = (txt_emb @ img_emb.T) / self.temperature

    # Soft targets: softmax over the average of the intra-modal similarities.
    # NOTE(review): targets are scaled with `* self.temperature` while the
    # logits use `/ self.temperature` — looks inconsistent; confirm this is
    # intended before reusing (it matches the referenced tutorial as-is).
    sim_img = img_emb @ img_emb.T
    sim_txt = txt_emb @ txt_emb.T
    targets = F.softmax((sim_img + sim_txt) / 2 * self.temperature, dim=-1)

    # Cross-entropy in both directions, averaged per sample, then reduced.
    loss_txt = cross_entropy(logits, targets, reduction='none')
    loss_img = cross_entropy(logits.T, targets.T, reduction='none')
    return ((loss_img + loss_txt) / 2.0).mean()  # (batch_size,) -> scalar
def cross_entropy(preds, targets, reduction='none'):
    """Soft-target cross-entropy between logits and a target distribution.

    Args:
        preds: (N, C) unnormalized logits.
        targets: (N, C) rows of target probabilities (e.g. a softmax output).
        reduction: 'none' returns the per-row loss, shape (N,);
            'mean' returns its scalar mean.

    Returns:
        Tensor of shape (N,) or a scalar, per `reduction`.

    Raises:
        ValueError: for any other `reduction` value (the original silently
            returned None, which surfaced later as a confusing TypeError).
    """
    # nn.functional.log_softmax avoids allocating an nn.LogSoftmax module
    # on every call, which the original did.
    loss = (-targets * nn.functional.log_softmax(preds, dim=-1)).sum(1)
    if reduction == "none":
        return loss
    if reduction == "mean":
        return loss.mean()
    raise ValueError(f"unsupported reduction: {reduction!r}")
# NOTE(review): fragment of a CLIP model forward pass (cf. the
# openai/CLIP model.py link above); `vision_outputs`, `text_outputs`,
# `return_loss` and `clip_loss` are defined outside this excerpt.
image_embeds = vision_outputs[1]  # element 1 of the vision encoder output — presumably the pooled features; verify
image_embeds = self.visual_projection(image_embeds)
text_embeds = text_outputs[1]
text_embeds = self.text_projection(text_embeds)
# normalized features (L2), so the matmul below is a cosine similarity
image_embeds = image_embeds / image_embeds.norm(p=2, dim=-1, keepdim=True)
text_embeds = text_embeds / text_embeds.norm(p=2, dim=-1, keepdim=True)
# cosine similarity as logits; logit_scale is exponentiated, so it is stored in log space
logit_scale = self.logit_scale.exp()
logits_per_text = torch.matmul(text_embeds, image_embeds.t()) * logit_scale
logits_per_image = logits_per_text.t()  # transpose gives the image->text view
loss = None
if return_loss:
    loss = clip_loss(logits_per_text)
剑指offer 经典
https://zhuanlan.zhihu.com/p/453204032
二分
https://leetcode.cn/problems/find-first-and-last-position-of-element-in-sorted-array
在排序数组中查找元素的第一个和最后一个位置 left_bound right_bound
https://leetcode.cn/problems/search-a-2d-matrix-ii/solutions/ #搜索二维矩阵
def searchMatrix(self, matrix: List[List[int]], target: int) -> bool:
    """Return True if `target` occurs in any row of `matrix`.

    Each row is binary-searched independently (rows are assumed sorted
    ascending), so the cost is O(m log n).
    """
    return any(
        (pos := bisect.bisect_left(row, target)) < len(row)
        and row[pos] == target
        for row in matrix
    )
def searchMatrix(self, matrix: List[List[int]], target: int) -> bool:
    """Staircase search from the bottom-left corner of a sorted matrix.

    Each comparison discards either one row (value too large) or one
    column (value too small), giving O(m + n) overall.
    """
    row, col = len(matrix) - 1, 0
    # `len(matrix[0])` stays inside the condition so an empty matrix
    # short-circuits on `row >= 0` without indexing.
    while row >= 0 and col < len(matrix[0]):
        current = matrix[row][col]
        if current == target:
            return True
        if current > target:
            row -= 1
        else:
            col += 1
    return False
https://leetcode.cn/problems/search-in-rotated-sorted-array/ # 搜索旋转排序数组(关键点在和开头结尾的比一下)
def search(self, nums, target):
    """Find `target` in a rotated sorted array of distinct ints.

    :type nums: List[int]
    :type target: int
    :rtype: int  (index of target, or -1 if absent)
    """
    if not nums:
        return -1
    lo, hi = 0, len(nums) - 1
    while lo <= hi:
        mid = (lo + hi) // 2
        # Direct hits on the probe and both boundaries.
        if nums[mid] == target:
            return mid
        if nums[hi] == target:
            return hi
        if nums[lo] == target:
            return lo
        if nums[mid] > nums[lo]:
            # Left half [lo, mid] is sorted; keep it only if target lies inside.
            if nums[lo] < target < nums[mid]:
                hi = mid - 1
            else:
                lo = mid + 1
        elif nums[mid] < target <= nums[hi]:
            # Right half [mid, hi] is sorted and contains target.
            lo = mid + 1
        else:
            hi = mid - 1
    return lo if nums[lo] == target else -1
-欧氏距离 (Euclidean distance)
# coding
https://juejin.cn/s/pytorch%20multiheadattention%E4%BD%BF%E7%94%A8
import math

import torch
import torch.nn as nn
class MultiHeadAttention(nn.Module):
    """Multi-head scaled dot-product attention.

    Projects queries/keys/values to `d_model`, splits them into
    `num_heads` heads of size d_k = d_model // num_heads, attends per
    head, and recombines through a final linear layer.
    """

    def __init__(self, d_model, num_heads):
        super(MultiHeadAttention, self).__init__()
        # Guard against a silently-truncating head split: if d_model is not
        # divisible, num_heads * d_k != d_model and the output reshape breaks.
        if d_model % num_heads != 0:
            raise ValueError(
                f"d_model ({d_model}) must be divisible by num_heads ({num_heads})"
            )
        self.num_heads = num_heads
        self.d_k = d_model // num_heads
        self.q_linear = nn.Linear(d_model, d_model)
        self.v_linear = nn.Linear(d_model, d_model)
        self.k_linear = nn.Linear(d_model, d_model)
        self.out = nn.Linear(d_model, d_model)

    def forward(self, q, k, v, mask=None):
        """Apply attention.

        Args:
            q, k, v: (batch, seq_len, d_model) tensors.
            mask: optional tensor broadcastable to (batch, seq_q, seq_k);
                positions where mask == 0 are excluded from attention.

        Returns:
            (batch, seq_q, d_model) tensor.
        """
        bs = q.size(0)
        # Project, then split into heads: (bs, seq, d_model) -> (bs, heads, seq, d_k).
        k = self.k_linear(k).view(bs, -1, self.num_heads, self.d_k).transpose(1, 2)
        q = self.q_linear(q).view(bs, -1, self.num_heads, self.d_k).transpose(1, 2)
        v = self.v_linear(v).view(bs, -1, self.num_heads, self.d_k).transpose(1, 2)
        # Scaled dot-product attention (note: `math` must be imported; the
        # original snippet used math.sqrt without importing it).
        scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k)
        if mask is not None:
            # unsqueeze(1) broadcasts the mask over the head dimension.
            scores = scores.masked_fill(mask.unsqueeze(1) == 0, -1e9)
        attn = nn.functional.softmax(scores, dim=-1)
        output = torch.matmul(attn, v)
        # Merge heads back: (bs, heads, seq, d_k) -> (bs, seq, d_model).
        output = output.transpose(1, 2).contiguous().view(
            bs, -1, self.num_heads * self.d_k
        )
        return self.out(output)
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels