Human Pose Estimation Paper: Simple and Lightweight Human Pose Estimation and Its PyTorch Implementation
Paper PDF: Simple and Lightweight Human Pose Estimation

Bottleneck Block
For downsampling, the Lightweight Bottleneck Block (implemented as LBwithGCBlock in the code below) is used, and the downsampling stride of layer4 is changed to 1 so that stage no longer reduces resolution.
For upsampling, group deconvolution followed by a 1x1 convolution is used to reduce computation, and one upsampling layer is removed, leaving only two deconvolution stages in the decoder.
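To make the computation saving concrete, here is a minimal sketch comparing the parameter count of a plain 4x4 deconvolution with the group-deconvolution + 1x1 unit; the 512-to-128 channel sizes follow the first decoder stage in the code further down.

import torch.nn as nn

def n_params(module):
    return sum(p.numel() for p in module.parameters())

in_ch, out_ch = 512, 128   # channel sizes of the first decoder stage in the code below

# standard 4x4, stride-2 deconvolution
plain = nn.ConvTranspose2d(in_ch, out_ch, kernel_size=4, stride=2, padding=1)

# grouped deconvolution followed by a 1x1 convolution (same structure as GroupDeconv below)
groups = 128  # = gcd(in_ch, out_ch), as computeGCD produces in the code below
grouped = nn.Sequential(
    nn.ConvTranspose2d(in_ch, 2 * out_ch, kernel_size=4, stride=2, padding=1, groups=groups),
    nn.Conv2d(2 * out_ch, out_ch, kernel_size=1),
)

print(n_params(plain), n_params(grouped))  # the grouped variant has far fewer parameters (roughly 20x here)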
β-Soft-Argmax
To make better use of the location information in the predicted heatmaps, an improved Soft-Argmax, called β-Soft-Argmax, is proposed. The standard Soft-Argmax computes a joint coordinate as the expectation of the pixel coordinates c_i under the softmax-normalized heatmap h:

    p = Σ_i softmax(h)_i · c_i,   where softmax(h)_i = exp(h_i) / Σ_j exp(h_j)

β-Soft-Argmax replaces this softmax with a sharpened version that scales the heatmap by a factor β (β > 1) before normalization:

    softmax_β(h)_i = exp(β·h_i) / Σ_j exp(β·h_j)

so the resulting distribution concentrates more tightly around the true joint location.
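A minimal PyTorch sketch of β-Soft-Argmax over a batch of predicted heatmaps; the heatmap size and the β value here are illustrative assumptions, not values prescribed by the paper.

import torch

def beta_soft_argmax(heatmap, beta=100.0):
    """Expected (x, y) coordinates of heatmaps, weighted by softmax(beta * h).

    heatmap: tensor of shape [N, K, H, W]; beta is a sharpening factor (assumed value).
    """
    n, k, h, w = heatmap.shape
    # normalize the beta-scaled scores over all spatial locations
    probs = torch.softmax(beta * heatmap.view(n, k, -1), dim=-1).view(n, k, h, w)
    # coordinate grids for the expectation
    ys = torch.arange(h, dtype=probs.dtype).view(1, 1, h, 1)
    xs = torch.arange(w, dtype=probs.dtype).view(1, 1, 1, w)
    x = (probs * xs).sum(dim=(2, 3))
    y = (probs * ys).sum(dim=(2, 3))
    return torch.stack([x, y], dim=-1)  # [N, K, 2]

coords = beta_soft_argmax(torch.randn(1, 16, 64, 48))
print(coords.shape)  # torch.Size([1, 16, 2])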
Iterative Training Strategy
An iterative training strategy is proposed: even without a classification network pre-trained on ImageNet, it effectively improves the final accuracy.
Concretely, training is divided into several stages, and a different learning rate schedule is used in each stage.
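A minimal sketch of such a staged training loop; the number of stages, learning rates, and epoch counts are placeholder assumptions, and the stand-in model would be LPN(nJoints=16) from the code below in practice.

import torch
import torch.nn as nn

# placeholder model; in practice this would be LPN(nJoints=16) from the code below
model = nn.Conv2d(3, 16, kernel_size=1)

# hypothetical per-stage schedules: (initial learning rate, number of epochs) -- assumed values
stages = [(1e-3, 90), (1e-3, 60), (1e-4, 30)]

for stage_lr, n_epochs in stages:
    # the optimizer and scheduler are re-created (restarted) at the beginning of every stage
    optimizer = torch.optim.Adam(model.parameters(), lr=stage_lr)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[n_epochs // 2], gamma=0.1)
    for epoch in range(n_epochs):
        # ... run one epoch of heatmap-regression training here ...
        optimizer.step()      # stands in for the per-batch updates
        scheduler.step()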
The PyTorch code:
import torch
import torch.nn as nn
import torchvision
from context_block import ContextBlock


class LBwithGCBlock(nn.Module):
    """Lightweight Bottleneck block with a Global Context (GC) block."""
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(LBwithGCBlock, self).__init__()
        self.downsample = downsample
        self.conv1 = nn.Conv2d(in_channels=inplanes, out_channels=planes, kernel_size=1, stride=1, padding=0)
        self.conv1_bn = nn.BatchNorm2d(planes)
        self.conv1_bn_relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(in_channels=planes, out_channels=planes, kernel_size=3, stride=stride, padding=1)
        self.conv2_bn = nn.BatchNorm2d(planes)
        self.conv2_bn_relu = nn.ReLU(inplace=True)
        self.conv3 = nn.Conv2d(in_channels=planes, out_channels=planes * self.expansion, kernel_size=1, stride=1, padding=0)
        self.conv3_bn = nn.BatchNorm2d(planes * self.expansion)
        self.gcb = ContextBlock(planes * self.expansion, ratio=2)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        residual = x
        out = self.conv1_bn_relu(self.conv1_bn(self.conv1(x)))
        out = self.conv2_bn_relu(self.conv2_bn(self.conv2(out)))
        out = self.conv3_bn(self.conv3(out))
        out = self.gcb(out)
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        return self.relu(out)


def computeGCD(a, b):
    # greatest common divisor by repeated subtraction
    while a != b:
        if a > b:
            a = a - b
        else:
            b = b - a
    return b


def GroupDeconv(inplanes, planes, kernel_size, stride, padding, output_padding):
    # grouped deconvolution followed by a 1x1 convolution to mix channels
    groups = computeGCD(inplanes, planes)
    return nn.Sequential(
        nn.ConvTranspose2d(in_channels=inplanes, out_channels=2 * planes, kernel_size=kernel_size,
                           stride=stride, padding=padding, output_padding=output_padding, groups=groups),
        nn.Conv2d(2 * planes, planes, kernel_size=1, stride=1, padding=0)
    )


class LPN(nn.Module):
    def __init__(self, nJoints):
        super(LPN, self).__init__()
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(LBwithGCBlock, 64, 3)
        self.layer2 = self._make_layer(LBwithGCBlock, 128, 4, stride=2)
        self.layer3 = self._make_layer(LBwithGCBlock, 256, 6, stride=2)
        self.layer4 = self._make_layer(LBwithGCBlock, 512, 3, stride=1)  # stride changed to 1
        self.deconv_layers = self._make_deconv_group_layer()
        self.final_layer = nn.Conv2d(in_channels=self.inplanes, out_channels=nJoints, kernel_size=1, stride=1, padding=0)

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride),
                nn.BatchNorm2d(planes * block.expansion),
            )
        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))
        return nn.Sequential(*layers)

    def _make_deconv_group_layer(self):
        layers = []
        planes = 256
        for i in range(2):  # only two upsampling stages
            planes = planes // 2
            # layers.append(nn.ConvTranspose2d(in_channels=self.inplanes,out_channels=256,kernel_size=4,stride=2,padding=1,output_padding=0,groups=computeGCD(self.inplanes,256)))
            layers.append(GroupDeconv(inplanes=self.inplanes, planes=planes, kernel_size=4,
                                      stride=2, padding=1, output_padding=0))
            layers.append(nn.BatchNorm2d(planes))
            layers.append(nn.ReLU(inplace=True))
            self.inplanes = planes
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.deconv_layers(x)
        x = self.final_layer(x)
        return x


if __name__ == '__main__':
    model = LPN(nJoints=16)
    print(model)
    data = torch.randn(1, 3, 256, 192)
    out = model(data)
    print(out.shape)
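As a quick check of the "lightweight" claim, one can count the parameters of the assembled model; a short sketch:

model = LPN(nJoints=16)
n_params = sum(p.numel() for p in model.parameters())
print(f'{n_params / 1e6:.2f}M parameters')  # only a few million, noticeably smaller than a ResNet-50 + 3-deconv SimpleBaseline head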
Here, ContextBlock is the Global Context (GC) block, as in GCNet:

import torch
from torch import nn


class ContextBlock(nn.Module):

    def __init__(self, inplanes, ratio, pooling_type='att', fusion_types=('channel_add', )):
        super(ContextBlock, self).__init__()
        assert pooling_type in ['avg', 'att']
        assert isinstance(fusion_types, (list, tuple))
        valid_fusion_types = ['channel_add', 'channel_mul']
        assert all([f in valid_fusion_types for f in fusion_types])
        assert len(fusion_types) > 0, 'at least one fusion should be used'
        self.inplanes = inplanes
        self.ratio = ratio
        self.planes = int(inplanes * ratio)
        self.pooling_type = pooling_type
        self.fusion_types = fusion_types
        if pooling_type == 'att':
            self.conv_mask = nn.Conv2d(inplanes, 1, kernel_size=1)
            self.softmax = nn.Softmax(dim=2)
        else:
            self.avg_pool = nn.AdaptiveAvgPool2d(1)
        if 'channel_add' in fusion_types:
            self.channel_add_conv = nn.Sequential(
                nn.Conv2d(self.inplanes, self.planes, kernel_size=1),
                nn.LayerNorm([self.planes, 1, 1]),
                nn.ReLU(inplace=True),  # yapf: disable
                nn.Conv2d(self.planes, self.inplanes, kernel_size=1))
        else:
            self.channel_add_conv = None
        if 'channel_mul' in fusion_types:
            self.channel_mul_conv = nn.Sequential(
                nn.Conv2d(self.inplanes, self.planes, kernel_size=1),
                nn.LayerNorm([self.planes, 1, 1]),
                nn.ReLU(inplace=True),  # yapf: disable
                nn.Conv2d(self.planes, self.inplanes, kernel_size=1))
        else:
            self.channel_mul_conv = None

    def spatial_pool(self, x):
        batch, channel, height, width = x.size()
        if self.pooling_type == 'att':
            input_x = x
            # [N, C, H * W]
            input_x = input_x.view(batch, channel, height * width)
            # [N, 1, C, H * W]
            input_x = input_x.unsqueeze(1)
            # [N, 1, H, W]
            context_mask = self.conv_mask(x)
            # [N, 1, H * W]
            context_mask = context_mask.view(batch, 1, height * width)
            # [N, 1, H * W]
            context_mask = self.softmax(context_mask)
            # [N, 1, H * W, 1]
            context_mask = context_mask.unsqueeze(-1)
            # [N, 1, C, 1]
            context = torch.matmul(input_x, context_mask)
            # [N, C, 1, 1]
            context = context.view(batch, channel, 1, 1)
        else:
            # [N, C, 1, 1]
            context = self.avg_pool(x)
        return context

    def forward(self, x):
        # [N, C, 1, 1]
        context = self.spatial_pool(x)
        out = x
        if self.channel_mul_conv is not None:
            # [N, C, 1, 1]
            channel_mul_term = torch.sigmoid(self.channel_mul_conv(context))
            out = out * channel_mul_term
        if self.channel_add_conv is not None:
            # [N, C, 1, 1]
            channel_add_term = self.channel_add_conv(context)
            out = out + channel_add_term
        return out
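A quick standalone sanity check of ContextBlock on random features; the input shape is an assumption, and ratio=2 mirrors how LBwithGCBlock constructs it above.

import torch

gcb = ContextBlock(inplanes=64, ratio=2)   # ratio=2 as in LBwithGCBlock above
feats = torch.randn(1, 64, 16, 12)         # assumed feature-map size
print(gcb(feats).shape)                    # torch.Size([1, 64, 16, 12]) -- output keeps the input shape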