Operator: aten._log_softmax.default
cnt: 1, ((T([64, 1000], f16), 1, False), {})
Operator: aten._log_softmax_backward_data.default
cnt: 1, ((T([64, 1000], f16), T([64, 1000], f16), 1, f16), {})
Operator: aten._softmax.default
cnt: 2, ((T([64, 4, 16, 196, 196], f16), -1, False), {})
cnt: 2, ((T([64, 8, 4, 196, 196], f16), -1, False), {})
cnt: 20, ((T([64, 16, 1, 196, 196], f16), -1, False), {})
Operator: aten._softmax_backward_data.default
cnt: 20, ((T([64, 16, 1, 196, 196], f16), T([64, 16, 1, 196, 196], f16), -1, f16), {})
cnt: 2, ((T([64, 8, 4, 196, 196], f16), T([64, 8, 4, 196, 196], f16), -1, f16), {})
cnt: 2, ((T([64, 4, 16, 196, 196], f16), T([64, 4, 16, 196, 196], f16), -1, f16), {})
Operator: aten._unsafe_view.default
cnt: 2, ((T([64, 4, 4, 14, 14, 128], f16), [64, 16, 196, 128]), {})
cnt: 2, ((T([200704, 384], f16), [64, 16, 196, 384]), {})
cnt: 6, ((T([64, 4, 16, 196, 32], f16), [4096, 196, 32]), {})
cnt: 2, ((T([64, 4, 16, 32, 196], f16), [4096, 32, 196]), {})
cnt: 2, ((T([4096, 196, 196], f16), [64, 4, 16, 196, 196]), {})
cnt: 2, ((T([4096, 196, 32], f16), [64, 4, 16, 196, 32]), {})
cnt: 2, ((T([64, 16, 196, 32, 4], f16), [64, 16, 196, 128]), {})
cnt: 4, ((T([200704, 128], f16), [64, 16, 196, 128]), {})
cnt: 2, ((T([200704, 512], f16), [64, 16, 196, 512]), {})
cnt: 2, ((T([64, 4, 14, 4, 14, 128], f16), [64, 56, 56, 128]), {})
cnt: 2, ((T([64, 2, 2, 14, 14, 256], f16), [64, 4, 196, 256]), {})
cnt: 2, ((T([50176, 768], f16), [64, 4, 196, 768]), {})
cnt: 6, ((T([64, 8, 4, 196, 32], f16), [2048, 196, 32]), {})
cnt: 2, ((T([64, 8, 4, 32, 196], f16), [2048, 32, 196]), {})
cnt: 2, ((T([2048, 196, 196], f16), [64, 8, 4, 196, 196]), {})
cnt: 2, ((T([2048, 196, 32], f16), [64, 8, 4, 196, 32]), {})
cnt: 2, ((T([64, 4, 196, 32, 8], f16), [64, 4, 196, 256]), {})
cnt: 4, ((T([50176, 256], f16), [64, 4, 196, 256]), {})
cnt: 2, ((T([50176, 1024], f16), [64, 4, 196, 1024]), {})
cnt: 2, ((T([64, 2, 14, 2, 14, 256], f16), [64, 28, 28, 256]), {})
cnt: 20, ((T([12544, 1536], f16), [64, 1, 196, 1536]), {})
cnt: 60, ((T([64, 16, 1, 196, 32], f16), [1024, 196, 32]), {})
cnt: 20, ((T([64, 16, 1, 32, 196], f16), [1024, 32, 196]), {})
cnt: 20, ((T([1024, 196, 196], f16), [64, 16, 1, 196, 196]), {})
cnt: 20, ((T([1024, 196, 32], f16), [64, 16, 1, 196, 32]), {})
cnt: 20, ((T([64, 1, 196, 32, 16], f16), [64, 1, 196, 512]), {})
cnt: 40, ((T([12544, 512], f16), [64, 1, 196, 512]), {})
cnt: 20, ((T([12544, 2048], f16), [64, 1, 196, 2048]), {})
cnt: 40, ((T([64, 1, 196, 512], f16), [12544, 512]), {})
cnt: 20, ((T([64, 1, 196, 3, 16, 32], f16), [64, 1, 196, 1536]), {})
cnt: 2, ((T([64, 4, 196, 3, 8, 32], f16), [64, 4, 196, 768]), {})
cnt: 2, ((T([64, 16, 196, 3, 4, 32], f16), [64, 16, 196, 384]), {})
Operator: aten.add.Tensor
cnt: 1, ((T([64, 16, 196, 128], f16), T([1, 16, 196, 128], f16)), {})
cnt: 2, ((T([64, 16, 196, 384], f16), T([384], f16)), {})
cnt: 4, ((T([64, 16, 196, 128], f16), T([128], f16)), {})
cnt: 8, ((T([64, 16, 196, 128], f16), T([64, 16, 196, 128], f16)), {})
cnt: 2, ((T([64, 16, 196, 512], f16), T([512], f16)), {})
cnt: 1, ((T([64, 4, 196, 256], f16), T([1, 4, 196, 256], f16)), {})
cnt: 2, ((T([64, 4, 196, 768], f16), T([768], f16)), {})
cnt: 4, ((T([64, 4, 196, 256], f16), T([256], f16)), {})
cnt: 8, ((T([64, 4, 196, 256], f16), T([64, 4, 196, 256], f16)), {})
cnt: 2, ((T([64, 4, 196, 1024], f16), T([1024], f16)), {})
cnt: 1, ((T([64, 1, 196, 512], f16), T([1, 1, 196, 512], f16)), {})
cnt: 20, ((T([64, 1, 196, 1536], f16), T([1536], f16)), {})
cnt: 40, ((T([64, 1, 196, 512], f16), T([512], f16)), {})
cnt: 40, ((T([64, 1, 196, 512], f16), T([64, 1, 196, 512], f16)), {})
cnt: 20, ((T([64, 1, 196, 2048], f16), T([2048], f16)), {})
cnt: 40, ((T([64, 1, 196, 512], f16, stride=(100352, 196, 1, 196)), T([64, 1, 196, 512], f16)), {})
Operator: aten.addmm.default
cnt: 1, ((T([1000], f16), T([64, 512], f16), T([512, 1000], f16, stride=(1, 512))), {})
Operator: aten.as_strided_.default
cnt: 1, ((T([64, 512, 1, 1], f16), [64, 512, 1, 1], [512, 1, 512, 512]), {})
Operator: aten.bernoulli_.float
cnt: 2, ((T([64, 1, 1, 1], f16), 0.9782608691602945), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.9565217383205891), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.9347826093435287), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.9130434766411781), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.8913043439388275), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.8695652186870575), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.8478260785341263), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.8260869532823563), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.8043478280305862), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.782608687877655), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.760869562625885), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.739130437374115), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.717391312122345), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.695652186870575), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.6739130318164825), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.6521739065647125), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.6304347813129425), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.6086956560611725), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.5869565308094025), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.5652174055576324), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.54347825050354), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.52173912525177), {})
cnt: 2, ((T([64, 1, 1, 1], f16),), {})
Operator: aten.bmm.default
cnt: 2, ((T([4096, 196, 32], f16), T([4096, 32, 196], f16)), {})
cnt: 2, ((T([4096, 196, 196], f16), T([4096, 196, 32], f16)), {})
cnt: 2, ((T([2048, 196, 32], f16), T([2048, 32, 196], f16)), {})
cnt: 2, ((T([2048, 196, 196], f16), T([2048, 196, 32], f16)), {})
cnt: 20, ((T([1024, 196, 32], f16), T([1024, 32, 196], f16)), {})
cnt: 20, ((T([1024, 196, 196], f16), T([1024, 196, 32], f16)), {})
cnt: 20, ((T([1024, 196, 196], f16, stride=(38416, 1, 196)), T([1024, 196, 32], f16)), {})
cnt: 20, ((T([1024, 196, 32], f16), T([1024, 32, 196], f16, stride=(6272, 1, 32))), {})
cnt: 20, ((T([1024, 32, 196], f16, stride=(6272, 1, 32)), T([1024, 196, 196], f16)), {})
cnt: 20, ((T([1024, 196, 196], f16), T([1024, 196, 32], f16, stride=(6272, 1, 196))), {})
cnt: 2, ((T([2048, 196, 196], f16, stride=(38416, 1, 196)), T([2048, 196, 32], f16)), {})
cnt: 2, ((T([2048, 196, 32], f16), T([2048, 32, 196], f16, stride=(6272, 1, 32))), {})
cnt: 2, ((T([2048, 32, 196], f16, stride=(6272, 1, 32)), T([2048, 196, 196], f16)), {})
cnt: 2, ((T([2048, 196, 196], f16), T([2048, 196, 32], f16, stride=(6272, 1, 196))), {})
cnt: 2, ((T([4096, 196, 196], f16, stride=(38416, 1, 196)), T([4096, 196, 32], f16)), {})
cnt: 2, ((T([4096, 196, 32], f16), T([4096, 32, 196], f16, stride=(6272, 1, 32))), {})
cnt: 2, ((T([4096, 32, 196], f16, stride=(6272, 1, 32)), T([4096, 196, 196], f16)), {})
cnt: 2, ((T([4096, 196, 196], f16), T([4096, 196, 32], f16, stride=(6272, 1, 196))), {})
Operator: aten.clone.default
cnt: 1, ((T([64, 3, 224, 224], f16),), {})
Operator: aten.constant_pad_nd.default
cnt: 1, ((T([64, 256, 56, 56], f16, stride=(802816, 1, 14336, 256)), [0, 1, 0, 1], -inf), {})
cnt: 1, ((T([64, 512, 28, 28], f16, stride=(401408, 1, 14336, 512)), [0, 1, 0, 1], -inf), {})
cnt: 1, ((T([64, 512, 29, 29], f16, stride=(430592, 1, 14848, 512)), [0, -1, 0, -1]), {})
cnt: 1, ((T([64, 256, 57, 57], f16, stride=(831744, 1, 14592, 256)), [0, -1, 0, -1]), {})
Operator: aten.convolution.default
cnt: 1, ((T([64, 3, 224, 224], f16), T([128, 3, 4, 4], f16), T([128], f16), [4, 4], [0, 0], [1, 1], False, [0, 0], 1), {})
cnt: 1, ((T([64, 128, 56, 56], f16, stride=(401408, 1, 7168, 128)), T([256, 128, 3, 3], f16), T([256], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {})
cnt: 1, ((T([64, 256, 28, 28], f16, stride=(200704, 1, 7168, 256)), T([512, 256, 3, 3], f16), T([512], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {})
Operator: aten.convolution_backward.default
cnt: 1, ((T([64, 512, 28, 28], f16, stride=(401408, 1, 14336, 512)), T([64, 256, 28, 28], f16, stride=(200704, 1, 7168, 256)), T([512, 256, 3, 3], f16), [512], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {})
cnt: 1, ((T([64, 256, 56, 56], f16, stride=(802816, 1, 14336, 256)), T([64, 128, 56, 56], f16, stride=(401408, 1, 7168, 128)), T([256, 128, 3, 3], f16), [256], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {})
cnt: 1, ((T([64, 128, 56, 56], f16, stride=(401408, 1, 7168, 128)), T([64, 3, 224, 224], f16), T([128, 3, 4, 4], f16), [128], [4, 4], [0, 0], [1, 1], False, [0, 0], 1, [False, True, True]), {})
Operator: aten.copy_.default
cnt: 1, ((T([64, 3, 224, 224], f16), T([64, 3, 224, 224], f16)), {})
cnt: 1, ((T([64, 512], f16), T([64, 512], f16)), {})
cnt: 1, ((T([512, 256, 3, 3], f16), T([512, 256, 3, 3], f16, stride=(2304, 1, 768, 256))), {})
cnt: 1, ((T([256, 128, 3, 3], f16), T([256, 128, 3, 3], f16, stride=(1152, 1, 384, 128))), {})
Operator: aten.div.Scalar
cnt: 1, ((T([64, 512, 14, 14], f16, stride=(512, 1, 0, 0)), 196), {})
Operator: aten.div_.Tensor
cnt: 2, ((T([64, 1, 1, 1], f16), 0.9782608691602945), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.9565217383205891), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.9347826093435287), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.9130434766411781), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.8913043439388275), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.8695652186870575), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.8478260785341263), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.8260869532823563), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.8043478280305862), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.782608687877655), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.760869562625885), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.739130437374115), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.717391312122345), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.695652186870575), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.6739130318164825), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.6521739065647125), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.6304347813129425), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.6086956560611725), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.5869565308094025), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.5652174055576324), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.54347825050354), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.52173912525177), {})
cnt: 2, ((T([64, 1, 1, 1], f16), 0.5), {})
Operator: aten.gelu.default
cnt: 2, ((T([64, 16, 196, 512], f16),), {})
cnt: 2, ((T([64, 4, 196, 1024], f16),), {})
cnt: 20, ((T([64, 1, 196, 2048], f16),), {})
Operator: aten.gelu_backward.default
cnt: 20, ((T([64, 1, 196, 2048], f16), T([64, 1, 196, 2048], f16)), {})
cnt: 2, ((T([64, 4, 196, 1024], f16), T([64, 4, 196, 1024], f16)), {})
cnt: 2, ((T([64, 16, 196, 512], f16), T([64, 16, 196, 512], f16)), {})
Operator: aten.lift_fresh_copy.default
cnt: 1, ((T([64], i64),), {})
Operator: aten.max_pool2d_with_indices.default
cnt: 1, ((T([64, 256, 57, 57], f16, stride=(831744, 1, 14592, 256)), [3, 3], [2, 2]), {})
cnt: 1, ((T([64, 512, 29, 29], f16, stride=(430592, 1, 14848, 512)), [3, 3], [2, 2]), {})
Operator: aten.max_pool2d_with_indices_backward.default
cnt: 1, ((T([64, 512, 14, 14], f16), T([64, 512, 29, 29], f16, stride=(430592, 1, 14848, 512)), [3, 3], [2, 2], [0, 0], [1, 1], False, T([64, 512, 14, 14], i64, stride=(100352, 1, 7168, 512))), {})
cnt: 1, ((T([64, 256, 28, 28], f16, stride=(200704, 1, 7168, 256)), T([64, 256, 57, 57], f16, stride=(831744, 1, 14592, 256)), [3, 3], [2, 2], [0, 0], [1, 1], False, T([64, 256, 28, 28], i64, stride=(200704, 1, 7168, 256))), {})
Operator: aten.mean.dim
cnt: 1, ((T([64, 512, 14, 14], f16, stride=(100352, 1, 7168, 512)), [-1, -2], True), {})
Operator: aten.mm.default
cnt: 2, ((T([200704, 128], f16), T([128, 384], f16, stride=(1, 128))), {})
cnt: 2, ((T([200704, 128], f16), T([128, 128], f16, stride=(1, 128))), {})
cnt: 2, ((T([200704, 128], f16), T([128, 512], f16, stride=(1, 128))), {})
cnt: 2, ((T([200704, 512], f16), T([512, 128], f16, stride=(1, 512))), {})
cnt: 2, ((T([50176, 256], f16), T([256, 768], f16, stride=(1, 256))), {})
cnt: 2, ((T([50176, 256], f16), T([256, 256], f16, stride=(1, 256))), {})
cnt: 2, ((T([50176, 256], f16), T([256, 1024], f16, stride=(1, 256))), {})
cnt: 2, ((T([50176, 1024], f16), T([1024, 256], f16, stride=(1, 1024))), {})
cnt: 20, ((T([12544, 512], f16), T([512, 1536], f16, stride=(1, 512))), {})
cnt: 20, ((T([12544, 512], f16), T([512, 512], f16, stride=(1, 512))), {})
cnt: 20, ((T([12544, 512], f16), T([512, 2048], f16, stride=(1, 512))), {})
cnt: 20, ((T([12544, 2048], f16), T([2048, 512], f16, stride=(1, 2048))), {})
cnt: 1, ((T([64, 1000], f16), T([1000, 512], f16)), {})
cnt: 1, ((T([1000, 64], f16, stride=(1, 1000)), T([64, 512], f16)), {})
cnt: 20, ((T([512, 12544], f16, stride=(1, 512)), T([12544, 2048], f16)), {})
cnt: 20, ((T([12544, 512], f16), T([512, 2048], f16)), {})
cnt: 20, ((T([2048, 12544], f16, stride=(1, 2048)), T([12544, 512], f16)), {})
cnt: 20, ((T([12544, 2048], f16), T([2048, 512], f16)), {})
cnt: 20, ((T([512, 12544], f16, stride=(1, 512)), T([12544, 512], f16)), {})
cnt: 20, ((T([12544, 512], f16), T([512, 512], f16)), {})
cnt: 20, ((T([1536, 12544], f16, stride=(1, 1536)), T([12544, 512], f16)), {})
cnt: 20, ((T([12544, 1536], f16), T([1536, 512], f16)), {})
cnt: 2, ((T([256, 50176], f16, stride=(1, 256)), T([50176, 1024], f16)), {})
cnt: 2, ((T([50176, 256], f16), T([256, 1024], f16)), {})
cnt: 2, ((T([1024, 50176], f16, stride=(1, 1024)), T([50176, 256], f16)), {})
cnt: 2, ((T([50176, 1024], f16), T([1024, 256], f16)), {})
cnt: 2, ((T([256, 50176], f16, stride=(1, 256)), T([50176, 256], f16)), {})
cnt: 2, ((T([50176, 256], f16), T([256, 256], f16)), {})
cnt: 2, ((T([768, 50176], f16, stride=(1, 768)), T([50176, 256], f16)), {})
cnt: 2, ((T([50176, 768], f16), T([768, 256], f16)), {})
cnt: 2, ((T([128, 200704], f16, stride=(1, 128)), T([200704, 512], f16)), {})
cnt: 2, ((T([200704, 128], f16), T([128, 512], f16)), {})
cnt: 2, ((T([512, 200704], f16, stride=(1, 512)), T([200704, 128], f16)), {})
cnt: 2, ((T([200704, 512], f16), T([512, 128], f16)), {})
cnt: 2, ((T([128, 200704], f16, stride=(1, 128)), T([200704, 128], f16)), {})
cnt: 2, ((T([200704, 128], f16), T([128, 128], f16)), {})
cnt: 2, ((T([384, 200704], f16, stride=(1, 384)), T([200704, 128], f16)), {})
cnt: 2, ((T([200704, 384], f16), T([384, 128], f16)), {})
Operator: aten.mul.Tensor
cnt: 4, ((T([64, 4, 16, 196, 196], f16), 0.1767766952966369), {})
cnt: 4, ((T([64, 16, 196, 128], f16), T([64, 1, 1, 1], f16)), {})
cnt: 4, ((T([64, 8, 4, 196, 196], f16), 0.1767766952966369), {})
cnt: 8, ((T([64, 4, 196, 256], f16), T([64, 1, 1, 1], f16)), {})
cnt: 40, ((T([64, 16, 1, 196, 196], f16), 0.1767766952966369), {})
cnt: 40, ((T([64, 1, 196, 512], f16), T([64, 1, 1, 1], f16)), {})
cnt: 40, ((T([64, 1, 196, 512], f16, stride=(100352, 196, 1, 196)), T([64, 1, 1, 1], f16)), {})
Operator: aten.native_layer_norm.default
cnt: 4, ((T([64, 16, 196, 128], f16), [128], T([128], f16), T([128], f16), 1e-06), {})
cnt: 1, ((T([64, 56, 56, 256], f16), [256], T([256], f16), T([256], f16), 1e-06), {})
cnt: 4, ((T([64, 4, 196, 256], f16), [256], T([256], f16), T([256], f16), 1e-06), {})
cnt: 1, ((T([64, 28, 28, 512], f16), [512], T([512], f16), T([512], f16), 1e-06), {})
cnt: 40, ((T([64, 1, 196, 512], f16), [512], T([512], f16), T([512], f16), 1e-06), {})
cnt: 1, ((T([64, 14, 14, 512], f16), [512], T([512], f16), T([512], f16), 1e-06), {})
Operator: aten.native_layer_norm_backward.default
cnt: 1, ((T([64, 14, 14, 512], f16, stride=(100352, 14, 1, 196)), T([64, 14, 14, 512], f16), [512], T([64, 14, 14, 1], f32), T([64, 14, 14, 1], f32), T([512], f16), T([512], f16), [True, True, True]), {})
cnt: 40, ((T([64, 1, 196, 512], f16), T([64, 1, 196, 512], f16), [512], T([64, 1, 196, 1], f32), T([64, 1, 196, 1], f32), T([512], f16), T([512], f16), [True, True, True]), {})
cnt: 1, ((T([64, 28, 28, 512], f16), T([64, 28, 28, 512], f16), [512], T([64, 28, 28, 1], f32), T([64, 28, 28, 1], f32), T([512], f16), T([512], f16), [True, True, True]), {})
cnt: 4, ((T([64, 4, 196, 256], f16), T([64, 4, 196, 256], f16), [256], T([64, 4, 196, 1], f32), T([64, 4, 196, 1], f32), T([256], f16), T([256], f16), [True, True, True]), {})
cnt: 1, ((T([64, 56, 56, 256], f16), T([64, 56, 56, 256], f16), [256], T([64, 56, 56, 1], f32), T([64, 56, 56, 1], f32), T([256], f16), T([256], f16), [True, True, True]), {})
cnt: 4, ((T([64, 16, 196, 128], f16), T([64, 16, 196, 128], f16), [128], T([64, 16, 196, 1], f32), T([64, 16, 196, 1], f32), T([128], f16), T([128], f16), [True, True, True]), {})
Operator: aten.new_empty.default
cnt: 2, ((T([64, 16, 196, 128], f16), [64, 1, 1, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False})
cnt: 4, ((T([64, 4, 196, 256], f16), [64, 1, 1, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False})
cnt: 40, ((T([64, 1, 196, 512], f16), [64, 1, 1, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False})
Operator: aten.new_empty_strided.default
cnt: 1, ((T([512, 256, 3, 3], f16, stride=(2304, 1, 768, 256)), [512, 256, 3, 3], [2304, 9, 3, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'})
cnt: 1, ((T([256, 128, 3, 3], f16, stride=(1152, 1, 384, 128)), [256, 128, 3, 3], [1152, 9, 3, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'})
Operator: aten.new_zeros.default
cnt: 1, ((T([64, 512], f16), [32768]), {})
Operator: aten.nll_loss_backward.default
cnt: 1, ((T([], f16), T([64, 1000], f16), T([64], i64), None, 1, -100, T([], f16)), {})
Operator: aten.nll_loss_forward.default
cnt: 1, ((T([64, 1000], f16), T([64], i64), None, 1, -100), {})
Operator: aten.stack.default
cnt: 20, (([T([64, 16, 1, 196, 32], f16), T([64, 16, 1, 196, 32], f16, stride=(100352, 6272, 6272, 1, 196)), T([64, 16, 1, 196, 32], f16)],), {})
cnt: 2, (([T([64, 8, 4, 196, 32], f16), T([64, 8, 4, 196, 32], f16, stride=(200704, 25088, 6272, 1, 196)), T([64, 8, 4, 196, 32], f16)],), {})
cnt: 2, (([T([64, 4, 16, 196, 32], f16), T([64, 4, 16, 196, 32], f16, stride=(401408, 100352, 6272, 1, 196)), T([64, 4, 16, 196, 32], f16)],), {})
Operator: aten.sum.SymInt
cnt: 1, ((T([64, 1000], f16), [0], True), {})
cnt: 40, ((T([64, 1, 196, 512], f16, stride=(100352, 196, 1, 196)), [0, 1, 2], True), {})
cnt: 20, ((T([64, 1, 196, 2048], f16), [0, 1, 2], True), {})
cnt: 20, ((T([64, 1, 196, 1536], f16), [0, 1, 2], True), {})
cnt: 1, ((T([64, 1, 196, 512], f16, stride=(100352, 196, 1, 196)), [0], True), {})
cnt: 4, ((T([64, 4, 196, 256], f16), [0, 1, 2], True), {})
cnt: 2, ((T([64, 4, 196, 1024], f16), [0, 1, 2], True), {})
cnt: 2, ((T([64, 4, 196, 768], f16), [0, 1, 2], True), {})
cnt: 1, ((T([64, 4, 196, 256], f16), [0], True), {})
cnt: 4, ((T([64, 16, 196, 128], f16), [0, 1, 2], True), {})
cnt: 2, ((T([64, 16, 196, 512], f16), [0, 1, 2], True), {})
cnt: 2, ((T([64, 16, 196, 384], f16), [0, 1, 2], True), {})
cnt: 1, ((T([64, 16, 196, 128], f16), [0], True), {})
Operator: aten.unbind.int
cnt: 2, ((T([3, 64, 4, 16, 196, 32], f16, stride=(128, 1204224, 32, 75264, 384, 1)),), {})
cnt: 2, ((T([3, 64, 8, 4, 196, 32], f16, stride=(256, 602112, 32, 150528, 768, 1)),), {})
cnt: 20, ((T([3, 64, 16, 1, 196, 32], f16, stride=(512, 301056, 32, 301056, 1536, 1)),), {})
