Quiz-7 (2024.11.19) // Scope: up to 09wk-2

| Item | Allowed | Notes |
|---|---|---|
| Lecture notes | Allowed | You may consult the lecture notes provided in class or your own summaries |
| Google search | Allowed | Searching the internet for references and verifying information is permitted |
| Generative models | Not allowed | AI-based tools (e.g., GPT) may not be used |

#!pip install datasets
import transformers
import datasets
#---#
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import torch
import torchvision
1. model
(1) Observe the following data.
d = datasets.load_dataset("food101", split="train[:8]")
_transforms = torchvision.transforms.Compose([
    torchvision.transforms.Resize((224,224)),
    torchvision.transforms.ToTensor(),
])
def transforms(examples):
    examples["pixel_values"] = [_transforms(img) for img in examples["image"]]
    del examples["image"]
    return examples
d = d.with_transform(transforms)[:8]
Use d as input to the model below and compute the loss.
model = transformers.AutoModelForImageClassification.from_pretrained(
    "google/vit-base-patch16-224-in21k",
    num_labels=101,
)
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
(Solution)
model.forward(
    labels=torch.tensor(d['label']),
    pixel_values=torch.stack(d['pixel_values'], axis=0)
)
ImageClassifierOutput(loss=tensor(4.5997, grad_fn=<NllLossBackward0>),
    logits=tensor([[ 7.6826e-02, -9.0105e-02,  ..., -4.7897e-02],
                   ...,
                   [ 1.2647e-01,  2.1974e-02,  ..., -6.9384e-02]], grad_fn=<AddmmBackward0>),
    hidden_states=None, attentions=None)
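As a quick sanity check (not part of the original problem), the loss returned by ViTForImageClassification for integer labels is a cross-entropy over the logits, so recomputing it by hand should give the same value. A minimal sketch, reusing the model and d defined above and introducing a local name out:

```python
import torch.nn.functional as F

# Re-run the forward pass and compare the built-in loss with a manual cross-entropy.
out = model.forward(
    labels=torch.tensor(d['label']),
    pixel_values=torch.stack(d['pixel_values'], axis=0),
)
manual_loss = F.cross_entropy(out.logits, torch.tensor(d['label']))
print(out.loss, manual_loss)  # both should be ~4.5997 for this run
```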
(2) Observe the following data.
d = datasets.load_dataset("food101", split="train[:1]")
_transforms = torchvision.transforms.Compose([
    torchvision.transforms.Resize((224,224)),
    torchvision.transforms.ToTensor(),
])
def transforms(examples):
    examples["pixel_values"] = [_transforms(img) for img in examples["image"]]
    del examples["image"]
    return examples
d = d.with_transform(transforms)[0]
d
{'label': 6,
'pixel_values': tensor([[[0.1216, 0.1176, 0.1176, ..., 0.0078, 0.0039, 0.0000],
[0.1333, 0.1294, 0.1255, ..., 0.0039, 0.0000, 0.0000],
[0.1451, 0.1373, 0.1333, ..., 0.0039, 0.0000, 0.0000],
...,
[0.2235, 0.2157, 0.2471, ..., 0.3569, 0.3725, 0.3529],
[0.2627, 0.2588, 0.2314, ..., 0.3490, 0.3608, 0.3608],
[0.2431, 0.2627, 0.2549, ..., 0.3373, 0.3412, 0.3373]],
[[0.1373, 0.1333, 0.1333, ..., 0.1059, 0.1020, 0.0980],
[0.1490, 0.1451, 0.1412, ..., 0.1020, 0.0980, 0.0941],
[0.1608, 0.1529, 0.1490, ..., 0.1020, 0.0941, 0.0863],
...,
[0.2157, 0.2078, 0.2431, ..., 0.3333, 0.3490, 0.3333],
[0.2510, 0.2471, 0.2235, ..., 0.3216, 0.3451, 0.3529],
[0.2314, 0.2510, 0.2431, ..., 0.3098, 0.3294, 0.3333]],
[[0.1451, 0.1451, 0.1451, ..., 0.0941, 0.0902, 0.0863],
[0.1608, 0.1569, 0.1529, ..., 0.0902, 0.0863, 0.0824],
[0.1725, 0.1647, 0.1608, ..., 0.0902, 0.0824, 0.0784],
...,
[0.1412, 0.1333, 0.1647, ..., 0.2196, 0.2353, 0.2196],
[0.1765, 0.1725, 0.1490, ..., 0.2078, 0.2275, 0.2314],
[0.1569, 0.1765, 0.1686, ..., 0.1961, 0.2118, 0.2118]]])}
Use d as input to the model below and compute the loss.
model = transformers.AutoModelForImageClassification.from_pretrained(
    "google/vit-base-patch16-224-in21k",
    num_labels=101,
)
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
(Solution)
model.forward(
    labels=torch.tensor([d['label']]),
    pixel_values=torch.stack([d['pixel_values']], axis=0)
)
ImageClassifierOutput(loss=tensor(4.5957, grad_fn=<NllLossBackward0>), logits=tensor([[ 0.1519,  0.0421,  0.0847, -0.0296, -0.1074,  0.0327,  0.0381,  0.1434,
0.0762, 0.1087, 0.0567, 0.2495, -0.1418, -0.0435, -0.0919, -0.2366,
-0.0408, -0.2031, 0.0534, -0.0367, 0.0771, -0.1781, -0.0031, 0.0235,
-0.0508, -0.2475, 0.2553, 0.0192, -0.1095, 0.0517, -0.0726, -0.0424,
-0.0214, 0.1052, 0.1299, 0.2157, -0.1121, -0.0293, -0.0680, 0.0239,
0.0805, 0.2058, -0.0433, 0.0341, -0.0848, -0.0193, -0.0263, 0.0058,
-0.0062, -0.0518, -0.0941, 0.1879, -0.0868, -0.0237, -0.0055, 0.0460,
-0.0067, 0.1300, 0.0287, -0.0236, 0.1007, 0.2171, 0.1671, -0.0045,
0.1083, -0.1243, -0.0193, 0.1496, 0.1563, 0.0729, 0.0076, 0.0756,
-0.0562, 0.1650, 0.0549, -0.0824, 0.0041, 0.0151, 0.1223, 0.0802,
-0.0875, 0.0186, 0.0311, 0.0323, 0.0155, 0.1161, -0.1092, 0.1090,
-0.1826, -0.1482, 0.1398, -0.0812, 0.0598, 0.1545, -0.1857, -0.0157,
0.0272, 0.0088, -0.1251, -0.0956, 0.1077]],
grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)
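Since d['pixel_values'] is already a single (3, 224, 224) tensor here, the batch dimension can also be added with unsqueeze; this is just an equivalent variant of the solution above, not a different method:

```python
# Equivalent call: add a leading batch dimension instead of stacking a one-element list.
model.forward(
    labels=torch.tensor([d['label']]),
    pixel_values=d['pixel_values'].unsqueeze(0),  # (3,224,224) -> (1,3,224,224)
)
```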
(3) Observe the following data, image1 and image2.
from PIL import Image
import requests
from io import BytesIO

url = "https://upload.wikimedia.org/wikipedia/commons/thumb/e/ec/Shoyu_ramen%2C_at_Kasukabe_Station_%282014.05.05%29_1.jpg/500px-Shoyu_ramen%2C_at_Kasukabe_Station_%282014.05.05%29_1.jpg"
response = requests.get(url)
image1 = Image.open(BytesIO(response.content))
image1
url = "https://upload.wikimedia.org/wikipedia/commons/f/f9/Bibimbap_made_by_Mrs._Jang.jpg"
response = requests.get(url)
image2 = Image.open(BytesIO(response.content))
image2
Transform image1 and image2 appropriately into a tensor of shape (2,3,224,224) and use it as input to the model below.
config = transformers.AutoConfig.from_pretrained("google/vit-base-patch16-224-in21k")
config.image_size = 224
config.num_labels = 101
model = transformers.AutoModelForImageClassification.from_pretrained(
    "google/vit-base-patch16-224-in21k",
    config=config,
)
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Notes
- Skipping normalization and applying only a tensor conversion and a resize is accepted as correct.
- Passing labels to the model is optional; that is, the loss does not have to be computed.
(Solution)
f = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Resize((224, 224))
])
model(pixel_values=torch.stack([f(image1), f(image2)]))
ImageClassifierOutput(loss=None, logits=tensor([[-0.0653,  0.0641,  0.0275, -0.0767,  0.0914, -0.0843, -0.0095,  0.0794,
-0.0525, 0.1611, -0.0290, -0.0571, 0.0638, -0.0695, -0.1568, -0.1168,
0.1073, -0.1338, 0.0023, 0.1203, -0.0574, -0.0785, -0.0967, -0.1262,
-0.1663, 0.0729, -0.0902, -0.2013, -0.0517, 0.0591, -0.0118, -0.0183,
0.0222, 0.0441, -0.0946, 0.0083, -0.0043, -0.0057, 0.0671, -0.0847,
-0.0680, 0.1215, 0.0185, 0.1103, 0.0807, -0.0700, 0.0117, 0.2419,
-0.0970, -0.0554, 0.1159, 0.0334, -0.0426, 0.1251, -0.0690, 0.0834,
-0.1035, 0.0099, 0.1139, 0.0088, 0.1347, -0.2821, -0.0401, -0.1899,
0.0491, 0.2097, 0.0962, -0.1737, 0.1471, -0.0153, -0.0048, -0.0831,
-0.0085, 0.0331, 0.0406, -0.1343, -0.0871, 0.0433, -0.1360, 0.1215,
-0.1829, 0.0775, 0.0150, -0.0069, 0.0531, 0.0831, -0.0082, -0.0597,
-0.1527, -0.1363, 0.0086, 0.0749, -0.0558, -0.1586, -0.0698, 0.0515,
0.0647, 0.1985, 0.0294, -0.0328, 0.0677],
[-0.1179, -0.1062, 0.0355, 0.0604, 0.1260, 0.1135, 0.1221, -0.0251,
0.0042, -0.0463, 0.0140, 0.0934, -0.0124, 0.1090, -0.1515, -0.1498,
-0.0300, 0.0317, -0.1586, 0.0770, -0.0830, -0.2840, -0.1025, -0.0301,
0.0530, -0.1754, -0.0492, -0.1541, 0.2258, -0.0402, -0.0281, 0.0376,
0.2181, -0.0643, -0.0585, -0.1741, -0.0757, -0.0428, 0.0436, -0.0808,
0.0218, 0.1201, 0.0408, -0.0972, 0.0545, 0.0200, 0.0342, 0.0472,
0.0095, -0.1572, -0.0469, -0.0249, -0.0642, 0.1126, -0.1253, 0.0170,
0.0031, -0.1293, 0.0217, -0.0116, -0.0447, -0.0031, 0.0464, -0.0813,
0.0625, 0.1606, -0.0825, 0.0916, -0.1614, -0.0391, 0.0022, -0.1794,
-0.0074, -0.0214, 0.0798, 0.0706, 0.0145, 0.0276, -0.0038, 0.1055,
-0.1081, 0.0983, -0.1621, -0.0237, -0.0073, -0.1320, -0.0418, -0.0268,
0.1930, 0.1639, 0.0360, -0.0023, 0.1277, 0.0286, 0.0648, -0.1222,
0.0259, 0.0614, 0.0473, -0.2201, 0.0510]],
grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)
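The notes above explicitly allow skipping normalization. For reference, if one did want to approximate the preprocessing used for this checkpoint, ViT image processors typically normalize each channel with mean 0.5 and std 0.5; the exact values below are an assumption, so treat this only as a sketch:

```python
# Sketch: resize + tensor conversion + (assumed) per-channel normalization.
f_norm = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Resize((224, 224)),
    torchvision.transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])
model(pixel_values=torch.stack([f_norm(image1), f_norm(image2)]))
```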
(4) Transform image1 and image2 from (3) appropriately into a tensor of shape (2,3,?,?) and use it as input to the model below.
config = transformers.AutoConfig.from_pretrained("google/vit-base-patch16-224-in21k")
config.image_size = 64
config.num_labels = 101
model = transformers.AutoModelForImageClassification.from_pretrained(
    "google/vit-base-patch16-224-in21k",
    config=config,
    ignore_mismatched_sizes=True
)
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized because the shapes did not match:
- embeddings.position_embeddings: found shape torch.Size([1, 197, 768]) in the checkpoint and torch.Size([1, 17, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Notes
- Skipping normalization and applying only a tensor conversion and a resize is accepted as correct.
- Passing labels to the model is optional; that is, the loss does not have to be computed.
(Solution)
f = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Resize((64, 64))
])
model(pixel_values=torch.stack([f(image1), f(image2)]))
ImageClassifierOutput(loss=None, logits=tensor([[ 0.0212, -0.0771, -0.0605, -0.1835, -0.1227,  0.0089, -0.0588,  0.1302,
0.0353, -0.0484, 0.0964, 0.1646, 0.0236, -0.0040, -0.1185, -0.0121,
-0.0385, 0.1722, -0.0205, -0.0439, 0.1443, -0.1304, 0.0570, -0.0637,
0.1146, 0.1022, -0.0157, 0.0319, 0.0633, -0.0815, 0.0293, 0.0164,
-0.0655, 0.0314, -0.0423, 0.1512, -0.0016, -0.1842, 0.1167, 0.0589,
-0.0171, -0.1791, -0.0386, -0.0535, 0.0894, -0.0842, 0.0456, 0.0558,
0.0240, 0.0595, 0.0602, -0.0988, 0.0933, 0.0183, 0.0151, -0.0570,
-0.0357, 0.0562, -0.0198, 0.0265, -0.1284, -0.0572, 0.1064, 0.0464,
0.0803, 0.0060, 0.0522, 0.0900, -0.0575, -0.0059, -0.0342, -0.0069,
0.0229, 0.0527, 0.1081, 0.0243, 0.0328, 0.1047, -0.0259, -0.0383,
-0.1083, -0.0758, -0.1305, 0.0086, -0.0492, -0.0067, 0.0770, -0.0387,
-0.1297, 0.0194, -0.0423, -0.0907, 0.0726, 0.0027, 0.0238, -0.0398,
0.1453, -0.1633, 0.0538, -0.1173, -0.0967],
[ 0.1121, -0.0739, -0.0623, -0.1765, 0.1107, 0.0731, -0.2481, -0.0213,
-0.0165, -0.1801, 0.0661, 0.1108, 0.1713, -0.0030, 0.0337, -0.0439,
-0.0901, -0.0569, -0.1077, -0.1419, 0.0933, 0.0512, 0.0631, 0.0272,
-0.0016, -0.0901, -0.1058, -0.1481, 0.0509, -0.0537, -0.0993, 0.0507,
-0.1728, -0.0832, 0.1580, -0.1036, 0.1509, -0.1827, -0.1906, -0.1341,
-0.1283, 0.0696, 0.1769, -0.0613, -0.2720, -0.0669, -0.0425, -0.1467,
-0.0041, 0.0052, 0.0623, -0.0071, 0.0153, -0.1510, -0.0204, 0.1857,
-0.0990, -0.0896, -0.0664, 0.0167, -0.2330, -0.0987, -0.0146, -0.1457,
0.1045, -0.0520, -0.1489, -0.1459, -0.1953, 0.2266, 0.0794, 0.1728,
-0.2162, -0.1092, 0.1113, 0.0851, 0.0620, 0.1316, -0.0173, -0.0890,
-0.2102, -0.0749, -0.2449, 0.0741, 0.1630, -0.0381, 0.0046, -0.0530,
-0.0499, 0.1425, -0.0973, -0.0361, -0.0500, -0.0883, -0.1765, -0.1511,
-0.0355, 0.0229, -0.0891, -0.0106, 0.0021]],
grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)
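The shape mismatch reported above is easy to verify: with 16x16 patches, a 64x64 input yields (64/16)^2 = 16 patch tokens plus one [CLS] token, i.e. 17 position embeddings instead of the 197 stored in the 224x224 checkpoint. A small illustrative check:

```python
# Sequence length of a ViT = number of patches + 1 [CLS] token.
patch_size = 16
for image_size in (224, 64):
    seq_len = (image_size // patch_size) ** 2 + 1
    print(image_size, seq_len)  # 224 -> 197, 64 -> 17 (matches the warning above)
```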
(5) Observe the following data.
df = pd.read_csv("https://raw.githubusercontent.com/guebin/MP2024/refs/heads/main/posts/mbti_1.csv")
mbti = datasets.Dataset.from_pandas(df)

def transform(example):
    result = dict()
    if "E" in example['type']:
        result['labels'] = 1
    else:
        result['labels'] = 0
    del example['type']
    return result

mbti = mbti.map(transform)
d = mbti.select(range(10))
d
Dataset({
    features: ['posts', 'labels'],
    num_rows: 10
})
Use d as input to the model below and compute the loss.
model = transformers.AutoModelForSequenceClassification.from_pretrained(
    "distilbert/distilbert-base-uncased", num_labels=2
)
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
(Solution)
tokenizer = transformers.AutoTokenizer.from_pretrained(
    "distilbert/distilbert-base-uncased"
)
tokenizer(d['posts'], padding=True, truncation=True, return_tensors="pt")
{'input_ids': tensor([[ 101, 1005, 8299, ..., 2017, 2215, 102],
[ 101, 1005, 1045, ..., 3259, 2028, 102],
[ 101, 1005, 2204, ..., 1012, 2077, 102],
...,
[ 101, 1005, 1045, ..., 2129, 15743, 102],
[ 101, 1045, 1005, ..., 2107, 2004, 102],
[ 101, 1005, 16770, ..., 1045, 3246, 102]]), 'attention_mask': tensor([[1, 1, 1, ..., 1, 1, 1],
[1, 1, 1, ..., 1, 1, 1],
[1, 1, 1, ..., 1, 1, 1],
...,
[1, 1, 1, ..., 1, 1, 1],
[1, 1, 1, ..., 1, 1, 1],
[1, 1, 1, ..., 1, 1, 1]])}
model.forward(
    **tokenizer(d['posts'], padding=True, truncation=True, return_tensors="pt")
)
SequenceClassifierOutput(loss=None, logits=tensor([[-0.1040, -0.0686],
[-0.0636, -0.0867],
[-0.0767, -0.0764],
[-0.0361, -0.1272],
[-0.0618, -0.1035],
[-0.0342, -0.1299],
[-0.0274, -0.1071],
[-0.0250, -0.1089],
[-0.0660, -0.0784],
[-0.0604, -0.1017]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)
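Note that the output above has loss=None because no labels were passed, while the problem asks for the loss. A sketch that also passes the labels from d, so that the classification loss is actually computed, could look like this:

```python
# Pass labels so DistilBertForSequenceClassification also returns the cross-entropy loss.
out = model.forward(
    labels=torch.tensor(d['labels']),
    **tokenizer(d['posts'], padding=True, truncation=True, return_tensors="pt"),
)
print(out.loss)
```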
(6) Observe the following data.
df = pd.read_csv("https://raw.githubusercontent.com/guebin/MP2024/refs/heads/main/posts/mbti_1.csv")
mbti = datasets.Dataset.from_pandas(df)
tokenizer = transformers.AutoTokenizer.from_pretrained(
    "distilbert/distilbert-base-uncased"
)
tokenized = tokenizer(mbti.select(range(2))['posts'], padding=True, return_tensors="pt")
tokenized
Token indices sequence length is longer than the specified maximum sequence length for this model (2102 > 512). Running this sequence through the model will result in indexing errors
{'input_ids': tensor([[ 101, 1005, 8299, ..., 0, 0, 0],
    [ 101, 1005, 1045, ..., 1012, 1005, 102]]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],
    [1, 1, 1, ..., 1, 1, 1]])}
Design a model that can accept tokenized as input and run the code below. (Computing the loss is optional.)
# model(**tokenized)

(Solution)

tokenized['input_ids'].shape
torch.Size([2, 2102])

config = transformers.AutoConfig.from_pretrained(
    "distilbert/distilbert-base-uncased"
)
config.max_position_embeddings = 2273
model = transformers.AutoModelForSequenceClassification.from_config(
    config=config
)
model(**tokenized)
SequenceClassifierOutput(loss=None, logits=tensor([[-0.0858,  0.8048],
        [-0.0705,  0.4062]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)
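Two remarks on the solution above. First, from_config builds the model with randomly initialized weights, so the logits come from an untrained network; the only constraint the solution relies on is max_position_embeddings >= 2102 so that tokenized fits. Second, if pretrained weights were also wanted, the ignore_mismatched_sizes trick from problem 1-(4) should apply here as well; the following is an untested sketch, not the graded answer:

```python
# Sketch: load pretrained DistilBERT weights and re-initialize only the position embeddings,
# which no longer match after enlarging max_position_embeddings.
config = transformers.AutoConfig.from_pretrained("distilbert/distilbert-base-uncased")
config.max_position_embeddings = 2273
model = transformers.AutoModelForSequenceClassification.from_pretrained(
    "distilbert/distilbert-base-uncased",
    config=config,
    ignore_mismatched_sizes=True,
)
model(**tokenized)
```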
(7) Below is an audio classification model.
model = transformers.AutoModelForAudioClassification.from_pretrained(
    "facebook/wav2vec2-base"
)
Some weights of Wav2Vec2ForSequenceClassification were not initialized from the model checkpoint at facebook/wav2vec2-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'projector.bias', 'projector.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
The input to this model was set up as follows, but an error occurred.
model(
    input_values = torch.tensor([1]*1000).reshape(1,1000),
    attention_mask = torch.tensor([1]*1000).reshape(1,1000)
)
RuntimeError: expected scalar type Long but found Float
Modify the code to supply valid input data.
(Solution)
model(
    input_values = torch.tensor([1]*1000).reshape(1,1000).float(),
    attention_mask = torch.tensor([1]*1000).reshape(1,1000)
)
SequenceClassifierOutput(loss=None, logits=tensor([[-0.0334,  0.1047]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)
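The original error comes from the dtype: torch.tensor([1]*1000) is a Long tensor, while wav2vec2 expects a floating-point waveform of shape (batch, samples). Any float tensor of that shape works; a minimal equivalent sketch using dummy data:

```python
# Dummy waveform: float values are required for input_values, an integer mask is fine.
model(
    input_values=torch.ones(1, 1000),                      # float32 by default
    attention_mask=torch.ones(1, 1000, dtype=torch.long),
)
```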
2. with
(1) Observe the following code.
tsr = torch.randn(10,16,3,224,224)
model = transformers.VideoMAEForVideoClassification.from_pretrained(
    "MCG-NJU/videomae-base",
)
model(tsr)
Some weights of VideoMAEForVideoClassification were not initialized from the model checkpoint at MCG-NJU/videomae-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
ImageClassifierOutput(loss=None, logits=tensor([[ 0.4060, 0.1346],
[ 0.1393, 0.0789],
[ 0.1066, 0.0795],
[ 0.1970, -0.0070],
[ 0.1905, 0.1503],
[ 0.2089, 0.1376],
[ 0.0104, -0.0011],
[ 0.2811, 0.1901],
[ 0.2035, 0.1625],
[ 0.2755, -0.0095]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)
Modify the code above using with torch.no_grad() so that grad_fn=<AddmmBackward0> no longer appears in the logits.
(Solution)
with torch.no_grad():
    print(model(tsr))
ImageClassifierOutput(loss=None, logits=tensor([[ 0.4060,  0.1346],
[ 0.1393, 0.0789],
[ 0.1066, 0.0795],
[ 0.1970, -0.0070],
[ 0.1905, 0.1503],
[ 0.2089, 0.1376],
[ 0.0104, -0.0011],
[ 0.2811, 0.1901],
[ 0.2035, 0.1625],
[ 0.2755, -0.0095]]), hidden_states=None, attentions=None)
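A quick way to confirm the effect of torch.no_grad() (illustration only) is to check that the returned logits are detached from autograd:

```python
# Inside no_grad, the forward pass is not recorded, so the logits carry no grad_fn.
with torch.no_grad():
    out = model(tsr)
print(out.logits.requires_grad)  # False
print(out.logits.grad_fn)        # None
```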
(2) Observe the following code.
tsr = torch.randn(10,16,3,224,224)
model = transformers.VideoMAEForVideoClassification.from_pretrained(
    "MCG-NJU/videomae-base",
)
model(tsr)
Some weights of VideoMAEForVideoClassification were not initialized from the model checkpoint at MCG-NJU/videomae-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
ImageClassifierOutput(loss=None, logits=tensor([[-0.1040, -0.0061],
[-0.0597, 0.2367],
[-0.2060, 0.2247],
[-0.2293, 0.1689],
[-0.2859, 0.1811],
[-0.2340, 0.1257],
[-0.0881, 0.1403],
[-0.2519, 0.1412],
[-0.1075, 0.0054],
[-0.2806, 0.1081]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)
Design an appropriate class NoGradContext that behaves as shown below.
with NoGradContext():
    tsr = torch.randn(10,16,3,224,224)
    model = transformers.VideoMAEForVideoClassification.from_pretrained(
        "MCG-NJU/videomae-base",
    )
    print(model(tsr))
Some weights of VideoMAEForVideoClassification were not initialized from the model checkpoint at MCG-NJU/videomae-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
ImageClassifierOutput(loss=None, logits=tensor([[-0.2523, -0.5055],
[-0.1680, -0.5540],
[-0.3205, -0.4462],
[-0.1461, -0.6196],
[-0.1723, -0.6704],
[-0.1323, -0.5601],
[-0.2817, -0.4697],
[-0.1575, -0.4805],
[-0.2094, -0.4485],
[-0.2593, -0.5014]]), hidden_states=None, attentions=None)
(Solution)
class NoGradContext:
    def __enter__(self):
        torch.set_grad_enabled(False)
    def __exit__(self,*args):
        torch.set_grad_enabled(True)
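One caveat about the class above: __exit__ unconditionally re-enables gradients, even if they were already disabled before entering the block. A slightly more careful sketch (an optional refinement, not required by the problem) saves and restores the previous state:

```python
class NoGradContext:
    def __enter__(self):
        # Remember whether gradients were enabled before entering the block.
        self._prev = torch.is_grad_enabled()
        torch.set_grad_enabled(False)
    def __exit__(self, *args):
        # Restore the previous setting instead of always enabling gradients.
        torch.set_grad_enabled(self._prev)
```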
(3) Consider the following dataframe.

df = pd.DataFrame(np.random.randn(61,2))
display(df)

|   | 0 | 1 |
|---|---|---|
| 0 | 1.797597 | -0.509486 |
| 1 | 2.026591 | -1.130091 |
| 2 | 0.774137 | -0.393504 |
| 3 | 0.859450 | -0.689825 |
| 4 | -0.930429 | -0.715551 |
| ... | ... | ... |
| 56 | -0.720481 | -0.116972 |
| 57 | -1.127292 | 2.401930 |
| 58 | 1.819896 | 2.160495 |
| 59 | -0.034305 | 0.247947 |
| 60 | -0.354988 | 0.184486 |
61 rows × 2 columns
When this df is displayed, some rows are omitted, which is occasionally inconvenient. To temporarily see the full, untruncated output, code like the following can be used.
# pd.set_option('display.max_rows', None)  # display all rows
# display(df)                              # every row is shown

# pd.reset_option('display.max_rows')      # restore the original setting
# display(df)                              # only some rows are shown again

For the occasions when you want to display every row, design an appropriate class DisplayAllRows that makes the following usage possible.

# # normally: the truncated output
# display(df)

# # when desired: display all rows
# with DisplayAllRows():
#     display(df)

(Solution)
class DisplayAllRows:
    def __enter__(self):
        pd.set_option('display.max_rows', None)
    def __exit__(self,*args):
        pd.reset_option('display.max_rows')
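For comparison, pandas already provides a built-in context manager with the same behavior; pd.option_context temporarily overrides an option and restores it when the block exits:

```python
# Built-in alternative to DisplayAllRows: the option reverts automatically after the block.
with pd.option_context('display.max_rows', None):
    display(df)  # all 61 rows are shown; afterwards the default truncated display returns
```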