Quiz-9 (2024.12.03) // 범위: 11wk-1 까지

Author

최규빈

Published

December 3, 2024

항목 허용 여부 비고
강의노트 참고 허용 수업 중 제공된 강의노트나 본인이 정리한 자료를 참고 가능
구글 검색 허용 인터넷을 통한 자료 검색 및 정보 확인 가능
생성 모형 사용 허용 인공지능 기반 도구(GPT 등) 사용 가능
import os
os.environ["WANDB_MODE"] = "offline"
import pandas as pd
import numpy as np
import datasets 
import transformers
import torch
import torch.utils
/home/cgb3/anaconda3/envs/hf/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm

1. sms_spam – 40점

아래의 코드를 실행하여 model, tokenizer, spam 을 불러오라.

# DistilBERT with a fresh binary classification head, its tokenizer, and a
# tiny SMS spam dataset split 80/20 into train/test.
checkpoint = "distilbert/distilbert-base-uncased"
model = transformers.AutoModelForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=2,
)
tokenizer = transformers.AutoTokenizer.from_pretrained(checkpoint)
spam = datasets.load_dataset('guebin/spam-tiny')['train'].train_test_split(test_size=0.2)
spam
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
DatasetDict({
    train: Dataset({
        features: ['sms', 'label'],
        num_rows: 8
    })
    test: Dataset({
        features: ['sms', 'label'],
        num_rows: 2
    })
})

???에 적절한 data_collator 혹은 collate_fn 을 설계하여 아래의 코드를 완성하라.

trainer = transformers.Trainer(
    model=model,
    data_collator=???,
    train_dataset=spam['train'],
    args = transformers.TrainingArguments(
        output_dir="asdf",
        remove_unused_columns=False
    )
)
trainer.train()

(풀이)

def collate_fn(single_batch):
    """Collate a list of SMS rows into a model-ready batch.

    Each element of `single_batch` is a dataset row with keys 'sms' (str)
    and 'label' (int). Texts are tokenized with padding/truncation into
    PyTorch tensors, and the labels are attached under the 'labels' key
    that the Trainer expects.
    """
    texts = [example['sms'] for example in single_batch]
    labels = [example['label'] for example in single_batch]
    batch = tokenizer(
        texts,
        padding=True,
        truncation=True,
        return_tensors="pt",
    )
    batch['labels'] = torch.tensor(labels)
    return batch
# Fine-tune the spam classifier; remove_unused_columns=False keeps the raw
# 'sms' column on the dataset so the collator can still read it.
training_args = transformers.TrainingArguments(
    output_dir="asdf",
    remove_unused_columns=False,
)
trainer = transformers.Trainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    train_dataset=spam['train'],
    eval_dataset=spam['test'],
)
trainer.train()
[3/3 00:01, Epoch 3/3]
Step Training Loss

TrainOutput(global_step=3, training_loss=0.5972510576248169, metrics={'train_runtime': 1.2616, 'train_samples_per_second': 19.023, 'train_steps_per_second': 2.378, 'total_flos': 347726921472.0, 'train_loss': 0.5972510576248169, 'epoch': 3.0})

2. Food101 – 60점

아래의 코드를 실행하여 model, image_processor, food 를 불러오라.

# ViT backbone with a fresh 101-way classification head, its matching
# image processor, and a tiny Food101 subset split 80/20 into train/test.
checkpoint = "google/vit-base-patch16-224-in21k"
model = transformers.AutoModelForImageClassification.from_pretrained(
    checkpoint,
    num_labels=101,
)
image_processor = transformers.AutoImageProcessor.from_pretrained(checkpoint)
food = datasets.load_dataset("guebin/food101-tiny")['train'].train_test_split(test_size=0.2)
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.

아래의 변환을 적용하라.

def w_trans(examples):
    """On-the-fly transform: merge processor output into each batch.

    Runs the image processor over the 'image' column and returns the
    original columns together with the resulting 'pixel_values'.
    """
    processed = image_processor(examples['image'])
    return examples | processed


food = food.with_transform(w_trans)

???에 적절한 data_collator 혹은 collate_fn 을 설계하여 아래의 코드를 완성하라.

trainer = transformers.Trainer(
    model=model,
    data_collator=???,
    train_dataset=food['train'],
    args = transformers.TrainingArguments(
        output_dir="asdf",
        remove_unused_columns=False
    )
)
trainer.train()

(풀이)

def collate_fn(single_batch):
    """Collate a list of processed image rows into a model-ready batch.

    Parameters
    ----------
    single_batch : list[dict]
        Each dict holds 'pixel_values' (array-like image data produced by
        the image processor — assumed the same shape for every example)
        and an integer 'label'.

    Returns
    -------
    dict
        'pixel_values': float tensor of shape (batch, C, H, W);
        'labels': int64 tensor of shape (batch,) — the keys the
        ViT classification model expects.
    """
    # Stack once into a single contiguous array, then wrap it without an
    # extra copy; torch.tensor() would copy the freshly stacked buffer again.
    pixel_values = np.stack([example['pixel_values'] for example in single_batch])
    labels = [example['label'] for example in single_batch]
    return {
        'pixel_values': torch.from_numpy(pixel_values),
        'labels': torch.tensor(labels),
    }
# Fine-tune the food classifier; remove_unused_columns=False keeps the
# dataset columns the with_transform/collator pipeline relies on.
training_args = transformers.TrainingArguments(
    output_dir="asdf",
    remove_unused_columns=False,
)
trainer = transformers.Trainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    train_dataset=food['train'],
    eval_dataset=food['test'],
)
trainer.train()
[3/3 00:01, Epoch 3/3]
Step Training Loss

TrainOutput(global_step=3, training_loss=4.46260134379069, metrics={'train_runtime': 1.4387, 'train_samples_per_second': 16.682, 'train_steps_per_second': 2.085, 'total_flos': 1861457968742400.0, 'train_loss': 4.46260134379069, 'epoch': 3.0})