import os
os.environ["WANDB_MODE"] = "offline"
Quiz-9 (2024.12.03) // Scope: up to 11wk-1
| Item | Allowed? | Notes |
|---|---|---|
| Lecture notes | Allowed | You may consult the lecture notes provided in class or your own summaries |
| Google search | Allowed | You may search the internet for materials and verify information |
| Generative models | Allowed | AI-based tools (GPT, etc.) may be used |
import pandas as pd
import numpy as np
import datasets
import transformers
import torch
import torch.utils
1. sms_spam – 40 points
Run the code below to load model, tokenizer, and spam.
model = transformers.AutoModelForSequenceClassification.from_pretrained(
"distilbert/distilbert-base-uncased", num_labels=2
)
tokenizer = transformers.AutoTokenizer.from_pretrained("distilbert/distilbert-base-uncased")
spam = datasets.load_dataset('guebin/spam-tiny')['train'].train_test_split(test_size=0.2)
spam
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
DatasetDict({
train: Dataset({
features: ['sms', 'label'],
num_rows: 8
})
test: Dataset({
features: ['sms', 'label'],
num_rows: 2
})
})
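For orientation, each element the collator will receive is just a plain dict holding the raw text and its label. A quick check (a minimal sketch, assuming the cells above have run):

spam['train'][0]
# e.g. {'sms': '...', 'label': 0} -- the exact row depends on the random split

A batch reaches the data_collator as a list of such dicts, which the collate_fn must turn into model-ready tensors.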
Design an appropriate data_collator or collate_fn to replace ??? and complete the code below.
trainer = transformers.Trainer(
model=model,
data_collator=???,
train_dataset=spam['train'],
args = transformers.TrainingArguments(
output_dir="asdf",
remove_unused_columns=False
)
)
trainer.train()
(Solution)
def collate_fn(single_batch):
    # single_batch is a list of dicts like {'sms': ..., 'label': ...};
    # tokenize all texts together so padding is applied per batch
    out = tokenizer(
        [dct['sms'] for dct in single_batch],
        padding=True,
        truncation=True,
        return_tensors="pt",
    )
    # the Trainer expects the targets under the key 'labels'
    out['labels'] = torch.tensor([dct['label'] for dct in single_batch])
    return out

trainer = transformers.Trainer(
model=model,
data_collator=collate_fn,
train_dataset=spam['train'],
eval_dataset=spam['test'],
args = transformers.TrainingArguments(
output_dir="asdf",
remove_unused_columns=False
)
)
trainer.train()
[3/3 00:01, Epoch 3/3]
| Step | Training Loss |
|---|---|
TrainOutput(global_step=3, training_loss=0.5972510576248169, metrics={'train_runtime': 1.2616, 'train_samples_per_second': 19.023, 'train_steps_per_second': 2.378, 'total_flos': 347726921472.0, 'train_loss': 0.5972510576248169, 'epoch': 3.0})
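Note that remove_unused_columns=False matters here: by default the Trainer drops dataset columns that do not match the model's forward signature, so the raw sms column would never reach collate_fn. Because eval_dataset=spam['test'] was passed as well, the held-out loss can be checked with the same collator (a minimal sketch, assuming the run above completed):

metrics = trainer.evaluate()   # reuses collate_fn for the eval dataloader
print(metrics)                 # e.g. {'eval_loss': ..., 'eval_runtime': ..., ...}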
2. Food101 – 60 points
Run the code below to load model, image_processor, and food.
model = transformers.AutoModelForImageClassification.from_pretrained(
"google/vit-base-patch16-224-in21k",
num_labels=101
)
image_processor = transformers.AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")
food = datasets.load_dataset("guebin/food101-tiny")['train'].train_test_split(test_size=0.2)
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.
Apply the transform below.
def w_trans(examples):
    # merge the processor output (pixel_values) into the original batch dict
    return examples | image_processor(examples['image'])

food = food.with_transform(w_trans)
Design an appropriate data_collator or collate_fn to replace ??? and complete the code below.
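Before designing the collator, it helps to peek at one transformed example (a quick check, assuming the cells above have run):

# with_transform applies w_trans lazily on access, so each indexed
# example now carries 'pixel_values' alongside the original columns
example = food['train'][0]
print(example.keys())                     # image, label, pixel_values
print(np.shape(example['pixel_values']))  # (3, 224, 224) for this ViT processor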
trainer = transformers.Trainer(
model=model,
data_collator=???,
train_dataset=food['train'],
args = transformers.TrainingArguments(
output_dir="asdf",
remove_unused_columns=False
)
)
trainer.train()
(Solution)
def collate_fn(single_batch):
    dct = dict()
    # stack the per-example (3, 224, 224) arrays into one (B, 3, 224, 224) tensor
    dct['pixel_values'] = torch.tensor(np.stack([o['pixel_values'] for o in single_batch]))
    dct['labels'] = torch.tensor([o['label'] for o in single_batch])
    return dct

trainer = transformers.Trainer(
model=model,
data_collator=collate_fn,
train_dataset=food['train'],
eval_dataset=food['test'],
args = transformers.TrainingArguments(
output_dir="asdf",
remove_unused_columns=False
)
)
trainer.train()
[3/3 00:01, Epoch 3/3]
| Step | Training Loss |
|---|---|
TrainOutput(global_step=3, training_loss=4.46260134379069, metrics={'train_runtime': 1.4387, 'train_samples_per_second': 16.682, 'train_steps_per_second': 2.085, 'total_flos': 1861457968742400.0, 'train_loss': 4.46260134379069, 'epoch': 3.0})
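As a final sanity check, the same collate_fn can feed a single held-out image through the fine-tuned model (a minimal sketch, assuming the run above completed):

batch = collate_fn([food['test'][0]])     # a batch of one example
with torch.no_grad():
    logits = model(pixel_values=batch['pixel_values'].to(model.device)).logits
print(logits.argmax(-1).item(), food['test'][0]['label'])  # predicted vs. true label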