[28일차] ABC 부트캠프 NVIDIA 딥러닝

ABC 부트캠프

[28일차] ABC 부트캠프 NVIDIA 딥러닝

ejis 2024. 8. 14. 11:00

1. 데이터 준비

# 필요한 유틸리티를 가져옵니다.
import os
import json
import glob
from omegaconf import OmegaConf  # OmegaConf 라이브러리, 구성 파일 관리에 사용

def get_latest_model():
    """Return the path of the latest ``.nemo`` checkpoint.

    Searches ``nemo_experiments/TextClassification/*/checkpoints/`` (relative
    to the current working directory) for ``.nemo`` files and returns the
    path that sorts last in lexicographic order.

    NOTE(review): "latest" is decided by string order of the path, not by
    file modification time; this presumably relies on the run directories
    being timestamp-named -- confirm.

    Returns:
        str: Path of the checkpoint that sorts last.

    Raises:
        IndexError: If no checkpoint matches the search pattern.  The type
            is kept for backward compatibility with the previous
            ``paths[0]`` lookup, but now carries an explanatory message.
    """
    pattern = 'nemo_experiments/TextClassification/*/checkpoints/*.nemo'
    nemo_model_paths = glob.glob(pattern)

    # Fail with an actionable message instead of "list index out of range".
    if not nemo_model_paths:
        raise IndexError(
            "no .nemo checkpoint found matching '{}' under '{}'".format(
                pattern, os.getcwd()
            )
        )

    # The largest path string is the first element of a descending sort.
    return max(nemo_model_paths)

# Path to the data directory.
DATA_DIR = '/dli/task/data/federalist_papers_HM'

# Print the files and folders found inside the data directory.
!ls $DATA_DIR  # IPython shell escape: runs `ls` with the value of DATA_DIR substituted

2. 모델: Configuration

# Load the text-classification config file and show its `model` section as YAML.
CONFIG_DIR = "/dli/task/nemo/examples/nlp/text_classification/conf"
CONFIG_FILE = "text_classification_config.yaml"

config_path = f"{CONFIG_DIR}/{CONFIG_FILE}"
config = OmegaConf.load(config_path)
print(OmegaConf.to_yaml(config.model))

3. 평가: Trainer Configuration 준비

# Show the `trainer` and `exp_manager` sections of the loaded config as YAML,
# one after the other.
for section_name in ("trainer", "exp_manager"):
    print(OmegaConf.to_yaml(getattr(config, section_name)))

4. 트레이닝

# Run to save for assessment- DO NOT CHANGE
# Both log paths are built from the directory two levels above the checkpoint
# file returned by get_latest_model().
cmd_log = os.path.join(os.path.dirname(os.path.dirname(get_latest_model())),'cmd-args.log')
lightning_logs = os.path.join(os.path.dirname(os.path.dirname(get_latest_model())),'lightning_logs.txt')

# Read the command-args log and split it on whitespace into a list of tokens.
with open(cmd_log, "r") as f:
    cmd = f.read()
    cmd_list = cmd.split()
# Store the token list as JSON in my_assessment/step4.json.
with open("my_assessment/step4.json", "w") as outfile: 
    json.dump(cmd_list, outfile) 
    
# Read the lightning log as a list of lines (newlines are kept by readlines()).
with open(lightning_logs, "r") as f:
    log = f.readlines()
# Store the list of log lines as JSON in my_assessment/step4_lightning.json.
with open("my_assessment/step4_lightning.json", "w") as outfile:
    json.dump(log, outfile)

5. 모델 결과 저장

# Collect one author label per entry of `results`.
author = []

for scores in results:
    # Arithmetic mean of the values in this entry.
    mean_score = sum(scores) / len(scores)

    # A mean below 0.5 is labelled HAMILTON; 0.5 or above is labelled MADISON.
    label = "HAMILTON" if mean_score < 0.5 else "MADISON"
    author.append(label)
    print(label)

# Write the list of labels as JSON to my_assessment/step5.json.
with open("my_assessment/step5.json", "w") as fh:
    json.dump(author, fh)

오늘도 엔비디아 관련 수료증을 받았고, 이제 기술을 배우는 시간은 마무리되었네요!

이제 대망의 최종 프로젝트까지 열심히 달려보겠습니다!