Kwangmin Kim - HuggingFace Local Models

# 토큰 정보로드를 위한 라이브러리
# 설치: pip install python-dotenv
from dotenv import load_dotenv

# 토큰 정보로드
load_dotenv()

# LangSmith 추적을 설정합니다. https://smith.langchain.com
# !pip install langchain-teddynote
from langchain_teddynote import logging

# 프로젝트 이름을 입력합니다.
logging.langsmith("CH04-Models")

모델을 다운로드 받을 경로를 설정

# 허깅페이스 모델/토크나이저를 다운로드 받을 경로
import os

# ./cache/ 경로에 다운로드 받도록 설정
os.environ["TRANSFORMERS_CACHE"] = "./cache/"
os.environ["HF_HOME"] = "./cache/"

repo_id 변수에 HuggingFace 모델의 repo ID(저장소 ID) 를 할당합니다. - microsoft/Phi-3-mini-4k-instruct 모델: https://huggingface.co/microsoft/Phi-3-mini-4k-instruct

from langchain_huggingface import HuggingFacePipeline

llm = HuggingFacePipeline.from_model_id(
    model_id="microsoft/Phi-3-mini-4k-instruct",
    task="text-generation",
    pipeline_kwargs={
        "max_new_tokens": 256,
        "top_k": 50,
        "temperature": 0.1,
    },
)
llm.invoke("Hugging Face is")

%%time
from langchain_core.prompts import PromptTemplate

template = """Summarizes TEXT in simple bullet points ordered from most important to least important.
TEXT:
{text}

KeyPoints: """

# 프롬프트 템플릿 생성
prompt = PromptTemplate.from_template(template)

# 체인 생성
chain = prompt | llm

text = """A Large Language Model (LLM) like me, ChatGPT, is a type of artificial intelligence (AI) model designed to understand, generate, and interact with human language. These models are "large" because they're built from vast amounts of text data and have billions or even trillions of parameters. Parameters are the aspects of the model that are learned from training data; they are essentially the internal settings that determine how the model interprets and generates language. LLMs work by predicting the next word in a sequence given the words that precede it, which allows them to generate coherent and contextually relevant text based on a given prompt. This capability can be applied in a variety of ways, from answering questions and composing emails to writing essays and even creating computer code. The training process for these models involves exposing them to a diverse array of text sources, such as books, articles, and websites, allowing them to learn language patterns, grammar, facts about the world, and even styles of writing. However, it's important to note that while LLMs can provide information that seems knowledgeable, their responses are generated based on patterns in the data they were trained on and not from a sentient understanding or awareness. The development and deployment of LLMs raise important considerations regarding accuracy, bias, ethical use, and the potential impact on various aspects of society, including employment, privacy, and misinformation. Researchers and developers continue to work on ways to address these challenges while improving the models' capabilities and applications."""
print(f"입력 텍스트:\n\n{text}")

# chain 실행
response = chain.invoke({"text": text})

# 결과 출력
print(response)