การแยกวิเคราะห์

การวิเคราะห์

LLM ที่ทำตามคำสั่งใน prompt ได้ดี สามารถถูกมอบหมายให้ส่งออกข้อมูลในรูปแบบที่กำหนดได้

วิธีนี้อาศัยการออกแบบ prompt ที่ดี จากนั้นจึงแยกวิเคราะห์ (parse) ผลลัพธ์ของ LLM เพื่อให้ดึงข้อมูลออกมาได้อย่างดี

ที่นี่เราจะใช้ Claude ซึ่งเก่งในการทำตามคำสั่ง! ดูที่ Anthropic models

from langchain_anthropic.chat_models import ChatAnthropic

# Claude chat model shared by the chains built later in this guide.
# NOTE(review): temperature=0 is presumably chosen to keep the extraction
# output as repeatable as possible — confirm against the Anthropic docs.
model = ChatAnthropic(model_name="claude-3-sonnet-20240229", temperature=0)

เคล็ดลับ: ข้อควรพิจารณาด้านคุณภาพการดึงข้อมูลทั้งหมดยังคงใช้กับวิธีการแยกวิเคราะห์ (parsing) เช่นกัน โปรดทบทวนแนวทางสำหรับคุณภาพการดึงข้อมูล

บทแนะนำนี้ตั้งใจให้เรียบง่าย แต่โดยทั่วไปควรใส่ตัวอย่างอ้างอิง (reference examples) ด้วยเพื่อเพิ่มประสิทธิภาพ!

การใช้ PydanticOutputParser

ตัวอย่างต่อไปนี้ใช้ PydanticOutputParser ที่มีมาให้ในตัว (built-in) เพื่อแยกวิเคราะห์ผลลัพธ์ของแบบจำลองการสนทนา

from typing import List, Optional

from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field, validator


# Schema for one person mentioned in the input text.
# The Thai docstring ("Information about a person") is kept verbatim on
# purpose: it appears as this model's "description" in the JSON schema printed
# further down, so it is part of the prompt text, not just a comment.
class Person(BaseModel):
    """ข้อมูลเกี่ยวกับบุคคล"""

    # Required (`...` = no default); description is Thai for "The person's name".
    name: str = Field(..., description="ชื่อของบุคคล")
    # Required; description is Thai for "The person's height in meters".
    height_in_meters: float = Field(
        ..., description="ความสูงของบุคคลเป็นเมตร"
    )


# Top-level container handed to the parser: every person found in the text.
# The Thai docstring (roughly "Identifying information about all people in a
# text") is kept verbatim because it appears as the schema "description" in
# the printed prompt further down.
class People(BaseModel):
    """ข้อมูลสำหรับการระบุเกี่ยวกับบุคคลทั้งหมดในข้อความ"""

    # One Person entry per individual; listed under "required" in the schema.
    people: List[Person]


# Parser bound to the People model; it also supplies the format instructions
# that are injected into the system message below.
parser = PydanticOutputParser(pydantic_object=People)

# Two-message chat prompt. The system text is Thai for "Answer the user's
# question. Wrap the output in `json` tags", followed by the parser's format
# instructions; the human message carries the query. `.partial(...)` pre-fills
# {format_instructions}, leaving {query} as the only variable to supply.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "ตอบคำถามของผู้ใช้ ห่อผลลัพธ์ในแท็ก `json`\n{format_instructions}",
        ),
        ("human", "{query}"),
    ]
).partial(format_instructions=parser.get_format_instructions())

มาดูว่าข้อมูลที่ส่งไปยังแบบจำลองมีอะไรบ้าง

# Sample input, Thai for "Anna is 23 years old and she is 6 feet tall".
query = "แอนนามีอายุ 23 ปี และเธอสูง 6 ฟุต"

# Render the prompt with the query filled in and print it as plain text, to
# inspect exactly what is sent to the model.
print(prompt.format_prompt(query=query).to_string())

System: ตอบคำถามของผู้ใช้ ห่อผลลัพธ์ในแท็ก `json`
ผลลัพธ์ควรจัดรูปแบบเป็นอินสแตนซ์ JSON ที่สอดคล้องกับ JSON schema ด้านล่าง

เช่น สำหรับ schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
วัตถุ {"foo": ["bar", "baz"]} เป็นอินสแตนซ์ที่จัดรูปแบบถูกต้องตาม schema นี้ ส่วนวัตถุ {"properties": {"foo": ["bar", "baz"]}} จัดรูปแบบไม่ถูกต้อง

นี่คือ schema ของผลลัพธ์:
{"description": "ข้อมูลสำหรับการระบุเกี่ยวกับบุคคลทั้งหมดในข้อความ", "properties": {"people": {"title": "People", "type": "array", "items": {"$ref": "#/definitions/Person"}}}, "required": ["people"], "definitions": {"Person": {"title": "Person", "description": "ข้อมูลเกี่ยวกับบุคคล", "type": "object", "properties": {"name": {"title": "Name", "description": "ชื่อของบุคคล", "type": "string"}, "height_in_meters": {"title": "Height In Meters", "description": "ความสูงของบุคคลเป็นเมตร", "type": "number"}}, "required": ["name", "height_in_meters"]}}}

Human: แอนนามีอายุ 23 ปี และเธอสูง 6 ฟุต

# Compose prompt, model and parser into a single pipeline with `|`.
chain = prompt | model | parser
# Run the pipeline on the sample query; the result shown below is a People
# instance.
chain.invoke({"query": query})

People(people=[Person(name='แอนนา', height_in_meters=1.83)])

การวิเคราะห์แบบกำหนดเอง

การสร้าง prompt และ parser แบบกำหนดเองด้วย LangChain และ LCEL นั้นทำได้ง่ายมาก

คุณสามารถใช้ฟังก์ชันง่าย ๆ เพื่อวิเคราะห์ผลลัพธ์จากโมเดลได้!

import json
import re
from typing import List, Optional

from langchain_anthropic.chat_models import ChatAnthropic
from langchain_core.messages import AIMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field, validator


# Schema for one person mentioned in the input text (custom-parsing section).
# The Thai docstring ("Information about a person") is kept verbatim on
# purpose: it appears as this model's "description" in the schema embedded in
# the prompt printed further down.
class Person(BaseModel):
    """ข้อมูลเกี่ยวกับบุคคล"""

    # Required (`...` = no default); description is Thai for "The person's name".
    name: str = Field(..., description="ชื่อของบุคคล")
    # Required; description is Thai for "The person's height expressed in meters".
    height_in_meters: float = Field(
        ..., description="ความสูงของบุคคลที่แสดงในหน่วยเมตร"
    )


# Top-level container whose schema is embedded in the custom prompt.
# The Thai docstring (roughly "Identifying information about all people in a
# text") is kept verbatim because it appears as the schema "description" in
# the printed prompt further down.
class People(BaseModel):
    """ข้อมูลที่ระบุเกี่ยวกับบุคคลทั้งหมดในข้อความ"""

    # One Person entry per individual; listed under "required" in the schema.
    people: List[Person]


# Custom prompt: the system message (Thai) asks the model to answer in JSON
# matching the embedded schema and to wrap the answer in ```json ... ``` fences;
# the human message carries the query. `.partial(...)` pre-fills {schema}.
# NOTE(review): People.schema() is a dict, so it is interpolated as a Python
# repr (single-quoted keys, as the printed prompt below shows) inside a block
# labelled ```json. Consider People.schema_json() if strict JSON is wanted.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "ตอบคำถามจากผู้ใช้ และแสดงคำตอบของคุณในรูปแบบ JSON ที่ตรงกับ schema ที่กำหนด: ```json\n{schema}\n```.  "
            "แน่ใจว่าคำตอบถูกห่อหุ้มด้วย ```json และ ``` tags",
        ),
        ("human", "{query}"),
    ]
).partial(schema=People.schema())


def extract_json(message: AIMessage) -> List[dict]:
    """Extract every JSON payload fenced by ```json ... ``` in a model reply.

    Args:
        message: The chat-model reply. Only its ``content`` attribute is read,
            and it is matched against a regular expression, so it must be a
            string.

    Returns:
        One decoded JSON value per fenced block, in order of appearance.
        An empty list is returned when the reply contains no fenced block.

    Raises:
        ValueError: If any fenced block cannot be decoded as JSON. The
            underlying decoding error is chained as ``__cause__``.
    """
    text = message.content

    # Non-greedy match with DOTALL: a payload may span several lines, and
    # multiple fenced blocks in one reply are captured separately.
    fenced_blocks = re.findall(r"```json(.*?)```", text, re.DOTALL)

    try:
        return [json.loads(block.strip()) for block in fenced_blocks]
    except Exception as err:
        # Keep the ValueError contract callers rely on, but chain the cause so
        # the position of the malformed JSON is not lost.
        raise ValueError(f"การแยกไม่สำเร็จ: {message}") from err

# Sample input, Thai for "Anna, 23 years old, and she is 6 feet tall".
query = "แอนนา อายุ 23 ปี และเธอสูง 6 ฟุต"
# Print the fully rendered prompt to inspect what is sent to the model.
print(prompt.format_prompt(query=query).to_string())

System: ตอบคำถามจากผู้ใช้ และแสดงคำตอบของคุณในรูปแบบ JSON ที่ตรงกับ schema ที่กำหนด: ```json
{'title': 'People', 'description': 'ข้อมูลที่ระบุเกี่ยวกับบุคคลทั้งหมดในข้อความ', 'type': 'object', 'properties': {'people': {'title': 'People', 'type': 'array', 'items': {'$ref': '#/definitions/Person'}}}, 'required': ['people'], 'definitions': {'Person': {'title': 'Person', 'description': 'ข้อมูลเกี่ยวกับบุคคล', 'type': 'object', 'properties': {'name': {'title': 'Name', 'description': 'ชื่อของบุคคล', 'type': 'string'}, 'height_in_meters': {'title': 'Height In Meters', 'description': 'ความสูงของบุคคลที่แสดงในหน่วยเมตร', 'type': 'number'}}, 'required': ['name', 'height_in_meters']}}}
```. แน่ใจว่าคำตอบถูกห่อหุ้มด้วย ```json และ ``` tags
Human: แอนนา อายุ 23 ปี และเธอสูง 6 ฟุต

# Pipeline for the custom approach: the plain function `extract_json` takes
# the place of an output parser as the final step.
chain = prompt | model | extract_json
# Run it on the sample query; the result shown below is a list of dicts.
chain.invoke({"query": query})

[{'people': [{'name': 'แอนนา', 'height_in_meters': 1.83}]}]

ไลบรารีอื่น ๆ

ถ้าคุณกำลังมองหาวิธีดึงข้อมูลด้วยแนวทางการแยกวิเคราะห์ (parsing) ลองดูไลบรารี Kor ซึ่งเขียนโดยผู้ดูแล LangChain ไลบรารีนี้ช่วยสร้าง prompt ที่นำตัวอย่างมาประกอบ ช่วยให้ควบคุมรูปแบบผลลัพธ์ได้ (เช่น JSON หรือ CSV) และแสดง schema ในรูปแบบ TypeScript ดูเหมือนว่าจะทำงานได้ดีทีเดียว!

การวิเคราะห์

การใช้ PydanticOutputParser

การวิเคราะห์แบบกำหนดเอง

ไลบรารีอื่น ๆ

บทเรียนที่เกี่ยวข้อง