
[course] Build AI-Powered Apps with OpenAI and Node.js

Sources

This post is a set of course notes for Advanced AI Application Development with OpenAI and Node.js | Hands-On Course with Scott Moss @ FEM, not original content. Source:

Sample Code

FEM-openai-nodejs @ pjchender GitLab

Intro to LLMs

An LLM (Large Language Model) can be thought of as a super smart text machine: it has been trained on an enormous amount of data, and it can understand input and respond in a way humans can understand. An LLM is essentially the brain behind ChatGPT.

Getting Started

import 'dotenv/config';
import OpenAI from 'openai';

const openai = new OpenAI();

const results = await openai.chat.completions.create({
  model: 'gpt-3.5-turbo',
  // the higher the temperature, the more creative OpenAI gets
  temperature: 0,
  messages: [
    {
      // role:
      // - system: the background of the AI
      // - user: human talking to the AI
      // - assistant: AI talking to the human
      role: 'system',
      content: 'You are an AI assistant, answer any questions to the best of your ability.',
    },
    {
      role: 'user',
      content: 'Hi! ',
    },
  ],
});

console.log(results);
// {
//   id: 'chatcmpl-8bk5JfZ1c3btxV6uryDVUTIGEjYln',
//   object: 'chat.completion',
//   created: 1704005977,
//   model: 'gpt-3.5-turbo-0613',
//   choices: [
//     {
//       index: 0,
//       message: [Object],
//       logprobs: null,
//       finish_reason: 'stop'
//     }
//   ],
//   usage: { prompt_tokens: 30, completion_tokens: 9, total_tokens: 39 },
//   system_fingerprint: null
// }
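Note that console.log collapses nested objects, which is why message shows up as [Object] above. The assistant's actual reply is a string on choices[0].message.content:

console.log(results.choices[0].message.content);
// the assistant's reply as a plain string, e.g. a greeting in response to "Hi!"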

Simple Chat UX


import { openai } from './openai.js';
import readline from 'node:readline';

const rl = readline.createInterface({
  input: process.stdin,
  output: process.stdout,
});

const newMessage = async (history, message) => {
  const results = await openai.chat.completions.create({
    model: 'gpt-3.5-turbo',
    messages: [...history, message],
  });

  return results.choices[0].message;
};

const formatMessage = (userInput) => ({
  role: 'user',
  content: userInput,
});

const chat = () => {
  const history = [
    {
      role: 'system',
      content: 'You are an AI assistant. Answer questions or else!',
    },
  ];

  const start = () => {
    rl.question('You: ', async (userInput) => {
      if (userInput.toLowerCase() === 'exit') {
        rl.close();
        return;
      }

      const message = formatMessage(userInput);
      const response = await newMessage(history, message);
      history.push(message, response);
      console.log(`\n\nAI: ${response.content}\n\n`);
      start();
    });
  };

  start();
};

console.log('Chatbot initialized. Type "exit" to quit.');
chat();

The logic of interacting with an LLM

Embeddings and Vectors

Introducing text and code embeddings

Computers are better at handling numbers than text. To let a computer learn words and their meanings, we convert text into a series of numbers (vectors); these LLM-readable vectors are called embeddings.

Embedding

Embeddings are the numerical vectors that represent text in a format understandable by machine learning models.
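For instance, OpenAI exposes an embeddings endpoint that turns a string into such a vector. A minimal sketch using the same openai client as above (text-embedding-ada-002 was the embedding model of this course's era):

import { openai } from './openai.js';

// turn a piece of text into an embedding vector
const response = await openai.embeddings.create({
  model: 'text-embedding-ada-002',
  input: 'Comedic journey full of adult humor and awkwardness.',
});

// a long array of floats; ada-002 embeddings have 1536 dimensions
console.log(response.data[0].embedding.length); // 1536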

So what we'll do is:

  • Convert text (which can come from videos, documents, books, etc.) into Langchain Documents
  • Turn the Documents into Embeddings
  • Build a Vector Store
  • Use the Vector Store to match the user's query against the data inside it

Semantic search and Langchain

The logic of Semantic Search: convert the content to be indexed into a series of numbers (a vector), also called an Embedding, and save it into something called a Vector Store. Next, convert the user's query into a vector in the same way. Finally, compare that vector against the other vectors in the Vector Store, find the data in the Vector Store closest to the query, and return it.

Tip

The search process itself doesn't require sending any prompt to OpenAI.
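Under the hood, "closest" is typically measured with cosine similarity between the query vector and each stored vector. A minimal sketch of that comparison (plain JavaScript to illustrate the idea, not Langchain's actual implementation):

// cosine similarity: 1 = same direction (very similar), 0 = unrelated
const cosineSimilarity = (a, b) => {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i += 1) {
    dot += a[i] * b[i];
    normA += a[i] ** 2;
    normB += b[i] ** 2;
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
};

// a vector store effectively ranks documents by this score and returns the top matches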

import { openai } from './openai.js';
import { Document } from 'langchain/document';
import { MemoryVectorStore } from 'langchain/vectorstores/memory';
import { OpenAIEmbeddings } from 'langchain/embeddings/openai';

const movies = [
  {
    id: 1,
    title: 'Stepbrother',
    description: `Comedic journey full of adult humor and awkwardness.`,
  },
  // ...
];

// the store indexes our documents
const createStore = () =>
  MemoryVectorStore.fromDocuments(
    movies.map(
      (movie) =>
        new Document({
          // the text content to turn into a vector
          pageContent: `Title: ${movie.title}\n${movie.description}`,
          // once a document is matched by a query, use this to get structured data back
          metadata: { source: movie.id, title: movie.title },
        }),
    ),
    new OpenAIEmbeddings(),
  );

/**
 * @param {string} query - the user's query
 * @param {number} count - how many results to return
 * @returns
 */
const search = async (query, count = 1) => {
  const store = await createStore();

  /**
   * similaritySearch converts the query into a vector,
   * then compares it against the documents in the store
   */
  return store.similaritySearch(query, count);
};

console.log(await search('a movie that will make me feel like I am crazy'));

Document QA

What is Document QA

A QA system answers your question based on its own data. You can think of it as an AI that reads an article for you, then answers your question based on that article's content, so its accuracy is higher. Unlike a search engine, which responds with a list of results, a QA system replies with a precise answer.

Document QA fits the case where a user hits a problem and wants a solution or recommendation directly, without reading through articles one by one. It works well for official documentation, customer-support sites, or having the AI read an article and then answer your questions about it.

The logic of Document QA

Document QA doesn't send the whole document to OpenAI directly. Instead, it first splits the document into multiple chunks. For example, download a YouTube video's transcript and split it into chunks; then send only the chunks relevant to the user's query to OpenAI, and ask OpenAI to answer based on the content of those chunks.

Deciding locally which chunks to send to OpenAI works like the search described above: first search locally for the relevant chunks, then send those chunks to OpenAI.

Create a Document QA

The overall flow is:

  1. Use a Loader to import the raw data (e.g., a PDF file or a YouTube video) and convert it into Documents
  2. Split the Documents into multiple chunks so OpenAI can process the data
  3. Convert the Documents into OpenAI Embeddings and save them in a Vector Store
  4. Run a similarity search against the user's query to find the similar chunks
  5. Combine the user's query and the chunks found by the similarity search into a prompt and send it to OpenAI
  6. Get the response from OpenAI
import { openai } from './openai.js';
import { MemoryVectorStore } from 'langchain/vectorstores/memory';
import { OpenAIEmbeddings } from 'langchain/embeddings/openai';
import { CharacterTextSplitter } from 'langchain/text_splitter';
import { PDFLoader } from 'langchain/document_loaders/fs/pdf';
import { YoutubeLoader } from 'langchain/document_loaders/web/youtube';

const createStore = (docs) => MemoryVectorStore.fromDocuments(docs, new OpenAIEmbeddings());

/**
 * This function takes a YouTube video URL, loads its content, and splits it into manageable chunks.
 * It uses YoutubeLoader to fetch the video's content and CharacterTextSplitter to divide the content.
 */
const docsFromYTVideo = (video) => {
  const loader = YoutubeLoader.createFromUrl(video, {
    language: 'en',
    // adds video info to the document's metadata, i.e. the actual video source
    addVideoInfo: true,
  });

  return loader.loadAndSplit(
    // avoid feeding OpenAI too many tokens at once and exceeding the limit
    new CharacterTextSplitter({
      separator: ' ',
      chunkSize: 2500,
      // the amount of overlap between chunks, so a split doesn't cut
      // mid-sentence and lose the meaning
      chunkOverlap: 100,
    }),
  );
};

const docsFromPDF = (pdf) => {
  const loader = new PDFLoader(pdf || 'xbox.pdf');
  return loader.loadAndSplit(
    new CharacterTextSplitter({
      separator: '. ',
      chunkSize: 2500,
      chunkOverlap: 200,
    }),
  );
};

const loadStore = async () => {
  // `video` is declared at the bottom of the file; it is available by the time this runs
  const videoDocs = await docsFromYTVideo(video);
  const pdfDocs = await docsFromPDF();

  return createStore([...videoDocs, ...pdfDocs]);
};

const query = async (question, count = 2) => {
  const store = await loadStore();
  const searchResults = await store.similaritySearch(question, count);
  const chatResponse = await openai.chat.completions.create({
    model: 'gpt-4',
    temperature: 0,
    messages: [
      {
        role: 'system',
        content: 'You are a helpful AI assistant. Answer questions to your best ability.',
      },
      {
        role: 'user',
        content: `Answer the following question using the provided context. If you cannot answer the question with the context, don't lie and make up stuff. Just say you need more context.
Question: ${question}

Context: ${searchResults.map((r) => r.pageContent).join('\n')}`,
      },
    ],
  });
  return { chatResponse, searchResults };
};

const question = process.argv[2] || 'hi';
const video = 'https://youtu.be/zR_iuq2evXo?si=cG8rODgRgXOx9_Cn';

const { chatResponse, searchResults } = await query(question);
console.log(
  `Answer: ${chatResponse.choices[0].message.content}\n\nSources: ${searchResults
    .map((r) => r.metadata.source)
    .join(', ')}`,
);
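Assuming the script above is saved as qa.js (the file name is an assumption), running node qa.js "what is this video about" prints the answer followed by the metadata.source of each matched chunk, so you can see which video or PDF the answer came from.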

Function Calling

Scripts
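The script below registers two functions with the model: calculate and generateImage. Based on each function's description, GPT decides whether to answer directly or to request a function call; when finish_reason is 'function_call', we run the matching local function, push the result back as a function message, and loop until GPT returns a final answer (finish_reason of 'stop').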

import 'dotenv/config';
import { openai } from './openai.js';
import math from 'advanced-calculator';

const QUESTION = process.argv[2] || 'hi';

const messages = [
  {
    role: 'user',
    content: QUESTION,
  },
];

const FUNCTION_FOR_OPEN_AI = {
  calculate({ expression }) {
    console.log('invoke the calculate function with the expression: ', expression);
    return math.evaluate(expression);
  },
  async generateImage({ caption }) {
    console.log('invoke the generateImage function with the caption: ', caption);
    const result = await openai.images.generate({ prompt: caption });
    return result.data[0].url;
  },
};

const getCompletion = async (messages) => {
  const response = await openai.chat.completions.create({
    model: 'gpt-3.5-turbo-0613',
    messages,
    temperature: 0,
    // declare the callable functions for GPT
    functions: [
      {
        name: 'calculate',
        // GPT uses this description to decide which function to call
        description: 'Run a math expression',
        parameters: {
          type: 'object',
          properties: {
            expression: {
              type: 'string',
              description: 'The math expression to evaluate like "2 * 3 + (21 / 2) ^ 2"',
            },
          },
          required: ['expression'],
        },
      },
      {
        name: 'generateImage',
        // GPT uses this description to decide which function to call
        description: 'Create or generate image based on a caption',
        parameters: {
          type: 'object',
          properties: {
            caption: {
              type: 'string',
              description: 'The caption of the image to generate, like "a cat sitting on a table"',
            },
          },
          required: ['caption'],
        },
      },
    ],
  });

  return response;
};

let response;
while (true) {
  response = await getCompletion(messages);

  // the conversation is over
  if (response.choices[0].finish_reason === 'stop') {
    console.log(response.choices[0].message.content);
    break;
  } else if (response.choices[0].finish_reason === 'function_call') {
    const fnName = response.choices[0].message.function_call.name;
    const args = response.choices[0].message.function_call.arguments;

    // record in the history which function OpenAI asked to call
    messages.push({
      role: 'assistant',
      content: null,
      function_call: {
        name: fnName,
        arguments: args,
      },
    });

    // actually execute the function call
    const functionToCall = FUNCTION_FOR_OPEN_AI[fnName];
    const params = JSON.parse(args);
    const result = await functionToCall(params);

    // add the result to messages and send it back to OpenAI,
    // so OpenAI can continue the conversation based on it
    messages.push({
      role: 'function',
      name: fnName,
      content: JSON.stringify({ result }),
    });
  }
}
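Assuming the script is saved as function-calling.js (the file name is an assumption), node function-calling.js "what is 2 * 3 + (21 / 2) ^ 2" routes to calculate, while a prompt like "generate an image of a cat sitting on a table" routes to generateImage; the returned URL is sent back to GPT, which uses it in its final answer.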