diff --git a/docs/.vitepress/config.ts b/docs/.vitepress/config.ts index 575d114dd5d9..7b4dfa97e9c9 100644 --- a/docs/.vitepress/config.ts +++ b/docs/.vitepress/config.ts @@ -8,7 +8,6 @@ import { groupIconVitePlugin, localIconLoader } from 'vitepress-plugin-group-icons' -import llmstxt from 'vitepress-plugin-llms' const prod = !!process.env.NETLIFY @@ -123,15 +122,15 @@ export default defineConfig({ ), firebase: 'logos:firebase' } - }), - prod && - llmstxt({ - workDir: 'en', - ignoreFiles: ['index.md'] - }) + }) ] }, - + llms: prod + ? { + workDir: 'en', + ignoreFiles: ['index.md'] + } + : false, transformPageData: prod ? (pageData, ctx) => { const site = resolveSiteDataByRoute( diff --git a/docs/package.json b/docs/package.json index d37f1250e21f..a47463fa8ca0 100644 --- a/docs/package.json +++ b/docs/package.json @@ -15,7 +15,6 @@ "open-cli": "^8.0.0", "postcss-rtlcss": "^5.7.0", "vitepress": "workspace:*", - "vitepress-plugin-group-icons": "^1.4.1", - "vitepress-plugin-llms": "^1.1.0" + "vitepress-plugin-group-icons": "^1.4.1" } } diff --git a/package.json b/package.json index c3558638208f..2a9c399dfb8b 100644 --- a/package.json +++ b/package.json @@ -106,10 +106,18 @@ "@vue/shared": "^3.5.13", "@vueuse/core": "^13.1.0", "@vueuse/integrations": "^13.1.0", + "byte-size": "^9.0.1", "focus-trap": "^7.6.4", "mark.js": "8.11.1", + "markdown-title": "^1.0.2", + "millify": "^6.1.0", + "minimatch": "^10.0.1", "minisearch": "^7.1.2", + "remark": "^15.0.1", + "remark-frontmatter": "^5.0.0", "shiki": "^3.2.2", + "tokenx": "^0.4.1", + "unist-util-remove": "^4.0.0", "vite": "^6.2.6", "vue": "^3.5.13" }, @@ -129,6 +137,7 @@ "@rollup/plugin-json": "^6.1.0", "@rollup/plugin-node-resolve": "^16.0.1", "@rollup/plugin-replace": "^6.0.2", + "@types/byte-size": "^8.1.2", "@types/cross-spawn": "^6.0.6", "@types/debug": "^4.1.12", "@types/fs-extra": "^11.0.4", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index d4c6d0c319f3..a76911685722 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml 
@@ -52,18 +52,42 @@ importers: '@vueuse/integrations': specifier: ^13.1.0 version: 13.1.0(axios@1.8.4(debug@4.4.0))(focus-trap@7.6.4)(vue@3.5.13(typescript@5.8.3)) + byte-size: + specifier: ^9.0.1 + version: 9.0.1 focus-trap: specifier: ^7.6.4 version: 7.6.4 mark.js: specifier: 8.11.1 version: 8.11.1 + markdown-title: + specifier: ^1.0.2 + version: 1.0.2 + millify: + specifier: ^6.1.0 + version: 6.1.0 + minimatch: + specifier: ^10.0.1 + version: 10.0.1 minisearch: specifier: ^7.1.2 version: 7.1.2 + remark: + specifier: ^15.0.1 + version: 15.0.1 + remark-frontmatter: + specifier: ^5.0.0 + version: 5.0.0 shiki: specifier: ^3.2.2 version: 3.2.2 + tokenx: + specifier: ^0.4.1 + version: 0.4.1 + unist-util-remove: + specifier: ^4.0.0 + version: 4.0.0 vite: specifier: ^6.2.6 version: 6.2.6(@types/node@22.14.0)(jiti@1.21.7)(yaml@2.7.1) @@ -116,6 +140,9 @@ importers: '@rollup/plugin-replace': specifier: ^6.0.2 version: 6.0.2(rollup@4.39.0) + '@types/byte-size': + specifier: ^8.1.2 + version: 8.1.2 '@types/cross-spawn': specifier: ^6.0.6 version: 6.0.6 @@ -329,9 +356,6 @@ importers: vitepress-plugin-group-icons: specifier: ^1.4.1 version: 1.4.1 - vitepress-plugin-llms: - specifier: ^1.1.0 - version: 1.1.0 packages: @@ -911,6 +935,9 @@ packages: '@tokenizer/token@0.3.0': resolution: {integrity: sha512-OvjF+z51L3ov0OyAU0duzsYuvO01PH7x4t6DJx+guahgTnBHkhJdG7soQeTSFLWN3efnHyibZ4Z8l2EuWwJN3A==} + '@types/byte-size@8.1.2': + resolution: {integrity: sha512-jGyVzYu6avI8yuqQCNTZd65tzI8HZrLjKX9sdMqZrGWVlNChu0rf6p368oVEDCYJe5BMx2Ov04tD1wqtgTwGSA==} + '@types/cross-spawn@6.0.6': resolution: {integrity: sha512-fXRhhUkG4H3TQk5dBhQ7m/JDdSNHKwR2BBia62lhwEIq9xGiQKLxd6LymNhn47SjXhsUEPmxi+PKw2OkW4LLjA==} @@ -2917,9 +2944,6 @@ packages: vitepress-plugin-group-icons@1.4.1: resolution: {integrity: sha512-4APG5wzUvl2JbZcy6+I7K9DleBJE7W5RCkPu2mDPxzKxI/9pF3GmIACDnIlhyfIpUyfW4eanbyoMuP7tzLpM3Q==} - vitepress-plugin-llms@1.1.0: - resolution: {integrity: 
sha512-nb7bG/lBDihlcFTzqxRxQIyzeBWQW9F6OwuUWQ7PFUNK5kVbybxXGISU4wvAV8osQmfrD9xNIGJQfuOLj5CzHg==} - vitest@3.1.1: resolution: {integrity: sha512-kiZc/IYmKICeBAZr9DQ5rT7/6bD9G7uqQEki4fxazi1jdVl2mWGzedtBs5s6llz59yQhVb7FFY2MbHzHCnT79Q==} engines: {node: ^18.0.0 || ^20.0.0 || >=22.0.0} @@ -3583,6 +3607,8 @@ snapshots: '@tokenizer/token@0.3.0': {} + '@types/byte-size@8.1.2': {} + '@types/cross-spawn@6.0.6': dependencies: '@types/node': 22.14.0 @@ -5735,22 +5761,6 @@ snapshots: transitivePeerDependencies: - supports-color - vitepress-plugin-llms@1.1.0: - dependencies: - byte-size: 9.0.1 - gray-matter: 4.0.3 - markdown-title: 1.0.2 - millify: 6.1.0 - minimatch: 10.0.1 - picocolors: 1.1.1 - remark: 15.0.1 - remark-frontmatter: 5.0.0 - tokenx: 0.4.1 - unist-util-remove: 4.0.0 - transitivePeerDependencies: - - '@75lb/nature' - - supports-color - vitest@3.1.1(@types/debug@4.1.12)(@types/node@22.14.0)(jiti@1.21.7)(yaml@2.7.1): dependencies: '@vitest/expect': 3.1.1 diff --git a/src/node/config.ts b/src/node/config.ts index 959e39bd761e..97d3d43bc644 100644 --- a/src/node/config.ts +++ b/src/node/config.ts @@ -160,7 +160,8 @@ export async function resolveConfig( userConfig, sitemap: userConfig.sitemap, buildConcurrency: userConfig.buildConcurrency ?? 64, - ...(await resolvePages(srcDir, userConfig, logger, true)) + ...(await resolvePages(srcDir, userConfig, logger, true)), + llms: userConfig.llms ?? false } // to be shared with content loaders @@ -336,6 +337,7 @@ export async function resolveSiteData( scrollOffset: userConfig.scrollOffset ?? 134, cleanUrls: !!userConfig.cleanUrls, contentProps: userConfig.contentProps, + llms: userConfig.llms ?? 
false, additionalConfig: userConfig.additionalConfig } } diff --git a/src/node/plugin.ts b/src/node/plugin.ts index f6c3d3f98ea0..7f918cb2e87d 100644 --- a/src/node/plugin.ts +++ b/src/node/plugin.ts @@ -32,6 +32,7 @@ import { localSearchPlugin } from './plugins/localSearchPlugin' import { rewritesPlugin } from './plugins/rewritesPlugin' import { staticDataPlugin } from './plugins/staticDataPlugin' import { webFontsPlugin } from './plugins/webFontsPlugin' +import llmstxt from './plugins/llmstxt' import { slash, type PageDataPayload } from './shared' import { deserializeFunctions, serializeFunctions } from './utils/fnSerialize' @@ -80,6 +81,7 @@ export async function createVitePressPlugin( site, vue: userVuePluginOptions, vite: userViteConfig, + llms, lastUpdated, cleanUrls } = siteConfig @@ -439,6 +441,7 @@ export async function createVitePressPlugin( vuePlugin, hmrFix, webFontsPlugin(siteConfig.useWebFonts), + llms ? llmstxt(llms === true ? undefined : llms) : [], ...(userViteConfig?.plugins || []), await localSearchPlugin(siteConfig), staticDataPlugin, diff --git a/src/node/plugins/llmstxt/constants.ts b/src/node/plugins/llmstxt/constants.ts new file mode 100644 index 000000000000..629564669d04 --- /dev/null +++ b/src/node/plugins/llmstxt/constants.ts @@ -0,0 +1,11 @@ +/** Default template for the `llms.txt` file. 
/** Default template for the `llms.txt` file. */
export const defaultLLMsTxtTemplate = `\
# {title}

{description}

{details}

## Table of Contents

{toc}`

/**
 * Options for generating the `llms.txt` file.
 */
export interface GenerateLLMsTxtOptions {
  /** Path to the main documentation file `index.md`. */
  indexMd: string

  /** The source directory for the files. */
  srcDir: VitePressConfig['vitepress']['srcDir']

  /** Template to use for generating `llms.txt`. */
  LLMsTxtTemplate?: LlmstxtSettings['customLLMsTxtTemplate']

  /** Template variables for `customLLMsTxtTemplate`. */
  templateVariables?: LlmstxtSettings['customTemplateVariables']

  /** The VitePress configuration. */
  vitepressConfig?: VitePressConfig['vitepress']['userConfig']

  /** The base domain for the generated links. */
  domain?: LlmstxtSettings['domain']

  /** The link extension for generated links. */
  linksExtension?: LinksExtension

  /** Whether to use clean URLs (without the extension). */
  cleanUrls?: VitePressConfig['cleanUrls']

  /** Optional sidebar configuration for organizing the TOC. */
  sidebar?: DefaultTheme.Sidebar
}

/**
 * Generates a LLMs.txt file with a table of contents and links to all documentation sections.
 *
 * Template variables supplied by the caller always win; every missing one
 * (`title`, `description`, `details`, `toc`) is derived from the frontmatter of
 * `indexMd`, the VitePress config, or the prepared files. The caller's
 * `templateVariables` object is never modified.
 *
 * @param preparedFiles - An array of prepared files.
 * @param options - Options for generating the `llms.txt` file.
 * @returns A string representing the content of the `llms.txt` file.
 *
 * @example
 * ```markdown
 * # Shadcn for Vue
 *
 * > Beautifully designed components built with Radix Vue and Tailwind CSS.
 *
 * ## Table of Contents
 *
 * - [Getting started](/docs/getting-started.md)
 * - [About](/docs/about.md)
 * - ...
 * ```
 *
 * @see https://llmstxt.org/#format
 */
export async function generateLLMsTxt(
  preparedFiles: PreparedFile[],
  options: GenerateLLMsTxtOptions
): Promise<string> {
  const {
    indexMd,
    srcDir,
    LLMsTxtTemplate = defaultLLMsTxtTemplate,
    templateVariables: callerTemplateVariables = {},
    vitepressConfig,
    domain,
    sidebar,
    linksExtension,
    cleanUrls
  } = options

  // Work on a shallow copy: the defaults below are written with `??=`, so
  // filling in the caller's object would freeze the first run's `toc`/`title`
  // and prepend another `> ` to `description` on every later call.
  const templateVariables = { ...callerTemplateVariables }

  // `clearCache` is presumably missing from the gray-matter typings, hence the
  // suppression. It drops previously parsed results before `indexMd` is parsed.
  // @ts-expect-error
  matter.clearCache()

  const indexMdContent = await fs.readFile(indexMd, 'utf-8')
  const indexMdFile = matter(indexMdContent)

  templateVariables.title ??=
    indexMdFile.data?.hero?.name ||
    indexMdFile.data?.title ||
    vitepressConfig?.title ||
    vitepressConfig?.titleTemplate ||
    extractTitle(indexMdFile) ||
    'LLMs Documentation'

  templateVariables.description ??=
    indexMdFile.data?.hero?.text ||
    vitepressConfig?.description ||
    indexMdFile?.data?.description ||
    indexMdFile.data?.titleTemplate

  // llms.txt renders the summary as a Markdown blockquote.
  if (templateVariables.description) {
    templateVariables.description = `> ${templateVariables.description}`
  }

  // The generic sentence is only a stand-in when there is no description at all.
  templateVariables.details ??=
    indexMdFile.data?.hero?.tagline ||
    indexMdFile.data?.tagline ||
    (templateVariables.description
      ? undefined
      : 'This file contains links to all documentation sections.')

  templateVariables.toc ??= await generateTOC(preparedFiles, {
    srcDir,
    domain,
    sidebarConfig: sidebar || vitepressConfig?.themeConfig?.sidebar,
    linksExtension,
    cleanUrls
  })

  return expandTemplate(LLMsTxtTemplate, templateVariables)
}
/**
 * Options for generating the `llms-full.txt` file.
 */
export interface GenerateLLMsFullTxtOptions {
  /** The source directory for the files. */
  srcDir: VitePressConfig['vitepress']['srcDir']

  /** The base domain for the generated links. */
  domain?: LlmstxtSettings['domain']

  /** The link extension for generated links. */
  linksExtension?: LinksExtension

  /** Whether to use clean URLs (without the extension). */
  cleanUrls?: VitePressConfig['cleanUrls']
}

/**
 * Builds the content of `llms-full.txt`: every prepared file, re-serialized
 * with a small generated frontmatter block, concatenated into one document.
 *
 * @param preparedFiles - An array of prepared files.
 * @param options - Options for generating the `llms-full.txt` file.
 * @returns The full documentation as a single string; documents are separated by `---`.
 */
export function generateLLMsFullTxt(
  preparedFiles: PreparedFile[],
  options: GenerateLLMsFullTxtOptions
) {
  const documents: string[] = []

  for (const { path: absolutePath, file } of preparedFiles) {
    // Metadata (url, description) is derived from the path relative to srcDir.
    const metadata = generateMetadata(file, {
      domain: options.domain,
      filePath: path.relative(options.srcDir, absolutePath),
      linksExtension: options.linksExtension,
      cleanUrls: options.cleanUrls
    })

    documents.push(matter.stringify(file.content, metadata))
  }

  return documents.join('\n---\n\n')
}
/**
 * Log prefix styling with the plugin name and separator.
 */
const logPrefix = pc.blue('llmstxt') + pc.dim(' » ')

/** Logger object with standardized logging methods. */
const log = {
  /**
   * Logs an informational message to the console.
   *
   * @param message - The message to log.
   */
  info: (message: string) => console.log(`${logPrefix} ${message}`),

  /**
   * Logs a success message to the console.
   *
   * @param message - The message to log.
   */
  success: (message: string) =>
    console.log(`${logPrefix}${pc.green('✓')} ${message}`),

  /**
   * Logs a warning message to the console.
   *
   * @param message - The message to log.
   */
  warn: (message: string) =>
    console.warn(`${logPrefix}${pc.yellow('⚠')} ${pc.yellow(message)}`),

  /**
   * Logs an error message to the console.
   *
   * @param message - The message to log.
   */
  error: (message: string) =>
    console.error(`${logPrefix}${pc.red('✗')} ${pc.red(message)}`)
}

export default log

/**
 * Generates a Markdown-formatted table of contents (TOC) link for a given file.
 *
 * The frontmatter `description` is appended after the link only when it is a
 * non-blank string. Frontmatter is untyped, so a numeric or list value is
 * ignored instead of crashing the build, and internal line breaks are
 * collapsed so the entry stays a single list item.
 *
 * @param file - The prepared file.
 * @param domain - The base domain for the generated link.
 * @param relativePath - The relative path of the file, which is converted to a `.md` link.
 * @param extension - The link extension for the generated link (default is `.md`).
 * @param cleanUrls - Whether to use clean URLs (without the extension).
 * @returns The formatted TOC entry as a Markdown list item (terminated by `\n`).
 */
export const generateTOCLink = (
  file: PreparedFile,
  domain: LlmstxtSettings['domain'],
  relativePath: string,
  extension?: LinksExtension,
  cleanUrls: VitePressConfig['cleanUrls'] = false
) => {
  const rawDescription: unknown = file.file.data.description
  const description =
    typeof rawDescription === 'string'
      ? rawDescription.trim().replace(/\s*\n\s*/g, ' ')
      : ''

  const link = generateLink(
    stripExtPosix(relativePath),
    domain,
    extension ?? '.md',
    cleanUrls
  )

  return `- [${file.title}](${link})${description ? `: ${description}` : ''}\n`
}
/**
 * Walks a sidebar tree depth-first and gathers every `link` it contains.
 *
 * @param items - Array of sidebar items to process.
 * @returns The links in document order: an item's own link first, then its descendants'.
 */
function collectPathsFromSidebarItems(
  items: DefaultTheme.SidebarItem[]
): string[] {
  return items.flatMap((entry) => {
    const ownLink = entry.link ? [entry.link] : []
    const descendantLinks =
      entry.items && Array.isArray(entry.items)
        ? collectPathsFromSidebarItems(entry.items)
        : []

    return [...ownLink, ...descendantLinks]
  })
}

/**
 * Brings a link or file path into a comparable form: the extension is removed
 * and a trailing `index` segment is folded into its directory.
 *
 * @param link - The link path to normalize.
 * @returns Normalized link path for consistent comparison.
 */
export function normalizeLinkPath(link: string): string {
  const withoutExtension = stripExtPosix(link)

  return path.basename(withoutExtension) === 'index'
    ? path.dirname(withoutExtension)
    : withoutExtension
}

/**
 * Tells whether a file path and a sidebar path refer to the same page once
 * both have been normalized with `normalizeLinkPath`.
 *
 * @param filePath - The file path to check.
 * @param sidebarPath - The sidebar path to compare against.
 * @returns True if paths match, false otherwise.
 */
export function isPathMatch(filePath: string, sidebarPath: string): boolean {
  const sidebarTarget = normalizeLinkPath(sidebarPath)
  const fileTarget = normalizeLinkPath(filePath)

  return [sidebarTarget, `${sidebarTarget}.md`].includes(fileTarget)
}
/**
 * Finds the prepared file a sidebar link points to.
 *
 * @param link - The sidebar link.
 * @param preparedFiles - An array of prepared files.
 * @param srcDir - The VitePress source directory.
 * @returns The matching prepared file, or `undefined` when no file matches.
 */
function findFileForSidebarLink(
  link: string,
  preparedFiles: PreparedFile[],
  srcDir: VitePressConfig['vitepress']['srcDir']
): PreparedFile | undefined {
  const normalizedLink = normalizeLinkPath(link)

  return preparedFiles.find((file) => {
    const relativePath = `/${stripExtPosix(path.relative(srcDir, file.path))}`
    return isPathMatch(relativePath, normalizedLink)
  })
}

/**
 * Processes sidebar items and generates TOC entries in the exact order they appear in sidebar config.
 *
 * Output layout for one section: an optional heading, then the section's link
 * entries, then its nested sections (one heading level deeper). Items are
 * processed sequentially so nested sections keep their sidebar order; pushing
 * results from concurrently awaited callbacks made the order depend on how
 * deep each subtree was. A section that is itself a link (`text` + `link`) is
 * listed as the first entry under its heading, so that page is not lost.
 *
 * @param section - A sidebar section
 * @param preparedFiles - An array of prepared files
 * @param srcDir - The VitePress source directory
 * @param domain - Optional domain to prefix URLs with
 * @param linksExtension - The link extension for generated links.
 * @param cleanUrls - Whether to use clean URLs (without the extension).
 * @param depth - Current depth level for headings
 * @returns A string representing the formatted section of the TOC
 */
async function processSidebarSection(
  section: DefaultTheme.SidebarItem,
  preparedFiles: PreparedFile[],
  srcDir: VitePressConfig['vitepress']['srcDir'],
  domain?: LlmstxtSettings['domain'],
  linksExtension?: LinksExtension,
  cleanUrls?: VitePressConfig['cleanUrls'],
  depth = 3
): Promise<string> {
  let sectionTOC = ''

  // Add section header only if it has text and is not just a link container
  if (section.text) {
    sectionTOC += `${'#'.repeat(depth)} ${section.text}\n\n`
  }

  const linkItems: string[] = []
  const nestedSections: string[] = []

  // Appends a TOC entry for `link` when it resolves to a prepared file.
  const addLinkItem = (link: string) => {
    const matchingFile = findFileForSidebarLink(link, preparedFiles, srcDir)
    if (!matchingFile) return

    linkItems.push(
      generateTOCLink(
        matchingFile,
        domain,
        path.relative(srcDir, matchingFile.path),
        linksExtension,
        cleanUrls
      )
    )
  }

  // The parent only emits links for items without children, and top-level
  // sections have no parent, so the section's own link must be handled here.
  if (section.link) {
    addLinkItem(section.link)
  }

  const items = Array.isArray(section.items) ? section.items : []

  for (const item of items) {
    if (item.items && item.items.length > 0) {
      // Increase depth for nested sections to maintain proper heading levels
      nestedSections.push(
        await processSidebarSection(
          item,
          preparedFiles,
          srcDir,
          domain,
          linksExtension,
          cleanUrls,
          depth + 1
        )
      )
    } else if (item.link) {
      addLinkItem(item.link)
    }
  }

  // Add link items if any
  if (linkItems.length > 0) {
    sectionTOC += linkItems.join('')
  }

  // Add a blank line before nested sections if we have link items
  if (linkItems.length > 0 && nestedSections.length > 0) {
    sectionTOC += '\n'
  }

  // Add nested sections with appropriate spacing
  if (nestedSections.length > 0) {
    sectionTOC += nestedSections.join('\n')
  }

  return sectionTOC
}

/**
 * Flattens the sidebar configuration when it's an object with path keys.
 *
 * @param sidebarConfig - The sidebar configuration from VitePress.
 * @returns An array of sidebar items; empty when the config is neither an array nor an object.
 */
function flattenSidebarConfig(
  sidebarConfig: DefaultTheme.Sidebar
): DefaultTheme.SidebarItem[] {
  // If it's already an array, return as is
  if (Array.isArray(sidebarConfig)) {
    return sidebarConfig
  }

  // If it's an object with path keys, flatten it (`typeof null` is also 'object')
  if (sidebarConfig !== null && typeof sidebarConfig === 'object') {
    return Object.values(sidebarConfig).flat()
  }

  // If it's neither, return an empty array
  return []
}

/**
 * Options for generating a Table of Contents (TOC).
 */
export interface GenerateTOCOptions {
  /**
   * The VitePress source directory.
   */
  srcDir: VitePressConfig['vitepress']['srcDir']

  /**
   * Optional domain to prefix URLs with.
   */
  domain?: LlmstxtSettings['domain']

  /**
   * Optional VitePress sidebar configuration.
   */
  sidebarConfig?: DefaultTheme.Sidebar

  /** The link extension for generated links. */
  linksExtension?: LinksExtension

  /** Whether to use clean URLs (without the extension). */
  cleanUrls?: VitePressConfig['cleanUrls']
}
/**
 * Generates a Table of Contents (TOC) for the provided prepared files.
 *
 * Each entry in the TOC is formatted as a markdown link to the corresponding
 * text file. If a VitePress sidebar configuration is provided, the TOC is
 * organized into sections based on the sidebar structure, with heading levels
 * reflecting the nesting depth of the sections. Files that no sidebar link
 * points to are listed afterwards under an `### Other` heading. Without a
 * (non-empty) sidebar, all files are listed as one flat list.
 *
 * @param preparedFiles - An array of prepared files.
 * @param options - Options for generating the TOC.
 * @returns A string representing the formatted Table of Contents.
 */
export async function generateTOC(
  preparedFiles: PreparedFile[],
  options: GenerateTOCOptions
): Promise<string> {
  const { srcDir, domain, sidebarConfig, linksExtension, cleanUrls } = options
  let tableOfContent = ''

  // Files still to be listed as a flat list at the end.
  let filesToProcess = preparedFiles

  if (sidebarConfig) {
    // Flatten sidebar config if it's an object with path keys
    const flattenedSidebarConfig = flattenSidebarConfig(sidebarConfig)

    if (flattenedSidebarConfig.length > 0) {
      // Process each top-level section in the flattened sidebar
      for (const section of flattenedSidebarConfig) {
        tableOfContent += await processSidebarSection(
          section,
          preparedFiles,
          srcDir,
          domain,
          linksExtension,
          cleanUrls
        )
        tableOfContent += '\n'
      }

      const allSidebarPaths = collectPathsFromSidebarItems(
        flattenedSidebarConfig
      )

      // Only files the sidebar does not reference remain for the flat list.
      // This must be assigned even when it is empty: keeping the full list
      // here repeated every page after the sidebar sections.
      filesToProcess = preparedFiles.filter((file) => {
        const relativePath = `/${stripExtPosix(path.relative(srcDir, file.path))}`
        return !allSidebarPaths.some((sidebarPath) =>
          isPathMatch(relativePath, sidebarPath)
        )
      })

      if (filesToProcess.length > 0) {
        tableOfContent += '### Other\n\n'
      }
    }
  }

  for (const file of filesToProcess) {
    const relativePath = path.relative(srcDir, file.path)
    tableOfContent += generateTOCLink(
      file,
      domain,
      relativePath,
      linksExtension,
      cleanUrls
    )
  }

  return tableOfContent
}
/**
 * Splits a file path into its directory and file components.
 *
 * @param filepath - The path to the file.
 * @returns An object containing the directory and file name.
 */
export const splitDirAndFile = (filepath: string) => ({
  dir: path.dirname(filepath),
  file: path.basename(filepath)
})

/**
 * Strips the file extension from a given file path.
 *
 * @param filepath - The path to the file.
 * @returns The path without the extension, using the platform's separators.
 */
export const stripExt = (filepath: string) => {
  const { dir, file } = splitDirAndFile(filepath)

  return path.join(dir, path.basename(file, path.extname(file)))
}

/**
 * Strips the file extension from a given file path using POSIX format.
 *
 * Platform separators are converted to `/` before the path is taken apart.
 * Joining a platform-specific directory with `path.posix.join` left
 * backslashes in the result on Windows for paths two or more levels deep.
 *
 * @param filepath - The path to the file.
 * @returns The path without the extension, with `/` as the separator.
 */
export const stripExtPosix = (filepath: string) => {
  const posixPath =
    path.sep === path.posix.sep
      ? filepath
      : filepath.split(path.sep).join(path.posix.sep)

  const dir = path.posix.dirname(posixPath)
  const file = path.posix.basename(posixPath)

  return path.posix.join(
    dir,
    path.posix.basename(file, path.posix.extname(file))
  )
}

/**
 * Extracts the title from a markdown file's frontmatter or first heading.
 *
 * Frontmatter `title` wins, then `titleTemplate`; the markdown body is only
 * inspected when neither is set.
 *
 * @param file - The markdown file to extract the title from.
 * @returns The extracted title, or `undefined` if no title is found.
 */
export function extractTitle(
  file: GrayMatterFile<Input>
): string | undefined {
  const titleFromFrontmatter = file.data?.title || file.data?.titleTemplate

  if (titleFromFrontmatter) {
    return titleFromFrontmatter
  }

  return markdownTitle(file.content)
}
/**
 * Escapes every character that has a special meaning in a regular expression.
 *
 * @param text - Arbitrary text to be matched literally.
 * @returns `text` with all RegExp metacharacters backslash-escaped.
 */
const escapeRegExp = (text: string) =>
  text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')

/**
 * Creates a regular expression to match a specific template variable in the format `{key}`.
 *
 * The expression also captures an optional blank line directly in front of the
 * placeholder so that callers can drop it together with an empty value. The
 * key is matched literally: variable names are user-supplied and may contain
 * characters such as `.` or `+`.
 *
 * @param key - The name of the template variable to match.
 * @returns A global, case-insensitive regular expression that detects `{key}` occurrences in a string.
 *
 * @example
 * ```ts
 * const regex = templateVariable('name');
 * console.log(regex.test('Hello {name}')); // true
 * ```
 */
const templateVariable = (key: string) =>
  new RegExp(`(\\n\\s*\\n)?\\{${escapeRegExp(key)}\\}`, 'gi')

/**
 * Replaces occurrences of a template variable `{variable}` in a given content string with a provided value.
 * If the value is empty or undefined, it falls back to a specified fallback value.
 * When neither is available the placeholder is removed together with the blank
 * line that precedes it, so no empty paragraph is left behind.
 *
 * @param content - The template string containing placeholders.
 * @param variable - The template variable name to replace.
 * @param value - The value to replace the variable with.
 * @param fallback - An optional fallback value if `value` is empty.
 * @returns A new string with the template variable replaced.
 *
 * @example
 * ```ts
 * const template = 'Hello {name}!';
 * const result = replaceTemplateVariable(template, 'name', 'Alice', 'User');
 * console.log(result); // 'Hello Alice!'
 * ```
 */
export function replaceTemplateVariable(
  content: string,
  variable: string,
  value: string | undefined,
  fallback?: string
) {
  // A replacer function is used so `$` sequences in the value stay literal.
  return content.replace(templateVariable(variable), (_, prefix) => {
    const val = value?.length ? value : fallback?.length ? fallback : ''
    return val ? `${prefix ? '\n\n' : ''}${val}` : ''
  })
}

/**
 * Expands a template string by replacing multiple template variables with their corresponding values.
 *
 * Variables are substituted one after another in the object's key order.
 *
 * @param template - The template string containing placeholders.
 * @param variables - An object mapping variable names to their respective values.
 * @returns A string with all template variables replaced.
 *
 * @example
 * ```ts
 * const template = 'Hello {name}, welcome to {place}!';
 * const values = { name: 'Alice', place: 'Wonderland' };
 * const result = expandTemplate(template, values);
 * console.log(result); // 'Hello Alice, welcome to Wonderland!'
 * ```
 */
export const expandTemplate = (
  template: string,
  variables: Record<string, string | undefined>
) => {
  return Object.entries(variables).reduce(
    (result, [key, value]) => replaceTemplateVariable(result, key, value),
    template
  )
}

/**
 * Generates a complete link by combining a domain, path, and an optional extension.
 *
 * Trailing slashes on `domain` are dropped so that `https://example.com/`
 * does not produce `https://example.com//guide.md`.
 *
 * @param path - The path to append to the domain (e.g., "guide").
 * @param domain - The base domain of the link (e.g., "https://example.com").
 * @param extension - An optional extension to append to the path (e.g., ".md").
 * @param cleanUrls - When truthy, the extension is omitted.
 * @returns The generated link
 */
export const generateLink = (
  path: string,
  domain?: string,
  extension?: LinksExtension,
  cleanUrls?: VitePressConfig['cleanUrls']
) =>
  expandTemplate('{domain}/{path}{extension}', {
    domain: (domain ?? '').replace(/\/+$/, ''),
    path,
    extension: cleanUrls ? '' : extension
  })

/**
 * Options for generating metadata for markdown files.
 */
export interface GenerateMetadataOptions {
  /** Optional domain name to prepend to the URL. */
  domain?: LlmstxtSettings['domain']

  /** Path to the file relative to the content root. */
  filePath: string

  /** The link extension for generated links. */
  linksExtension?: LinksExtension

  /** Whether to use clean URLs (without the extension). */
  cleanUrls?: VitePressConfig['cleanUrls']
}
/**
 * Builds the frontmatter that accompanies a page in the LLM-oriented output.
 *
 * The result always carries `url` (the link to the page, `.md` unless another
 * extension or clean URLs are requested) and additionally `description` when
 * the source frontmatter has a non-empty one.
 *
 * @param sourceFile - Parsed markdown file with frontmatter using gray-matter.
 * @param options - Options for generating metadata.
 * @returns Object containing metadata properties for the file.
 *
 * @example
 * generateMetadata(preparedFile, { domain: 'https://example.com', filePath: 'docs/guide' })
 * // Returns { url: 'https://example.com/docs/guide.md', description: 'A guide' }
 */
export function generateMetadata<GrayMatter extends GrayMatterFile<Input>>(
  sourceFile: GrayMatter,
  options: GenerateMetadataOptions
) {
  const url = generateLink(
    stripExtPosix(options.filePath),
    options.domain,
    options.linksExtension ?? '.md',
    options.cleanUrls
  )

  const frontmatterMetadata: Record<string, string> = { url }

  const description = sourceFile.data?.description
  if (description?.length) {
    frontmatterMetadata.description = description
  }

  return frontmatterMetadata
}
/**
 * Formats the size of a string, measured in bytes, for display.
 *
 * The byte count is taken from a `Blob` wrapping the string and then handed
 * to `byte-size` for formatting.
 *
 * @param string - The input string whose size needs to be determined.
 * @returns A human-readable size string (e.g., "1.2 KB", "500 B").
 */
export const getHumanReadableSizeOf = (string: string) => {
  const { size: byteCount } = new Blob([string])

  return byteSize(byteCount).toString()
}

/** Name under which the plugin registers itself and tags its log output. */
const PLUGIN_NAME = 'llmstxt'
+ * + * @see https://github.com/okineadev/vitepress-plugin-llms + * @see https://llmstxt.org/ + */ +export default function llmstxt(userSettings: LlmstxtSettings = {}): Plugin { + // Create a settings object with defaults explicitly merged + const settings: Omit & { workDir: string } = { + generateLLMsTxt: true, + generateLLMsFullTxt: true, + generateLLMFriendlyDocsForEachPage: true, + ignoreFiles: [], + workDir: undefined as unknown as string, + stripHTML: true, + ...userSettings + } + + // Store the resolved Vite config + let config: VitePressConfig + + // Set to store all markdown file paths + const mdFiles: Set = new Set() + + // Flag to identify which build we're in + let isSsrBuild = false + + return { + name: PLUGIN_NAME, + + /** Resolves the Vite configuration and sets up the working directory. */ + configResolved(resolvedConfig) { + config = resolvedConfig as VitePressConfig + if (settings.workDir) { + settings.workDir = path.resolve( + config.vitepress.srcDir, + settings.workDir as string + ) + } else { + settings.workDir = config.vitepress.srcDir + } + // Detect if this is the SSR build + isSsrBuild = !!resolvedConfig.build?.ssr + log.info( + `${pc.bold(PLUGIN_NAME)} initialized ${isSsrBuild ? pc.dim('(SSR build)') : pc.dim('(client build)')} with workDir: ${pc.cyan(settings.workDir as string)}` + ) + }, + + /** Configures the development server to handle `llms.txt` and markdown files for LLMs. */ + async configureServer(server: ViteDevServer) { + log.info('Dev server configured for serving plain text docs for LLMs') + server.middlewares.use(async (req, res, next) => { + if (req.url?.endsWith('.md') || req.url?.endsWith('.txt')) { + try { + // Try to read and serve the markdown file + const filePath = path.resolve( + config.vitepress?.outDir ?? 
'dist', + `${stripExt(req.url)}.md` + ) + const content = await fs.readFile(filePath, 'utf-8') + res.setHeader('Content-Type', 'text/plain; charset=utf-8') + res.end(content) + return + } catch (e) { + // If file doesn't exist or can't be read, continue to next middleware + log.warn(`Failed to return ${pc.cyan(req.url)}: File not found`) + next() + } + } + + // Pass to next middleware if not handled + next() + }) + }, + + /** + * Resets the collection of markdown files when the build starts. + * This ensures we don't include stale data from previous builds. + */ + buildStart() { + mdFiles.clear() + log.info('Build started, file collection cleared') + }, + + /** + * Processes each file that Vite transforms and collects markdown files. + * + * @param _ - The file content (not used). + * @param id - The file identifier (path). + * @returns null if the file is processed, otherwise returns the original content. + */ + async transform(_, id: string) { + if (!id.endsWith('.md')) { + return null + } + + // Skip files outside workDir if it's configured + if (!id.startsWith(settings.workDir as string)) { + return null + } + + if (settings.ignoreFiles?.length) { + const shouldIgnore = await Promise.all( + settings.ignoreFiles.map(async (pattern) => { + if (typeof pattern === 'string') { + return await Promise.resolve( + minimatch( + path.relative(settings.workDir as string, id), + pattern + ) + ) + } + return false + }) + ) + + if (shouldIgnore.some((result) => result === true)) { + return null + } + } + + // Add markdown file path to our collection + mdFiles.add(id) + // Return null to avoid modifying the file + return null + }, + + /** + * Runs only in the client build (not SSR) after completion. + * This ensures the processing happens exactly once. + */ + async generateBundle() { + // Skip processing during SSR build + if (isSsrBuild) { + log.info('Skipping LLMs docs generation in SSR build') + return + } + + const outDir = config.vitepress?.outDir ?? 
'dist' + + // Create output directory if it doesn't exist + try { + await fs.access(outDir) + } catch { + log.info(`Creating output directory: ${pc.cyan(outDir)}`) + await fs.mkdir(outDir, { recursive: true }) + } + + const mdFilesList = Array.from(mdFiles) + const fileCount = mdFilesList.length + + // Skip if no files found + if (fileCount === 0) { + log.warn( + `No markdown files found to process. Check your \`${pc.bold('workDir')}\` and \`${pc.bold('ignoreFiles')}\` settings.` + ) + return + } + + log.info( + `Processing ${pc.bold(fileCount.toString())} markdown files from ${pc.cyan(settings.workDir)}` + ) + + const preparedFiles: PreparedFile[] = await Promise.all( + mdFilesList.map(async (file) => { + const content = await fs.readFile(file, 'utf-8') + + let mdFile: matter.GrayMatterFile + + if (settings.stripHTML) { + const cleanedMarkdown = await remark() + .use(remarkFrontmatter) + .use(() => { + // Strip HTML tags + return (tree) => { + remove(tree, { type: 'html' }) + return tree + } + }) + .process(content) + + mdFile = matter(String(cleanedMarkdown)) + } else { + mdFile = matter(content) + } + // Extract title from frontmatter or use the first heading + const title = extractTitle(mdFile)?.trim() || 'Untitled' + + const filePath = + path.basename(file) === 'index.md' && + path.dirname(file) !== settings.workDir + ? 
`${path.dirname(file)}.md` + : file + + return { path: filePath, title, file: mdFile } + }) + ) + + if (settings.generateLLMFriendlyDocsForEachPage) { + await Promise.all( + preparedFiles.map(async (file) => { + const relativePath = path.relative(settings.workDir, file.path) + try { + const mdFile = file.file + const targetPath = path.resolve(outDir, relativePath) + + // Ensure target directory exists (async version) + await fs.mkdir(path.dirname(targetPath), { + recursive: true + }) + + // Copy file to output directory (async version) + await fs.writeFile( + targetPath, + matter.stringify( + mdFile.content, + generateMetadata(mdFile, { + domain: settings.domain, + filePath: relativePath + }) + ) + ) + + log.success(`Processed ${pc.cyan(relativePath)}`) + } catch (error) { + log.error( + // @ts-ignore + `Failed to process ${pc.cyan(relativePath)}: ${error.message}` + ) + } + }) + ) + } + + // Sort files by title for better organization + preparedFiles.sort((a, b) => a.title.localeCompare(b.title)) + + const tasks: Promise[] = [] + + // Generate llms.txt - table of contents with links + if (settings.generateLLMsTxt) { + const llmsTxtPath = path.resolve(outDir, 'llms.txt') + const templateVariables: CustomTemplateVariables = { + title: settings.title, + description: settings.description, + details: settings.details, + toc: settings.toc, + ...settings.customTemplateVariables + } + + tasks.push( + (async () => { + log.info(`Generating ${pc.cyan('llms.txt')}...`) + + const llmsTxt = await generateLLMsTxt(preparedFiles, { + indexMd: path.resolve(settings.workDir as string, 'index.md'), + srcDir: settings.workDir as string, + LLMsTxtTemplate: + settings.customLLMsTxtTemplate || defaultLLMsTxtTemplate, + templateVariables, + vitepressConfig: config?.vitepress?.userConfig, + domain: settings.domain, + sidebar: settings.sidebar, + linksExtension: !settings.generateLLMFriendlyDocsForEachPage + ? 
'.html' + : undefined, + cleanUrls: config.cleanUrls + }) + + await fs.writeFile(llmsTxtPath, llmsTxt, 'utf-8') + + log.success( + expandTemplate( + 'Generated {file} (~{tokens} tokens, {size}) with {fileCount} documentation links', + { + file: pc.cyan('llms.txt'), + tokens: pc.bold(millify(approximateTokenSize(llmsTxt))), + size: pc.bold(getHumanReadableSizeOf(llmsTxt)), + fileCount: pc.bold(fileCount.toString()) + } + ) + ) + })() + ) + } + + // Generate llms-full.txt - all content in one file + if (settings.generateLLMsFullTxt) { + const llmsFullTxtPath = path.resolve(outDir, 'llms-full.txt') + + tasks.push( + (async () => { + log.info( + `Generating full documentation bundle (${pc.cyan('llms-full.txt')})...` + ) + + const llmsFullTxt = generateLLMsFullTxt(preparedFiles, { + srcDir: settings.workDir as string, + domain: settings.domain, + linksExtension: !settings.generateLLMFriendlyDocsForEachPage + ? '.html' + : undefined, + cleanUrls: config.cleanUrls + }) + + // Write content to llms-full.txt + await fs.writeFile(llmsFullTxtPath, llmsFullTxt, 'utf-8') + log.success( + expandTemplate( + 'Generated {file} (~{tokens} tokens, {size}) with {fileCount} markdown files', + { + file: pc.cyan('llms-full.txt'), + tokens: pc.bold(millify(approximateTokenSize(llmsFullTxt))), + size: pc.bold(getHumanReadableSizeOf(llmsFullTxt)), + fileCount: pc.bold(fileCount.toString()) + } + ) + ) + })() + ) + } + + if (tasks.length) { + await Promise.all(tasks) + } + } + } +} diff --git a/src/node/plugins/llmstxt/types.d.ts b/src/node/plugins/llmstxt/types.d.ts new file mode 100644 index 000000000000..bc809c0d55cb --- /dev/null +++ b/src/node/plugins/llmstxt/types.d.ts @@ -0,0 +1,243 @@ +import type { GrayMatterFile, Input } from 'gray-matter' +import type { ResolvedConfig } from 'vite' +import type { DefaultTheme, SiteConfig, UserConfig } from 'vitepress' + +interface TemplateVariables { + /** + * The title extracted from the frontmatter or the first h1 heading in the main document 
(`index.md`). + * + * @example 'Awesome tool' + */ + title?: string + + /** + * The description. + * + * @example 'Blazing fast build tool' + */ + description?: string + + /** + * The details. + * + * @example 'A multi-user version of the notebook designed for companies, classrooms and research labs' + */ + details?: string + + /** + * An automatically generated **T**able **O**f **C**ontents. + * + * @example + * ```markdown + * - [Title](/foo.md): Lorem ipsum dolor sit amet, consectetur adipiscing elit. + * - [Title 2](/bar/baz.md): Cras vel nibh id ipsum pharetra efficitur. + * ``` + */ + toc?: string +} + +interface CustomTemplateVariables extends TemplateVariables { + /** Any custom variable */ + [key: string]: string | undefined +} + +export interface LlmstxtSettings extends TemplateVariables { + /** + * The domain that will be appended to the beginning of URLs in `llms.txt` and in the context of other files + * + * Domain attachment is not yet agreed upon (since it depends on the AI ​​whether it can resolve the relative paths that are currently there), but if you want you can add it + * + * ℹ️ **Note**: Domain cannot end with `/`. + * + * Without a {@link LlmstxtSettings.domain | `domain`}: + * ```markdown + * - [Title](/foo/bar.md) + * ``` + * + * With a {@link LlmstxtSettings.domain | `domain`}: + * ```markdown + * - [Title](https://example.com/foo/bar.md) + * ``` + * + * @example + * ```typescript + * llmstxt({ domain: 'https://example.com' }) + * ``` + */ + domain?: string + + /** + * Indicates whether to generate the `llms.txt` file, which contains a list of sections with corresponding links. + * + * @default true + */ + generateLLMsTxt?: boolean + + /** + * Determines whether to generate the `llms-full.txt` which contains all the documentation in one file. + * + * @default true + */ + generateLLMsFullTxt?: boolean + + /** + * Determines whether to generate an LLM-friendly version of the documentation for each page on the website. 
+ * + * @default true + */ + generateLLMFriendlyDocsForEachPage?: boolean + + /** + * Whether to strip HTML tags from Markdown files + * + * @default true + */ + stripHTML?: boolean + + /** + * The directory from which files will be processed. + * + * This is useful for configuring the plugin to generate documentation for LLMs in a specific language. + * + * @example + * ```typescript + * llmstxt({ + * // Generate documentation for LLMs from English documentation only + * workDir: 'en' + * }) + * ``` + * + * @default vitepress.srcDir + */ + workDir?: string + + /** + * An array of file path patterns to be ignored during processing. + * + * This is useful for excluding certain files from LLMs, such as those not related to documentation (e.g., sponsors, team, etc.). + * + * @example + * ```typescript + * llmstxt({ + * ignoreFiles: [ + * 'about/team/*', + * 'sponsor/*' + * // ... + * ] + * }) + * ``` + * + * @default [] + */ + ignoreFiles?: string[] + + /** + * A custom template for the `llms.txt` file, allowing for a personalized order of elements. + * + * Available template elements include: + * + * - `{title}`: The title extracted from the frontmatter or the first h1 heading in the main document (`index.md`). + * - `{description}`: The description. + * - `{details}`: The details. + * - `{toc}`: An automatically generated **T**able **O**f **C**ontents. + * + * You can also add custom variables using the {@link LlmstxtSettings.customTemplateVariables | `customTemplateVariables`} parameter + * + * @default + * ```markdown + * # {title} + * + * > {description} + * + * {details} + * + * ## Table of Contents + * + * {toc} + * ``` + */ + customLLMsTxtTemplate?: string + + /** + * Custom variables for {@link LlmstxtSettings.customLLMsTxtTemplate | `customLLMsTxtTemplate`}. + * + * With this option you can edit or add variables to the template. 
+ * + * You can change the title in `llms.txt` without having to change the template: + * + * @example + * ```typescript + * llmstxt({ + * customTemplateVariables: { + * title: 'Very custom title', + * } + * }) + * ``` + * + * You can also combine this with a custom template: + * + * @example + * ```typescript + * llmstxt({ + * customLLMsTxtTemplate: '# {title}\n\n{foo}', + * customTemplateVariables: { + * foo: 'Very custom title', + * } + * }) + * ``` + */ + customTemplateVariables?: CustomTemplateVariables + + /** + * VitePress {@link DefaultTheme.Sidebar | Sidebar} + * + * Here you can insert your {@link DefaultTheme.Sidebar | `sidebar`} if it is not in the VitePress configuration + * + * Usually this parameter is used in rare cases + */ + sidebar?: DefaultTheme.Sidebar +} + +/** + * Represents a prepared file, including its title and path. + */ +export type PreparedFile = { + /** + * The title of the file. + * + * @example 'Guide' + */ + title: string + + /** + * The absolute path to the file. + * + * @example 'guide/getting-started.md' + */ + path: string + + /** + * The prepared file itself. + * + * @example + * ```typescript + * { + * data: { + * title: 'Guide' + * }, + * content: 'Content goes here' + * orig: '---\ntitle: Guide\n---\n\nContent goes here' + * } + * ``` + */ + file: GrayMatterFile +} + +interface VitePressConfig + extends Omit, + ResolvedConfig { + vitepress: SiteConfig +} + +/** Represents the link extension options for generated links. 
*/ +type LinksExtension = string | '.md' | '.html' diff --git a/src/node/siteConfig.ts b/src/node/siteConfig.ts index 82ab3162e6a8..ac7e2bccbf59 100644 --- a/src/node/siteConfig.ts +++ b/src/node/siteConfig.ts @@ -14,6 +14,7 @@ import type { SSGContext, SiteData } from './shared' +import type { LlmstxtSettings } from './plugins/llmstxt/types' import type { AdditionalConfigDict, AdditionalConfigLoader @@ -157,6 +158,8 @@ export interface UserConfig transformItems?: (items: SitemapItem[]) => Awaitable } + llms?: boolean | LlmstxtSettings + /** * Build end hook: called when SSG finish. * @param siteConfig The resolved configuration. @@ -225,6 +228,7 @@ export interface SiteConfig | 'transformHtml' | 'transformPageData' | 'sitemap' + | 'llms' > { root: string srcDir: string diff --git a/src/node/tsconfig.json b/src/node/tsconfig.json index 98a0ea23aac2..e5db1ec28830 100644 --- a/src/node/tsconfig.json +++ b/src/node/tsconfig.json @@ -4,7 +4,8 @@ "baseUrl": ".", "outDir": "../../dist/node", "types": ["node"], - "sourceMap": true + "sourceMap": true, + "rootDir": "../.." }, "include": ["."] } diff --git a/types/shared.d.ts b/types/shared.d.ts index 445bb82ee733..efeaf317b8f4 100644 --- a/types/shared.d.ts +++ b/types/shared.d.ts @@ -1,6 +1,7 @@ // types shared between server and client import type { UseDarkOptions } from '@vueuse/core' import type { SSRContext } from 'vue/server-renderer' +import type { LlmstxtSettings } from '../src/node/plugins/llmstxt/types' export type { DefaultTheme } from './default-theme.js' export type Awaitable = T | PromiseLike @@ -147,6 +148,7 @@ export interface SiteData { router: { prefetchLinks: boolean } + llms?: boolean | LlmstxtSettings additionalConfig?: | AdditionalConfigDict | AdditionalConfigLoader