Skip to content

Commit 59b5368

Browse files
authored
Merge pull request #224 from takker99:page-metadata
fix(websocket) Follow Cosense's metadata generation
2 parents 830354d + eab1c7c commit 59b5368

File tree

5 files changed

+114
-74
lines changed

5 files changed

+114
-74
lines changed

websocket/__snapshots__/findMetadata.test.ts.snap

-33
This file was deleted.

websocket/change.ts

+9
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ export type Change =
1111
| HelpFeelsChange
1212
| infoboxDefinitionChange
1313
| TitleChange
14+
| LinesCountChange
15+
| charsCountChange
1416
| PinChange;
1517
export interface InsertChange {
1618
_insert: string;
@@ -72,6 +74,13 @@ export interface infoboxDefinitionChange {
7274
*/
7375
infoboxDefinition: string[];
7476
}
77+
export interface LinesCountChange {
78+
linesCount: number;
79+
}
80+
export interface charsCountChange {
81+
charsCount: number;
82+
}
83+
7584
export interface PinChange {
7685
pin: number;
7786
}

websocket/findMetadata.test.ts renamed to websocket/getPageMetadataFromLines.test.ts

+46-5
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1-
import { findMetadata, getHelpfeels } from "./findMetadata.ts";
2-
import { assertEquals } from "@std/assert";
3-
import { assertSnapshot } from "@std/testing/snapshot";
1+
import {
2+
getHelpfeels,
3+
getPageMetadataFromLines,
4+
} from "./getPageMetadataFromLines.ts";
5+
import { assertEquals } from "@std/assert/equals";
46

57
// Test data for metadata extraction from a Scrapbox page
68
// This sample includes various Scrapbox syntax elements:
@@ -38,8 +40,47 @@ Prepare thumbnail
3840
3941
[https://scrapbox.io/files/65e7f4413bc95600258481fb.svg https://scrapbox.io/files/65e7f82e03949c0024a367d0.svg]`;
4042

41-
// Test findMetadata function's ability to extract various metadata from a page
42-
Deno.test("findMetadata()", (t) => assertSnapshot(t, findMetadata(text)));
43+
Deno.test("getPageMetadataFromLines()", () => {
44+
assertEquals(getPageMetadataFromLines(text), [
45+
"test page",
46+
[
47+
"normal",
48+
"link2",
49+
"hashtag",
50+
],
51+
[
52+
"/help-en/external-link",
53+
],
54+
[
55+
"scrapbox",
56+
"takker",
57+
],
58+
"https://scrapbox.io/files/65f29c24974fd8002333b160.svg",
59+
[
60+
"[normal]link",
61+
"but `this [link]` is not a link",
62+
"`Links [link] and images [https://scrapbox.io/files/65f29c0c9045b5002522c8bb.svg] in code blocks should be ignored`",
63+
"`? Need help with setup!!`",
64+
"#hashtag is recommended",
65+
],
66+
[
67+
"65f29c24974fd8002333b160",
68+
"65e7f82e03949c0024a367d0",
69+
"65e7f4413bc95600258481fb",
70+
],
71+
[
72+
"Need help with setup!!",
73+
],
74+
[
75+
"Name\t[scrapbox.icon]",
76+
"Address\tAdd [link2] here",
77+
"Phone\tAdding # won't create a link",
78+
"Strengths\tList about 3 items",
79+
],
80+
26,
81+
659,
82+
]);
83+
});
4384

4485
// Test Helpfeel extraction (lines starting with "?")
4586
// These are used for collecting questions and help requests in Scrapbox

websocket/findMetadata.ts renamed to websocket/getPageMetadataFromLines.ts

+43-18
Original file line numberDiff line numberDiff line change
@@ -18,27 +18,22 @@ import { parseYoutube } from "../parser/youtube.ts";
1818
* @returns A tuple containing [title, links, projectLinks, icons, image, descriptions, files, helpfeels, infoboxDefinition, linesCount, charsCount]
1919
* where image can be null if no suitable preview image is found
2020
*/
21-
export const findMetadata = (
21+
export const getPageMetadataFromLines = (
2222
text: string,
2323
): [
24-
string[],
25-
string[],
26-
string[],
27-
string | null,
28-
string[],
29-
string[],
30-
string[],
24+
title: string,
25+
links: string[],
26+
projectLinks: string[],
27+
icons: string[],
28+
image: string | null,
29+
descriptions: string[],
30+
files: string[],
31+
helpfeels: string[],
32+
infoboxDefinition: string[],
33+
linesCount: number,
34+
charsCount: number,
3135
] => {
32-
const blocks = parse(text, { hasTitle: true }).flatMap((block) => {
33-
switch (block.type) {
34-
case "codeBlock":
35-
case "title":
36-
return [];
37-
case "line":
38-
case "table":
39-
return block;
40-
}
41-
});
36+
const blocks = parse(text, { hasTitle: true });
4237

4338
/** Map for detecting duplicate links while preserving link type information
4439
*
@@ -49,13 +44,15 @@ export const findMetadata = (
4944
* When the same page is referenced by both formats,
5045
* we prioritize the bracket link format in the final output
5146
*/
47+
let title = "";
5248
const linksLc = new Map<string, boolean>();
5349
const links = [] as string[];
5450
const projectLinksLc = new Set<string>();
5551
const projectLinks = [] as string[];
5652
const iconsLc = new Set<string>();
5753
const icons = [] as string[];
5854
let image: string | null = null;
55+
const descriptions = [] as string[];
5956
const files = new Set<string>();
6057
const helpfeels = new Set<string>();
6158

@@ -150,11 +147,31 @@ export const findMetadata = (
150147

151148
for (const block of blocks) {
152149
switch (block.type) {
150+
case "title": {
151+
title = block.text;
152+
continue;
153+
}
153154
case "line":
155+
if (descriptions.length < 5 && block.nodes.length > 0) {
156+
descriptions.push(
157+
block.nodes[0].type === "helpfeel" ||
158+
block.nodes[0].type === "commandLine"
159+
? makeInlineCodeForDescription(block.nodes[0].raw)
160+
: block.nodes.map((node) => node.raw).join("").trim().slice(
161+
0,
162+
200,
163+
),
164+
);
165+
}
154166
for (const node of block.nodes) {
155167
lookup(node);
156168
}
157169
continue;
170+
case "codeBlock":
171+
if (descriptions.length < 5) {
172+
descriptions.push(makeInlineCodeForDescription(block.content));
173+
}
174+
continue;
158175
case "table": {
159176
for (const row of block.cells) {
160177
for (const nodes of row) {
@@ -175,17 +192,25 @@ export const findMetadata = (
175192
}
176193
}
177194

195+
const lines = text.split("\n");
178196
return [
197+
title,
179198
links,
180199
projectLinks,
181200
icons,
182201
image,
202+
descriptions,
183203
[...files],
184204
[...helpfeels],
185205
infoboxDefinition,
206+
lines.length,
207+
lines.reduce((acc, line) => acc + [...line].length, 0),
186208
];
187209
};
188210

211+
const makeInlineCodeForDescription = (text: string): `\`${string}\`` =>
212+
`\`${text.trim().replaceAll("`", "\\`").slice(0, 198)}\``;
213+
189214
const cutId = (link: string): string => link.replace(/#[a-f\d]{24,32}$/, "");
190215

191216
/** Extract Helpfeel entries from text

websocket/makeChanges.ts

+16-18
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
import { diffToChanges } from "./diffToChanges.ts";
22
import type { Page } from "@cosense/types/rest";
33
import type { Change } from "./change.ts";
4-
import { findMetadata, getHelpfeels } from "./findMetadata.ts";
4+
import {
5+
getHelpfeels,
6+
getPageMetadataFromLines,
7+
} from "./getPageMetadataFromLines.ts";
58
import { isSameArray } from "./isSameArray.ts";
69
import { isString } from "@core/unknownutil/is/string";
710

@@ -22,22 +25,6 @@ export function* makeChanges(
2225
yield change;
2326
}
2427

25-
// Handle title changes
26-
// Note: We always include title change commits for new pages (`persistent === false`)
27-
// to ensure proper page initialization
28-
if (before.lines[0].text !== after_[0] || !before.persistent) {
29-
yield { title: after_[0] };
30-
}
31-
32-
// Process changes in page descriptions
33-
// Descriptions are the first 5 lines after the title (lines 1-5)
34-
// These lines provide a summary or additional context for the page
35-
const leftDescriptions = before.lines.slice(1, 6).map((line) => line.text);
36-
const rightDescriptions = after_.slice(1, 6);
37-
if (leftDescriptions.join("") !== rightDescriptions.join("")) {
38-
yield { descriptions: rightDescriptions };
39-
}
40-
4128
// Process changes in various metadata
4229
// Metadata includes:
4330
// - links: References to other pages
@@ -48,21 +35,32 @@ export function* makeChanges(
4835
// - helpfeels: Questions or help requests (lines starting with "?")
4936
// - infoboxDefinition: Structured data definitions
5037
const [
38+
title,
5139
links,
5240
projectLinks,
5341
icons,
5442
image,
43+
descriptions,
5544
files,
5645
helpfeels,
5746
infoboxDefinition,
58-
] = findMetadata(after_.join("\n"));
47+
linesCount,
48+
charsCount,
49+
] = getPageMetadataFromLines(after_.join("\n"));
50+
// Handle title changes
51+
// Note: We always include title change commits for new pages (`persistent === false`)
52+
// to ensure proper page initialization
53+
if (before.title !== title || !before.persistent) yield { title };
5954
if (!isSameArray(before.links, links)) yield { links };
6055
if (!isSameArray(before.projectLinks, projectLinks)) yield { projectLinks };
6156
if (!isSameArray(before.icons, icons)) yield { icons };
6257
if (before.image !== image) yield { image };
58+
if (!isSameArray(before.descriptions, descriptions)) yield { descriptions };
6359
if (!isSameArray(before.files, files)) yield { files };
6460
if (!isSameArray(getHelpfeels(before.lines), helpfeels)) yield { helpfeels };
6561
if (!isSameArray(before.infoboxDefinition, infoboxDefinition)) {
6662
yield { infoboxDefinition };
6763
}
64+
yield { linesCount };
65+
yield { charsCount };
6866
}

0 commit comments

Comments
 (0)