mirror of https://github.com/usememos/memos
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
419 lines
10 KiB
Go
419 lines
10 KiB
Go
package markdown
|
|
|
|
import (
|
|
"bytes"
|
|
"strings"
|
|
|
|
"github.com/yuin/goldmark"
|
|
gast "github.com/yuin/goldmark/ast"
|
|
"github.com/yuin/goldmark/extension"
|
|
east "github.com/yuin/goldmark/extension/ast"
|
|
"github.com/yuin/goldmark/parser"
|
|
"github.com/yuin/goldmark/text"
|
|
|
|
mast "github.com/usememos/memos/plugin/markdown/ast"
|
|
"github.com/usememos/memos/plugin/markdown/extensions"
|
|
"github.com/usememos/memos/plugin/markdown/renderer"
|
|
storepb "github.com/usememos/memos/proto/gen/store"
|
|
)
|
|
|
|
// ExtractedData contains all metadata extracted from markdown in a single pass.
|
|
type ExtractedData struct {
|
|
Tags []string
|
|
Property *storepb.MemoPayload_Property
|
|
}
|
|
|
|
// Service handles markdown metadata extraction.
|
|
// It uses goldmark to parse markdown and extract tags, properties, and snippets.
|
|
// HTML rendering is primarily done on frontend using markdown-it, but backend provides
|
|
// RenderHTML for RSS feeds and other server-side rendering needs.
|
|
type Service interface {
|
|
// ExtractAll extracts tags, properties, and references in a single parse (most efficient)
|
|
ExtractAll(content []byte) (*ExtractedData, error)
|
|
|
|
// ExtractTags returns all #tags found in content
|
|
ExtractTags(content []byte) ([]string, error)
|
|
|
|
// ExtractProperties computes boolean properties
|
|
ExtractProperties(content []byte) (*storepb.MemoPayload_Property, error)
|
|
|
|
// RenderMarkdown renders goldmark AST back to markdown text
|
|
RenderMarkdown(content []byte) (string, error)
|
|
|
|
// RenderHTML renders markdown content to HTML
|
|
RenderHTML(content []byte) (string, error)
|
|
|
|
// GenerateSnippet creates plain text summary
|
|
GenerateSnippet(content []byte, maxLength int) (string, error)
|
|
|
|
// ValidateContent checks for syntax errors
|
|
ValidateContent(content []byte) error
|
|
|
|
// RenameTag renames all occurrences of oldTag to newTag in content
|
|
RenameTag(content []byte, oldTag, newTag string) (string, error)
|
|
}
|
|
|
|
// service implements the Service interface.
|
|
type service struct {
|
|
md goldmark.Markdown
|
|
}
|
|
|
|
// Option configures the markdown service.
|
|
type Option func(*config)
|
|
|
|
// config records which optional parser extensions NewService installs.
type config struct {
	enableTags     bool // install the #tag extension
	enableWikilink bool // install the [[wikilink]] extension
}
|
|
|
|
// WithTagExtension enables #tag parsing.
|
|
func WithTagExtension() Option {
|
|
return func(c *config) {
|
|
c.enableTags = true
|
|
}
|
|
}
|
|
|
|
// WithWikilinkExtension enables [[wikilink]] parsing.
|
|
func WithWikilinkExtension() Option {
|
|
return func(c *config) {
|
|
c.enableWikilink = true
|
|
}
|
|
}
|
|
|
|
// NewService creates a new markdown service with the given options.
|
|
func NewService(opts ...Option) Service {
|
|
cfg := &config{}
|
|
for _, opt := range opts {
|
|
opt(cfg)
|
|
}
|
|
|
|
exts := []goldmark.Extender{
|
|
extension.GFM, // GitHub Flavored Markdown (tables, strikethrough, task lists, autolinks)
|
|
}
|
|
|
|
// Add custom extensions based on config
|
|
if cfg.enableTags {
|
|
exts = append(exts, extensions.TagExtension)
|
|
}
|
|
if cfg.enableWikilink {
|
|
exts = append(exts, extensions.WikilinkExtension)
|
|
}
|
|
|
|
md := goldmark.New(
|
|
goldmark.WithExtensions(exts...),
|
|
goldmark.WithParserOptions(
|
|
parser.WithAutoHeadingID(), // Generate heading IDs
|
|
),
|
|
)
|
|
|
|
return &service{
|
|
md: md,
|
|
}
|
|
}
|
|
|
|
// parse is an internal helper to parse content into AST.
|
|
func (s *service) parse(content []byte) (gast.Node, error) {
|
|
reader := text.NewReader(content)
|
|
doc := s.md.Parser().Parse(reader)
|
|
return doc, nil
|
|
}
|
|
|
|
// ExtractTags returns all #tags found in content.
|
|
func (s *service) ExtractTags(content []byte) ([]string, error) {
|
|
root, err := s.parse(content)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var tags []string
|
|
|
|
// Walk the AST to find tag nodes
|
|
err = gast.Walk(root, func(n gast.Node, entering bool) (gast.WalkStatus, error) {
|
|
if !entering {
|
|
return gast.WalkContinue, nil
|
|
}
|
|
|
|
// Check for custom TagNode
|
|
if tagNode, ok := n.(*mast.TagNode); ok {
|
|
tags = append(tags, string(tagNode.Tag))
|
|
}
|
|
|
|
return gast.WalkContinue, nil
|
|
})
|
|
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Deduplicate and normalize tags
|
|
return uniqueLowercase(tags), nil
|
|
}
|
|
|
|
// ExtractProperties computes boolean properties about the content.
|
|
func (s *service) ExtractProperties(content []byte) (*storepb.MemoPayload_Property, error) {
|
|
root, err := s.parse(content)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
prop := &storepb.MemoPayload_Property{}
|
|
|
|
err = gast.Walk(root, func(n gast.Node, entering bool) (gast.WalkStatus, error) {
|
|
if !entering {
|
|
return gast.WalkContinue, nil
|
|
}
|
|
|
|
switch n.Kind() {
|
|
case gast.KindLink, mast.KindWikilink:
|
|
prop.HasLink = true
|
|
|
|
case gast.KindCodeBlock, gast.KindFencedCodeBlock, gast.KindCodeSpan:
|
|
prop.HasCode = true
|
|
|
|
case east.KindTaskCheckBox:
|
|
prop.HasTaskList = true
|
|
if checkBox, ok := n.(*east.TaskCheckBox); ok {
|
|
if !checkBox.IsChecked {
|
|
prop.HasIncompleteTasks = true
|
|
}
|
|
}
|
|
default:
|
|
// No special handling for other node types
|
|
}
|
|
|
|
return gast.WalkContinue, nil
|
|
})
|
|
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return prop, nil
|
|
}
|
|
|
|
// RenderMarkdown renders goldmark AST back to markdown text.
|
|
func (s *service) RenderMarkdown(content []byte) (string, error) {
|
|
root, err := s.parse(content)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
mdRenderer := renderer.NewMarkdownRenderer()
|
|
return mdRenderer.Render(root, content), nil
|
|
}
|
|
|
|
// RenderHTML renders markdown content to HTML using goldmark's built-in HTML renderer.
|
|
func (s *service) RenderHTML(content []byte) (string, error) {
|
|
var buf bytes.Buffer
|
|
if err := s.md.Convert(content, &buf); err != nil {
|
|
return "", err
|
|
}
|
|
return buf.String(), nil
|
|
}
|
|
|
|
// GenerateSnippet creates a plain text summary from markdown content.
|
|
func (s *service) GenerateSnippet(content []byte, maxLength int) (string, error) {
|
|
root, err := s.parse(content)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
var buf strings.Builder
|
|
var lastNodeWasBlock bool
|
|
|
|
err = gast.Walk(root, func(n gast.Node, entering bool) (gast.WalkStatus, error) {
|
|
if entering {
|
|
// Skip code blocks and code spans entirely
|
|
switch n.Kind() {
|
|
case gast.KindCodeBlock, gast.KindFencedCodeBlock, gast.KindCodeSpan:
|
|
return gast.WalkSkipChildren, nil
|
|
default:
|
|
// Continue walking for other node types
|
|
}
|
|
|
|
// Add space before block elements (except first)
|
|
switch n.Kind() {
|
|
case gast.KindParagraph, gast.KindHeading, gast.KindListItem:
|
|
if buf.Len() > 0 && lastNodeWasBlock {
|
|
buf.WriteByte(' ')
|
|
}
|
|
default:
|
|
// No space needed for other node types
|
|
}
|
|
}
|
|
|
|
if !entering {
|
|
// Mark that we just exited a block element
|
|
switch n.Kind() {
|
|
case gast.KindParagraph, gast.KindHeading, gast.KindListItem:
|
|
lastNodeWasBlock = true
|
|
default:
|
|
// Not a block element
|
|
}
|
|
return gast.WalkContinue, nil
|
|
}
|
|
|
|
lastNodeWasBlock = false
|
|
|
|
// Only extract plain text nodes
|
|
if textNode, ok := n.(*gast.Text); ok {
|
|
segment := textNode.Segment
|
|
buf.Write(segment.Value(content))
|
|
|
|
// Add space if this is a soft line break
|
|
if textNode.SoftLineBreak() {
|
|
buf.WriteByte(' ')
|
|
}
|
|
}
|
|
|
|
// Stop walking if we've exceeded double the max length
|
|
// (we'll truncate precisely later)
|
|
if buf.Len() > maxLength*2 {
|
|
return gast.WalkStop, nil
|
|
}
|
|
|
|
return gast.WalkContinue, nil
|
|
})
|
|
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
snippet := buf.String()
|
|
|
|
// Truncate at word boundary if needed
|
|
if len(snippet) > maxLength {
|
|
snippet = truncateAtWord(snippet, maxLength)
|
|
}
|
|
|
|
return strings.TrimSpace(snippet), nil
|
|
}
|
|
|
|
// ValidateContent checks if the markdown content is valid.
|
|
func (s *service) ValidateContent(content []byte) error {
|
|
// Try to parse the content
|
|
_, err := s.parse(content)
|
|
return err
|
|
}
|
|
|
|
// ExtractAll extracts tags, properties, and references in a single parse for efficiency.
|
|
func (s *service) ExtractAll(content []byte) (*ExtractedData, error) {
|
|
root, err := s.parse(content)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
data := &ExtractedData{
|
|
Tags: []string{},
|
|
Property: &storepb.MemoPayload_Property{},
|
|
}
|
|
|
|
// Single walk to collect all data
|
|
err = gast.Walk(root, func(n gast.Node, entering bool) (gast.WalkStatus, error) {
|
|
if !entering {
|
|
return gast.WalkContinue, nil
|
|
}
|
|
|
|
// Extract tags
|
|
if tagNode, ok := n.(*mast.TagNode); ok {
|
|
data.Tags = append(data.Tags, string(tagNode.Tag))
|
|
}
|
|
|
|
// Extract properties based on node kind
|
|
switch n.Kind() {
|
|
case gast.KindLink, mast.KindWikilink:
|
|
data.Property.HasLink = true
|
|
|
|
case gast.KindCodeBlock, gast.KindFencedCodeBlock, gast.KindCodeSpan:
|
|
data.Property.HasCode = true
|
|
|
|
case east.KindTaskCheckBox:
|
|
data.Property.HasTaskList = true
|
|
if checkBox, ok := n.(*east.TaskCheckBox); ok {
|
|
if !checkBox.IsChecked {
|
|
data.Property.HasIncompleteTasks = true
|
|
}
|
|
}
|
|
default:
|
|
// No special handling for other node types
|
|
}
|
|
|
|
return gast.WalkContinue, nil
|
|
})
|
|
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Deduplicate and normalize tags
|
|
data.Tags = uniqueLowercase(data.Tags)
|
|
|
|
return data, nil
|
|
}
|
|
|
|
// RenameTag renames all occurrences of oldTag to newTag in content.
|
|
func (s *service) RenameTag(content []byte, oldTag, newTag string) (string, error) {
|
|
root, err := s.parse(content)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
// Walk the AST to find and rename tag nodes
|
|
err = gast.Walk(root, func(n gast.Node, entering bool) (gast.WalkStatus, error) {
|
|
if !entering {
|
|
return gast.WalkContinue, nil
|
|
}
|
|
|
|
// Check for custom TagNode and rename if it matches
|
|
if tagNode, ok := n.(*mast.TagNode); ok {
|
|
if string(tagNode.Tag) == oldTag {
|
|
tagNode.Tag = []byte(newTag)
|
|
}
|
|
}
|
|
|
|
return gast.WalkContinue, nil
|
|
})
|
|
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
// Render back to markdown using the already-parsed AST
|
|
mdRenderer := renderer.NewMarkdownRenderer()
|
|
return mdRenderer.Render(root, content), nil
|
|
}
|
|
|
|
// uniqueLowercase lower-cases every string in strs and returns the distinct
// results in order of first appearance. It returns nil when strs is empty.
func uniqueLowercase(strs []string) []string {
	if len(strs) == 0 {
		return nil
	}

	// A struct{}-valued map is the idiomatic set; size it for the worst case
	// where every input is distinct.
	seen := make(map[string]struct{}, len(strs))
	var result []string
	for _, s := range strs {
		lower := strings.ToLower(s)
		if _, dup := seen[lower]; dup {
			continue
		}
		seen[lower] = struct{}{}
		result = append(result, lower)
	}
	return result
}
|
|
|
|
// truncateAtWord shortens s to at most maxLength bytes, preferring to cut at
// the last whitespace character inside that limit, and appends " ...".
//
// s is returned unchanged when it already fits. The cut never lands inside a
// multi-byte UTF-8 sequence: it is moved back to the nearest rune start at or
// before maxLength, so the result stays valid UTF-8. A negative maxLength is
// treated as zero.
func truncateAtWord(s string, maxLength int) string {
	if len(s) <= maxLength {
		return s
	}
	if maxLength < 0 {
		maxLength = 0
	}

	// Ranging over a string yields the byte offset of each rune start; keep
	// the last one that does not exceed the limit. For ASCII text this is
	// exactly maxLength.
	cut := 0
	for i := range s {
		if i > maxLength {
			break
		}
		cut = i
	}
	truncated := s[:cut]

	// Prefer ending on a word boundary when one exists past the first byte.
	if lastSpace := strings.LastIndexAny(truncated, " \t\n\r"); lastSpace > 0 {
		truncated = truncated[:lastSpace]
	}

	return truncated + " ..."
}
|