You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
memos/plugin/markdown/markdown.go

419 lines
10 KiB
Go

package markdown
import (
"bytes"
"strings"
"github.com/yuin/goldmark"
gast "github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/extension"
east "github.com/yuin/goldmark/extension/ast"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/text"
mast "github.com/usememos/memos/plugin/markdown/ast"
"github.com/usememos/memos/plugin/markdown/extensions"
"github.com/usememos/memos/plugin/markdown/renderer"
storepb "github.com/usememos/memos/proto/gen/store"
)
// ExtractedData bundles the metadata that ExtractAll gathers from a markdown
// document in one traversal of the parsed AST.
type ExtractedData struct {
	// Tags lists the #tags found in the content; ExtractAll de-duplicates
	// and lower-cases them before returning.
	Tags []string
	// Property carries the boolean flags computed from the content
	// (links, code, task lists, incomplete tasks).
	Property *storepb.MemoPayload_Property
}
// Service is the markdown metadata and rendering API of this package.
//
// Content is parsed with goldmark. HTML is primarily rendered on the frontend
// (markdown-it); RenderHTML exists for RSS feeds and other server-side
// rendering needs.
type Service interface {
	// ExtractAll collects tags and properties with a single parse and a
	// single walk of the document.
	ExtractAll(content []byte) (*ExtractedData, error)

	// ExtractTags returns the #tags found in content, de-duplicated and
	// lower-cased.
	ExtractTags(content []byte) ([]string, error)

	// ExtractProperties computes the boolean properties of content.
	ExtractProperties(content []byte) (*storepb.MemoPayload_Property, error)

	// RenderMarkdown parses content and renders the resulting AST back to
	// markdown text.
	RenderMarkdown(content []byte) (string, error)

	// RenderHTML converts markdown content to HTML.
	RenderHTML(content []byte) (string, error)

	// GenerateSnippet builds a plain-text summary of content, truncated to
	// roughly maxLength bytes.
	GenerateSnippet(content []byte, maxLength int) (string, error)

	// ValidateContent reports an error if content cannot be parsed.
	ValidateContent(content []byte) error

	// RenameTag rewrites occurrences of oldTag in content to newTag and
	// returns the re-rendered markdown.
	RenameTag(content []byte, oldTag, newTag string) (string, error)
}
// service is the goldmark-backed implementation of Service.
type service struct {
	// md is the configured goldmark instance used for both parsing and
	// HTML conversion.
	md goldmark.Markdown
}
type (
	// Option is a functional option that adjusts the markdown service
	// configuration before the service is built.
	Option func(*config)

	// config records which optional syntax extensions are switched on.
	config struct {
		// enableTags turns on #tag parsing.
		enableTags bool
		// enableWikilink turns on [[wikilink]] parsing.
		enableWikilink bool
	}
)
// WithTagExtension returns an Option that switches on #tag parsing.
func WithTagExtension() Option {
	enable := func(cfg *config) {
		cfg.enableTags = true
	}
	return enable
}
// WithWikilinkExtension returns an Option that switches on [[wikilink]] parsing.
func WithWikilinkExtension() Option {
	enable := func(cfg *config) {
		cfg.enableWikilink = true
	}
	return enable
}
// NewService builds a markdown Service.
//
// Every option is applied, in order, to a zero-valued configuration. GitHub
// Flavored Markdown is always enabled; the tag and wikilink extensions are
// appended only when the corresponding option was supplied. The parser is
// configured to generate heading IDs automatically.
func NewService(opts ...Option) Service {
	var cfg config
	for i := range opts {
		opts[i](&cfg)
	}

	// GFM (tables, strikethrough, task lists, autolinks) is the fixed base;
	// custom extensions follow it in a stable order.
	enabled := []goldmark.Extender{extension.GFM}
	if cfg.enableTags {
		enabled = append(enabled, extensions.TagExtension)
	}
	if cfg.enableWikilink {
		enabled = append(enabled, extensions.WikilinkExtension)
	}

	parserOpts := goldmark.WithParserOptions(
		parser.WithAutoHeadingID(),
	)
	return &service{
		md: goldmark.New(goldmark.WithExtensions(enabled...), parserOpts),
	}
}
// parse turns raw markdown bytes into a goldmark AST using the service's
// configured parser. The returned error is currently always nil; it is kept
// in the signature so callers handle parsing uniformly.
func (s *service) parse(content []byte) (gast.Node, error) {
	return s.md.Parser().Parse(text.NewReader(content)), nil
}
// ExtractTags parses content and returns every #tag it contains,
// lower-cased and with duplicates removed (first occurrence order is kept).
func (s *service) ExtractTags(content []byte) ([]string, error) {
	doc, err := s.parse(content)
	if err != nil {
		return nil, err
	}

	var found []string
	// collect records the tag text of each custom TagNode on the way down;
	// the way back up carries no extra information.
	collect := func(node gast.Node, entering bool) (gast.WalkStatus, error) {
		if entering {
			if tag, isTag := node.(*mast.TagNode); isTag {
				found = append(found, string(tag.Tag))
			}
		}
		return gast.WalkContinue, nil
	}
	if walkErr := gast.Walk(doc, collect); walkErr != nil {
		return nil, walkErr
	}

	return uniqueLowercase(found), nil
}
// ExtractProperties parses content and reports which kinds of constructs it
// contains:
//   - HasLink: a link or wikilink node is present
//   - HasCode: a code block, fenced code block, or code span is present
//   - HasTaskList: a task checkbox is present
//   - HasIncompleteTasks: at least one task checkbox is unchecked
//
// NOTE(review): nodes of kind gast.KindAutoLink are not matched by the switch
// below — confirm that is intended.
func (s *service) ExtractProperties(content []byte) (*storepb.MemoPayload_Property, error) {
	doc, err := s.parse(content)
	if err != nil {
		return nil, err
	}

	result := new(storepb.MemoPayload_Property)
	inspect := func(node gast.Node, entering bool) (gast.WalkStatus, error) {
		if entering {
			switch node.Kind() {
			case east.KindTaskCheckBox:
				result.HasTaskList = true
				if box, isBox := node.(*east.TaskCheckBox); isBox && !box.IsChecked {
					result.HasIncompleteTasks = true
				}
			case gast.KindCodeBlock, gast.KindFencedCodeBlock, gast.KindCodeSpan:
				result.HasCode = true
			case gast.KindLink, mast.KindWikilink:
				result.HasLink = true
			}
		}
		return gast.WalkContinue, nil
	}
	if walkErr := gast.Walk(doc, inspect); walkErr != nil {
		return nil, walkErr
	}

	return result, nil
}
// RenderMarkdown parses content into a goldmark AST and renders that AST back
// to markdown text with the package's markdown renderer.
func (s *service) RenderMarkdown(content []byte) (string, error) {
	doc, err := s.parse(content)
	if err != nil {
		return "", err
	}

	rendered := renderer.NewMarkdownRenderer().Render(doc, content)
	return rendered, nil
}
// RenderHTML converts markdown content to HTML through the configured
// goldmark instance and returns the result as a string.
func (s *service) RenderHTML(content []byte) (string, error) {
	out := &bytes.Buffer{}
	err := s.md.Convert(content, out)
	if err != nil {
		return "", err
	}
	return out.String(), nil
}
// GenerateSnippet creates a plain text summary from markdown content.
//
// Only plain text nodes contribute to the snippet; code blocks, fenced code
// blocks and code spans are skipped entirely. Text that belongs to different
// block-level nodes (paragraphs, headings, list items, nested lists, quotes,
// table cells, ...) is separated by a single space, as are soft and hard line
// breaks. The result is truncated at a word boundary when it is longer than
// maxLength bytes and finally trimmed of surrounding whitespace. A negative
// maxLength is treated as zero.
func (s *service) GenerateSnippet(content []byte, maxLength int) (string, error) {
	root, err := s.parse(content)
	if err != nil {
		return "", err
	}
	// A negative limit would make the truncation slice panic; treat it as zero.
	if maxLength < 0 {
		maxLength = 0
	}

	var buf strings.Builder
	// pendingBreak records that a block-level node has closed since the last
	// text was written. The separator is emitted lazily, right before the next
	// text, so that entering container nodes (lists, block quotes) in between
	// cannot swallow it.
	pendingBreak := false
	// writeSpace appends one separating space, never at the very start of the
	// snippet and never directly after another space.
	writeSpace := func() {
		if cur := buf.String(); cur != "" && cur[len(cur)-1] != ' ' {
			buf.WriteByte(' ')
		}
	}

	err = gast.Walk(root, func(n gast.Node, entering bool) (gast.WalkStatus, error) {
		if !entering {
			// Any block-level node that closes is a boundary between words.
			if n.Type() == gast.TypeBlock {
				pendingBreak = true
			}
			return gast.WalkContinue, nil
		}

		// Skip code blocks and code spans entirely.
		switch n.Kind() {
		case gast.KindCodeBlock, gast.KindFencedCodeBlock, gast.KindCodeSpan:
			return gast.WalkSkipChildren, nil
		default:
			// Continue walking for other node types.
		}

		// Only extract plain text nodes.
		if textNode, ok := n.(*gast.Text); ok {
			if pendingBreak {
				writeSpace()
				pendingBreak = false
			}
			buf.Write(textNode.Segment.Value(content))
			// A line break inside a paragraph still separates two words.
			if textNode.SoftLineBreak() || textNode.HardLineBreak() {
				writeSpace()
			}
		}

		// Stop walking once we have collected double the max length;
		// precise truncation happens below.
		if buf.Len() > maxLength*2 {
			return gast.WalkStop, nil
		}
		return gast.WalkContinue, nil
	})
	if err != nil {
		return "", err
	}

	snippet := buf.String()
	// Truncate at word boundary if needed.
	if len(snippet) > maxLength {
		snippet = truncateAtWord(snippet, maxLength)
	}
	return strings.TrimSpace(snippet), nil
}
// ValidateContent parses content and returns the parser's error, if any.
// Because parse currently never returns a non-nil error, this method accepts
// every input at present.
func (s *service) ValidateContent(content []byte) error {
	if _, err := s.parse(content); err != nil {
		return err
	}
	return nil
}
// ExtractAll gathers tags and properties from content with one parse and one
// walk of the AST, which avoids the repeated parsing that separate calls to
// ExtractTags and ExtractProperties would incur.
//
// The returned Tags are lower-cased and de-duplicated; Property flags follow
// the same rules as ExtractProperties.
func (s *service) ExtractAll(content []byte) (*ExtractedData, error) {
	doc, err := s.parse(content)
	if err != nil {
		return nil, err
	}

	var rawTags []string
	props := &storepb.MemoPayload_Property{}

	visit := func(node gast.Node, entering bool) (gast.WalkStatus, error) {
		if entering {
			// Tags come from the custom TagNode type.
			if tag, isTag := node.(*mast.TagNode); isTag {
				rawTags = append(rawTags, string(tag.Tag))
			}

			// Properties are derived from the node kind.
			switch node.Kind() {
			case east.KindTaskCheckBox:
				props.HasTaskList = true
				if box, isBox := node.(*east.TaskCheckBox); isBox && !box.IsChecked {
					props.HasIncompleteTasks = true
				}
			case gast.KindCodeBlock, gast.KindFencedCodeBlock, gast.KindCodeSpan:
				props.HasCode = true
			case gast.KindLink, mast.KindWikilink:
				props.HasLink = true
			}
		}
		return gast.WalkContinue, nil
	}
	if walkErr := gast.Walk(doc, visit); walkErr != nil {
		return nil, walkErr
	}

	return &ExtractedData{
		Tags:     uniqueLowercase(rawTags),
		Property: props,
	}, nil
}
// RenameTag renames all occurrences of oldTag to newTag in content and
// returns the re-rendered markdown.
//
// Tags are matched case-insensitively: the extraction methods of this service
// normalize tags to lower case, so a caller holding the normalized tag "work"
// must also be able to rename an occurrence written as "#Work".
func (s *service) RenameTag(content []byte, oldTag, newTag string) (string, error) {
	root, err := s.parse(content)
	if err != nil {
		return "", err
	}

	// Walk the AST to find and rename tag nodes.
	err = gast.Walk(root, func(n gast.Node, entering bool) (gast.WalkStatus, error) {
		if !entering {
			return gast.WalkContinue, nil
		}
		// Only the custom TagNode carries tag text.
		if tagNode, ok := n.(*mast.TagNode); ok && strings.EqualFold(string(tagNode.Tag), oldTag) {
			tagNode.Tag = []byte(newTag)
		}
		return gast.WalkContinue, nil
	})
	if err != nil {
		return "", err
	}

	// Render back to markdown using the already-parsed (and now modified) AST.
	mdRenderer := renderer.NewMarkdownRenderer()
	return mdRenderer.Render(root, content), nil
}
// uniqueLowercase lower-cases every input string and drops repeats, keeping
// the order in which each distinct value first appears. The result is nil
// when the input yields no values.
func uniqueLowercase(strs []string) []string {
	var out []string
	visited := map[string]struct{}{}
	for i := range strs {
		key := strings.ToLower(strs[i])
		if _, dup := visited[key]; dup {
			continue
		}
		visited[key] = struct{}{}
		out = append(out, key)
	}
	return out
}
// truncateAtWord shortens s to at most maxLength bytes, cutting at the last
// whitespace before the limit when there is one, and appends " ..." to mark
// the truncation. Strings that already fit are returned unchanged.
//
// The cut never lands inside a multi-byte UTF-8 sequence, so valid input
// always produces valid output. A negative maxLength is treated as zero.
func truncateAtWord(s string, maxLength int) string {
	if maxLength < 0 {
		maxLength = 0
	}
	if len(s) <= maxLength {
		return s
	}

	// Back up to a rune boundary: UTF-8 continuation bytes have the form
	// 10xxxxxx, and slicing in front of one would split a character.
	cut := maxLength
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	truncated := s[:cut]

	// Prefer ending on a whole word when a whitespace boundary exists.
	if lastSpace := strings.LastIndexAny(truncated, " \t\n\r"); lastSpace > 0 {
		truncated = truncated[:lastSpace]
	}
	return truncated + " ..."
}