refactor: markdown parser matchers

pull/2829/head
Steven 1 year ago
parent bf905bba86
commit d165d87288
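
This change collapses each parser's separate Match/Parse pair into a single Match method that returns the built node together with the number of tokens it consumed (nil and 0 when the input does not match), so ParseBlockWithParsers and ParseInlineWithParsers no longer scan the same tokens twice. Most matchers are also bounded to the current line via the new tokenizer.GetFirstLine helper and locate closing delimiters with tokenizer.FindUnescaped. A minimal sketch of the new contract and the consuming loop, paraphrasing the hunks below (not a file from the repo):

package parser

import (
    "github.com/usememos/memos/plugin/gomark/ast"
    "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)

// BaseParser is the single-method interface every parser now implements.
// Match returns the parsed node and the number of tokens consumed,
// or (nil, 0) when the tokens do not match.
type BaseParser interface {
    Match(tokens []*tokenizer.Token) (ast.Node, int)
}

// parseWith shows how the refactored loops drive the interface: the first
// parser that returns a non-nil node wins, and its size advances the cursor.
func parseWith(tokens []*tokenizer.Token, parsers []BaseParser) []ast.Node {
    nodes := []ast.Node{}
    for len(tokens) > 0 {
        matched := false
        for _, p := range parsers {
            if node, size := p.Match(tokens); node != nil && size > 0 {
                tokens = tokens[size:] // consume matched tokens
                nodes = append(nodes, node)
                matched = true
                break
            }
        }
        if !matched {
            // The real loops rely on TextParser/ParagraphParser matching any
            // remaining token, so in practice they never reach this point.
            break
        }
    }
    return nodes
}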

@ -79,7 +79,7 @@ func (n *BaseNode) SetNextSibling(node Node) {
func IsBlockNode(node Node) bool {
switch node.Type() {
case ParagraphNode, CodeBlockNode, HeadingNode, HorizontalRuleNode, BlockquoteNode, OrderedListNode, UnorderedListNode, TaskListNode, MathBlockNode:
case ParagraphNode, CodeBlockNode, HeadingNode, HorizontalRuleNode, BlockquoteNode, OrderedListNode, UnorderedListNode, TaskListNode, MathBlockNode, TableNode, EmbeddedContentNode:
return true
default:
return false

@ -1,7 +1,6 @@
package parser
import (
"errors"
"net/url"
"github.com/usememos/memos/plugin/gomark/ast"
@ -14,56 +13,31 @@ func NewAutoLinkParser() *AutoLinkParser {
return &AutoLinkParser{}
}
func (*AutoLinkParser) Match(tokens []*tokenizer.Token) (int, bool) {
func (*AutoLinkParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
if len(tokens) < 3 {
return 0, false
return nil, 0
}
hasAngleBrackets := false
if tokens[0].Type == tokenizer.LessThan {
hasAngleBrackets = true
}
contentTokens := []*tokenizer.Token{}
for _, token := range tokens {
if token.Type == tokenizer.Newline || token.Type == tokenizer.Space {
break
matchedTokens := tokenizer.GetFirstLine(tokens)
urlStr, isRawText := "", true
if matchedTokens[0].Type == tokenizer.LessThan {
greaterThanIndex := tokenizer.FindUnescaped(matchedTokens, tokenizer.GreaterThan)
if greaterThanIndex < 0 {
return nil, 0
}
contentTokens = append(contentTokens, token)
if hasAngleBrackets && token.Type == tokenizer.GreaterThan {
break
}
}
if hasAngleBrackets && contentTokens[len(contentTokens)-1].Type != tokenizer.GreaterThan {
return 0, false
}
content := tokenizer.Stringify(contentTokens)
if !hasAngleBrackets {
u, err := url.Parse(content)
matchedTokens = matchedTokens[:greaterThanIndex+1]
urlStr = tokenizer.Stringify(matchedTokens[1 : len(matchedTokens)-1])
isRawText = false
} else {
u, err := url.Parse(tokenizer.Stringify(matchedTokens))
if err != nil || u.Scheme == "" || u.Host == "" {
return 0, false
return nil, 0
}
urlStr = tokenizer.Stringify(matchedTokens)
}
return len(contentTokens), true
}
func (p *AutoLinkParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
}
url := tokenizer.Stringify(tokens[:size])
isRawText := true
if tokens[0].Type == tokenizer.LessThan && tokens[size-1].Type == tokenizer.GreaterThan {
isRawText = false
url = tokenizer.Stringify(tokens[1 : size-1])
}
return &ast.AutoLink{
URL: url,
URL: urlStr,
IsRawText: isRawText,
}, nil
}, len(matchedTokens)
}

@ -36,7 +36,7 @@ func TestAutoLinkParser(t *testing.T) {
for _, test := range tests {
tokens := tokenizer.Tokenize(test.text)
node, _ := NewAutoLinkParser().Parse(tokens)
node, _ := NewAutoLinkParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.link}), restore.Restore([]ast.Node{node}))
}
}

@ -1,8 +1,6 @@
package parser
import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
@ -13,40 +11,26 @@ func NewBlockquoteParser() *BlockquoteParser {
return &BlockquoteParser{}
}
func (*BlockquoteParser) Match(tokens []*tokenizer.Token) (int, bool) {
if len(tokens) < 3 {
return 0, false
func (p *BlockquoteParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
matchedTokens := tokenizer.GetFirstLine(tokens)
if len(matchedTokens) < 3 {
return nil, 0
}
if tokens[0].Type != tokenizer.GreaterThan || tokens[1].Type != tokenizer.Space {
return 0, false
if matchedTokens[0].Type != tokenizer.GreaterThan || matchedTokens[1].Type != tokenizer.Space {
return nil, 0
}
contentTokens := []*tokenizer.Token{}
for _, token := range tokens[2:] {
if token.Type == tokenizer.Newline {
break
}
contentTokens = append(contentTokens, token)
}
if len(contentTokens) == 0 {
return 0, false
}
return len(contentTokens) + 2, true
}
func (p *BlockquoteParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
}
contentTokens := tokens[2:size]
children, err := ParseBlockWithParsers(contentTokens, []BlockParser{NewParagraphParser(), NewLineBreakParser()})
contentTokens := matchedTokens[2:]
children, err := ParseInlineWithParsers(contentTokens, []InlineParser{NewLinkParser(), NewTextParser()})
if err != nil {
return nil, err
return nil, 0
}
return &ast.Blockquote{
Children: children,
}, nil
Children: []ast.Node{
&ast.Paragraph{
Children: children,
},
},
}, len(matchedTokens)
}

@ -15,6 +15,10 @@ func TestBlockquoteParser(t *testing.T) {
text string
blockquote ast.Node
}{
{
text: ">Hello world",
blockquote: nil,
},
{
text: "> Hello world",
blockquote: &ast.Blockquote{
@ -57,15 +61,11 @@ func TestBlockquoteParser(t *testing.T) {
},
},
},
{
text: ">Hello\nworld",
blockquote: nil,
},
}
for _, test := range tests {
tokens := tokenizer.Tokenize(test.text)
node, _ := NewBlockquoteParser().Parse(tokens)
node, _ := NewBlockquoteParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.blockquote}), restore.Restore([]ast.Node{node}))
}
}

@ -1,8 +1,6 @@
package parser
import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
@ -13,52 +11,44 @@ func NewBoldParser() InlineParser {
return &BoldParser{}
}
func (*BoldParser) Match(tokens []*tokenizer.Token) (int, bool) {
if len(tokens) < 5 {
return 0, false
func (p *BoldParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
matchedTokens := tokenizer.GetFirstLine(tokens)
if len(matchedTokens) < 5 {
return nil, 0
}
prefixTokens := tokens[:2]
prefixTokens := matchedTokens[:2]
if prefixTokens[0].Type != prefixTokens[1].Type {
return 0, false
return nil, 0
}
prefixTokenType := prefixTokens[0].Type
if prefixTokenType != tokenizer.Asterisk && prefixTokenType != tokenizer.Underscore {
return 0, false
return nil, 0
}
cursor, matched := 2, false
for ; cursor < len(tokens)-1; cursor++ {
token, nextToken := tokens[cursor], tokens[cursor+1]
for ; cursor < len(matchedTokens)-1; cursor++ {
token, nextToken := matchedTokens[cursor], matchedTokens[cursor+1]
if token.Type == tokenizer.Newline || nextToken.Type == tokenizer.Newline {
return 0, false
return nil, 0
}
if token.Type == prefixTokenType && nextToken.Type == prefixTokenType {
matchedTokens = matchedTokens[:cursor+2]
matched = true
break
}
}
if !matched {
return 0, false
}
return cursor + 2, true
}
func (p *BoldParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
return nil, 0
}
prefixTokenType := tokens[0].Type
contentTokens := tokens[2 : size-2]
children, err := ParseInlineWithParsers(contentTokens, []InlineParser{NewLinkParser(), NewTextParser()})
size := len(matchedTokens)
children, err := ParseInlineWithParsers(matchedTokens[2:size-2], []InlineParser{NewLinkParser(), NewTextParser()})
if err != nil {
return nil, err
return nil, 0
}
return &ast.Bold{
Symbol: prefixTokenType,
Children: children,
}, nil
}, size
}

@ -1,8 +1,6 @@
package parser
import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
@ -13,48 +11,39 @@ func NewBoldItalicParser() InlineParser {
return &BoldItalicParser{}
}
func (*BoldItalicParser) Match(tokens []*tokenizer.Token) (int, bool) {
if len(tokens) < 7 {
return 0, false
func (*BoldItalicParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
matchedTokens := tokenizer.GetFirstLine(tokens)
if len(matchedTokens) < 7 {
return nil, 0
}
prefixTokens := tokens[:3]
prefixTokens := matchedTokens[:3]
if prefixTokens[0].Type != prefixTokens[1].Type || prefixTokens[0].Type != prefixTokens[2].Type || prefixTokens[1].Type != prefixTokens[2].Type {
return 0, false
return nil, 0
}
prefixTokenType := prefixTokens[0].Type
if prefixTokenType != tokenizer.Asterisk && prefixTokenType != tokenizer.Underscore {
return 0, false
return nil, 0
}
cursor, matched := 3, false
for ; cursor < len(tokens)-2; cursor++ {
token, nextToken, endToken := tokens[cursor], tokens[cursor+1], tokens[cursor+2]
for ; cursor < len(matchedTokens)-2; cursor++ {
token, nextToken, endToken := matchedTokens[cursor], matchedTokens[cursor+1], matchedTokens[cursor+2]
if token.Type == tokenizer.Newline || nextToken.Type == tokenizer.Newline || endToken.Type == tokenizer.Newline {
return 0, false
return nil, 0
}
if token.Type == prefixTokenType && nextToken.Type == prefixTokenType && endToken.Type == prefixTokenType {
matchedTokens = matchedTokens[:cursor+3]
matched = true
break
}
}
if !matched {
return 0, false
}
return cursor + 3, true
}
func (p *BoldItalicParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
return nil, 0
}
prefixTokenType := tokens[0].Type
contentTokens := tokens[3 : size-3]
size := len(matchedTokens)
return &ast.BoldItalic{
Symbol: prefixTokenType,
Content: tokenizer.Stringify(contentTokens),
}, nil
Content: tokenizer.Stringify(matchedTokens[3 : size-3]),
}, len(matchedTokens)
}

@ -19,6 +19,14 @@ func TestBoldItalicParser(t *testing.T) {
text: "*Hello world!",
boldItalic: nil,
},
{
text: "*** Hello * *",
boldItalic: nil,
},
{
text: "*** Hello **",
boldItalic: nil,
},
{
text: "***Hello***",
boldItalic: &ast.BoldItalic{
@ -33,19 +41,11 @@ func TestBoldItalicParser(t *testing.T) {
Content: " Hello ",
},
},
{
text: "*** Hello * *",
boldItalic: nil,
},
{
text: "*** Hello **",
boldItalic: nil,
},
}
for _, test := range tests {
tokens := tokenizer.Tokenize(test.text)
node, _ := NewBoldItalicParser().Parse(tokens)
node, _ := NewBoldItalicParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.boldItalic}), restore.Restore([]ast.Node{node}))
}
}

@ -53,7 +53,7 @@ func TestBoldParser(t *testing.T) {
for _, test := range tests {
tokens := tokenizer.Tokenize(test.text)
node, _ := NewBoldParser().Parse(tokens)
node, _ := NewBoldParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.bold}), restore.Restore([]ast.Node{node}))
}
}

@ -1,8 +1,6 @@
package parser
import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
@ -13,39 +11,20 @@ func NewCodeParser() *CodeParser {
return &CodeParser{}
}
func (*CodeParser) Match(tokens []*tokenizer.Token) (int, bool) {
if len(tokens) < 3 {
return 0, false
}
if tokens[0].Type != tokenizer.Backtick {
return 0, false
func (p *CodeParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
matchedTokens := tokenizer.GetFirstLine(tokens)
if len(matchedTokens) < 3 {
return nil, 0
}
contentTokens, matched := []*tokenizer.Token{}, false
for _, token := range tokens[1:] {
if token.Type == tokenizer.Newline {
return 0, false
}
if token.Type == tokenizer.Backtick {
matched = true
break
}
contentTokens = append(contentTokens, token)
if matchedTokens[0].Type != tokenizer.Backtick {
return nil, 0
}
if !matched || len(contentTokens) == 0 {
return 0, false
nextBacktickIndex := tokenizer.FindUnescaped(matchedTokens[1:], tokenizer.Backtick)
if nextBacktickIndex < 0 {
return nil, 0
}
return len(contentTokens) + 2, true
}
func (p *CodeParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
}
contentTokens := tokens[1 : size-1]
matchedTokens = matchedTokens[:1+nextBacktickIndex+1]
return &ast.Code{
Content: tokenizer.Stringify(contentTokens),
}, nil
Content: tokenizer.Stringify(matchedTokens[1 : len(matchedTokens)-1]),
}, len(matchedTokens)
}

@ -1,8 +1,6 @@
package parser
import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
@ -16,16 +14,16 @@ func NewCodeBlockParser() *CodeBlockParser {
return &CodeBlockParser{}
}
func (*CodeBlockParser) Match(tokens []*tokenizer.Token) (int, bool) {
func (*CodeBlockParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
if len(tokens) < 9 {
return 0, false
return nil, 0
}
if tokens[0].Type != tokenizer.Backtick || tokens[1].Type != tokenizer.Backtick || tokens[2].Type != tokenizer.Backtick {
return 0, false
return nil, 0
}
if tokens[3].Type != tokenizer.Newline && tokens[4].Type != tokenizer.Newline {
return 0, false
return nil, 0
}
cursor := 4
if tokens[3].Type != tokenizer.Newline {
@ -47,20 +45,11 @@ func (*CodeBlockParser) Match(tokens []*tokenizer.Token) (int, bool) {
}
}
if !matched {
return 0, false
}
return cursor, true
}
func (p *CodeBlockParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
return nil, 0
}
languageToken := tokens[3]
contentStart, contentEnd := 5, size-4
contentStart, contentEnd := 5, cursor-4
if languageToken.Type == tokenizer.Newline {
languageToken = nil
contentStart = 4
@ -72,5 +61,5 @@ func (p *CodeBlockParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
if languageToken != nil {
codeBlock.Language = languageToken.String()
}
return codeBlock, nil
return codeBlock, cursor
}

@ -59,7 +59,7 @@ func TestCodeBlockParser(t *testing.T) {
for _, test := range tests {
tokens := tokenizer.Tokenize(test.text)
node, _ := NewCodeBlockParser().Parse(tokens)
node, _ := NewCodeBlockParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.codeBlock}), restore.Restore([]ast.Node{node}))
}
}

@ -33,7 +33,7 @@ func TestCodeParser(t *testing.T) {
for _, test := range tests {
tokens := tokenizer.Tokenize(test.text)
node, _ := NewCodeParser().Parse(tokens)
node, _ := NewCodeParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.code}), restore.Restore([]ast.Node{node}))
}
}

@ -1,8 +1,6 @@
package parser
import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
@ -13,50 +11,33 @@ func NewEmbeddedContentParser() *EmbeddedContentParser {
return &EmbeddedContentParser{}
}
func (*EmbeddedContentParser) Match(tokens []*tokenizer.Token) (int, bool) {
lines := tokenizer.Split(tokens, tokenizer.Newline)
if len(lines) < 1 {
return 0, false
}
firstLine := lines[0]
if len(firstLine) < 5 {
return 0, false
func (p *EmbeddedContentParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
matchedTokens := tokenizer.GetFirstLine(tokens)
if len(matchedTokens) < 5 {
return nil, 0
}
if firstLine[0].Type != tokenizer.ExclamationMark || firstLine[1].Type != tokenizer.LeftSquareBracket || firstLine[2].Type != tokenizer.LeftSquareBracket {
return 0, false
if matchedTokens[0].Type != tokenizer.ExclamationMark || matchedTokens[1].Type != tokenizer.LeftSquareBracket || matchedTokens[2].Type != tokenizer.LeftSquareBracket {
return nil, 0
}
matched := false
for index, token := range firstLine[:len(firstLine)-1] {
if token.Type == tokenizer.RightSquareBracket && firstLine[index+1].Type == tokenizer.RightSquareBracket && index+1 == len(firstLine)-1 {
for index, token := range matchedTokens[:len(matchedTokens)-1] {
if token.Type == tokenizer.RightSquareBracket && matchedTokens[index+1].Type == tokenizer.RightSquareBracket && index+1 == len(matchedTokens)-1 {
matched = true
break
}
}
if !matched {
return 0, false
return nil, 0
}
return len(firstLine), true
}
func (p *EmbeddedContentParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
contentTokens := matchedTokens[3 : len(matchedTokens)-2]
resourceName, params := tokenizer.Stringify(contentTokens), ""
questionMarkIndex := tokenizer.FindUnescaped(contentTokens, tokenizer.QuestionMark)
if questionMarkIndex > 0 {
resourceName, params = tokenizer.Stringify(contentTokens[:questionMarkIndex]), tokenizer.Stringify(contentTokens[questionMarkIndex+1:])
}
contentTokens := tokens[3 : size-2]
resourceName, params := "", ""
paramsIndex, ok := tokenizer.Find(contentTokens, tokenizer.QuestionMark)
if ok && paramsIndex > 0 {
resourceName = tokenizer.Stringify(contentTokens[:paramsIndex])
params = tokenizer.Stringify(contentTokens[paramsIndex+1:])
} else {
resourceName = tokenizer.Stringify(contentTokens)
}
return &ast.EmbeddedContent{
ResourceName: resourceName,
Params: params,
}, nil
}, len(matchedTokens)
}

@ -59,7 +59,7 @@ func TestEmbeddedContentParser(t *testing.T) {
for _, test := range tests {
tokens := tokenizer.Tokenize(test.text)
node, _ := NewEmbeddedContentParser().Parse(tokens)
node, _ := NewEmbeddedContentParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.embeddedContent}), restore.Restore([]ast.Node{node}))
}
}

@ -1,8 +1,6 @@
package parser
import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
@ -13,29 +11,17 @@ func NewEscapingCharacterParser() *EscapingCharacterParser {
return &EscapingCharacterParser{}
}
func (*EscapingCharacterParser) Match(tokens []*tokenizer.Token) (int, bool) {
if len(tokens) == 0 {
return 0, false
func (p *EscapingCharacterParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
if len(tokens) < 2 {
return nil, 0
}
if tokens[0].Type != tokenizer.Backslash {
return 0, false
}
if len(tokens) == 1 {
return 0, false
return nil, 0
}
if tokens[1].Type == tokenizer.Newline || tokens[1].Type == tokenizer.Space || tokens[1].Type == tokenizer.Text || tokens[1].Type == tokenizer.Number {
return 0, false
}
return 2, true
}
func (p *EscapingCharacterParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
return nil, 0
}
return &ast.EscapingCharacter{
Symbol: tokens[1].Value,
}, nil
}, 2
}

@ -25,7 +25,7 @@ func TestEscapingCharacterParser(t *testing.T) {
for _, test := range tests {
tokens := tokenizer.Tokenize(test.text)
node, _ := NewEscapingCharacterParser().Parse(tokens)
node, _ := NewEscapingCharacterParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.node}), restore.Restore([]ast.Node{node}))
}
}

@ -1,8 +1,6 @@
package parser
import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
@ -13,61 +11,34 @@ func NewHeadingParser() *HeadingParser {
return &HeadingParser{}
}
func (*HeadingParser) Match(tokens []*tokenizer.Token) (int, bool) {
level := 0
for _, token := range tokens {
if token.Type == tokenizer.PoundSign {
level++
} else {
break
}
}
if len(tokens) <= level+1 {
return 0, false
}
if tokens[level].Type != tokenizer.Space {
return 0, false
}
if level == 0 || level > 6 {
return 0, false
func (p *HeadingParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
matchedTokens := tokenizer.GetFirstLine(tokens)
spaceIndex := tokenizer.FindUnescaped(matchedTokens, tokenizer.Space)
if spaceIndex < 0 {
return nil, 0
}
contentTokens := []*tokenizer.Token{}
for _, token := range tokens[level+1:] {
if token.Type == tokenizer.Newline {
break
for _, token := range matchedTokens[:spaceIndex] {
if token.Type != tokenizer.PoundSign {
return nil, 0
}
contentTokens = append(contentTokens, token)
}
if len(contentTokens) == 0 {
return 0, false
}
return len(contentTokens) + level + 1, true
}
func (p *HeadingParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
level := spaceIndex
if level == 0 || level > 6 {
return nil, 0
}
level := 0
for _, token := range tokens {
if token.Type == tokenizer.PoundSign {
level++
} else {
break
}
contentTokens := matchedTokens[level+1:]
if len(contentTokens) == 0 {
return nil, 0
}
contentTokens := tokens[level+1 : size]
children, err := ParseInline(contentTokens)
if err != nil {
return nil, err
return nil, 0
}
return &ast.Heading{
Level: level,
Children: children,
}, nil
}, len(contentTokens) + level + 1
}

@ -80,7 +80,7 @@ Hello World`,
for _, test := range tests {
tokens := tokenizer.Tokenize(test.text)
node, _ := NewHeadingParser().Parse(tokens)
node, _ := NewHeadingParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.heading}), restore.Restore([]ast.Node{node}))
}
}

@ -1,8 +1,6 @@
package parser
import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
@ -13,46 +11,34 @@ func NewHighlightParser() InlineParser {
return &HighlightParser{}
}
func (*HighlightParser) Match(tokens []*tokenizer.Token) (int, bool) {
if len(tokens) < 5 {
return 0, false
func (p *HighlightParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
matchedToken := tokenizer.GetFirstLine(tokens)
if len(matchedToken) < 5 {
return nil, 0
}
prefixTokens := tokens[:2]
prefixTokens := matchedToken[:2]
if prefixTokens[0].Type != prefixTokens[1].Type {
return 0, false
return nil, 0
}
prefixTokenType := prefixTokens[0].Type
if prefixTokenType != tokenizer.EqualSign {
return 0, false
return nil, 0
}
cursor, matched := 2, false
for ; cursor < len(tokens)-1; cursor++ {
token, nextToken := tokens[cursor], tokens[cursor+1]
if token.Type == tokenizer.Newline || nextToken.Type == tokenizer.Newline {
return 0, false
}
for ; cursor < len(matchedToken)-1; cursor++ {
token, nextToken := matchedToken[cursor], matchedToken[cursor+1]
if token.Type == prefixTokenType && nextToken.Type == prefixTokenType {
matched = true
break
}
}
if !matched {
return 0, false
}
return cursor + 2, true
}
func (p *HighlightParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
return nil, 0
}
contentTokens := tokens[2 : size-2]
return &ast.Highlight{
Content: tokenizer.Stringify(contentTokens),
}, nil
Content: tokenizer.Stringify(matchedToken[2:cursor]),
}, cursor + 2
}

@ -35,7 +35,7 @@ func TestHighlightParser(t *testing.T) {
for _, test := range tests {
tokens := tokenizer.Tokenize(test.text)
node, _ := NewHighlightParser().Parse(tokens)
node, _ := NewHighlightParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.bold}), restore.Restore([]ast.Node{node}))
}
}

@ -1,8 +1,6 @@
package parser
import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
@ -13,29 +11,21 @@ func NewHorizontalRuleParser() *HorizontalRuleParser {
return &HorizontalRuleParser{}
}
func (*HorizontalRuleParser) Match(tokens []*tokenizer.Token) (int, bool) {
if len(tokens) < 3 {
return 0, false
func (p *HorizontalRuleParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
matchedTokens := tokenizer.GetFirstLine(tokens)
if len(matchedTokens) < 3 {
return nil, 0
}
if tokens[0].Type != tokens[1].Type || tokens[0].Type != tokens[2].Type || tokens[1].Type != tokens[2].Type {
return 0, false
if len(matchedTokens) > 3 && matchedTokens[3].Type != tokenizer.Newline {
return nil, 0
}
if tokens[0].Type != tokenizer.Hyphen && tokens[0].Type != tokenizer.Underscore && tokens[0].Type != tokenizer.Asterisk {
return 0, false
if matchedTokens[0].Type != matchedTokens[1].Type || matchedTokens[0].Type != matchedTokens[2].Type || matchedTokens[1].Type != matchedTokens[2].Type {
return nil, 0
}
if len(tokens) > 3 && tokens[3].Type != tokenizer.Newline {
return 0, false
if matchedTokens[0].Type != tokenizer.Hyphen && matchedTokens[0].Type != tokenizer.Underscore && matchedTokens[0].Type != tokenizer.Asterisk {
return nil, 0
}
return 3, true
}
func (p *HorizontalRuleParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
}
return &ast.HorizontalRule{
Symbol: tokens[0].Type,
}, nil
Symbol: matchedTokens[0].Type,
}, 3
}

@ -51,7 +51,7 @@ func TestHorizontalRuleParser(t *testing.T) {
for _, test := range tests {
tokens := tokenizer.Tokenize(test.text)
node, _ := NewHorizontalRuleParser().Parse(tokens)
node, _ := NewHorizontalRuleParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.horizontalRule}), restore.Restore([]ast.Node{node}))
}
}

@ -1,8 +1,6 @@
package parser
import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
@ -13,34 +11,33 @@ func NewImageParser() *ImageParser {
return &ImageParser{}
}
func (*ImageParser) Match(tokens []*tokenizer.Token) (int, bool) {
if len(tokens) < 5 {
return 0, false
func (p *ImageParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
matchedTokens := tokenizer.GetFirstLine(tokens)
if len(matchedTokens) < 5 {
return nil, 0
}
if tokens[0].Type != tokenizer.ExclamationMark {
return 0, false
if matchedTokens[0].Type != tokenizer.ExclamationMark {
return nil, 0
}
if tokens[1].Type != tokenizer.LeftSquareBracket {
return 0, false
if matchedTokens[1].Type != tokenizer.LeftSquareBracket {
return nil, 0
}
cursor, altText := 2, ""
for ; cursor < len(tokens)-2; cursor++ {
if tokens[cursor].Type == tokenizer.Newline {
return 0, false
}
if tokens[cursor].Type == tokenizer.RightSquareBracket {
cursor, altTokens := 2, []*tokenizer.Token{}
for ; cursor < len(matchedTokens)-2; cursor++ {
if matchedTokens[cursor].Type == tokenizer.RightSquareBracket {
break
}
altText += tokens[cursor].Value
altTokens = append(altTokens, matchedTokens[cursor])
}
if tokens[cursor+1].Type != tokenizer.LeftParenthesis {
return 0, false
if matchedTokens[cursor+1].Type != tokenizer.LeftParenthesis {
return nil, 0
}
cursor += 2
contentTokens, matched := []*tokenizer.Token{}, false
for _, token := range tokens[cursor:] {
if token.Type == tokenizer.Newline || token.Type == tokenizer.Space {
return 0, false
for _, token := range matchedTokens[cursor:] {
if token.Type == tokenizer.Space {
return nil, 0
}
if token.Type == tokenizer.RightParenthesis {
matched = true
@ -49,27 +46,11 @@ func (*ImageParser) Match(tokens []*tokenizer.Token) (int, bool) {
contentTokens = append(contentTokens, token)
}
if !matched || len(contentTokens) == 0 {
return 0, false
return nil, 0
}
return cursor + len(contentTokens) + 1, true
}
func (p *ImageParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
}
altTextTokens := []*tokenizer.Token{}
for _, token := range tokens[2:] {
if token.Type == tokenizer.RightSquareBracket {
break
}
altTextTokens = append(altTextTokens, token)
}
contentTokens := tokens[2+len(altTextTokens)+2 : size-1]
return &ast.Image{
AltText: tokenizer.Stringify(altTextTokens),
AltText: tokenizer.Stringify(altTokens),
URL: tokenizer.Stringify(contentTokens),
}, nil
}, cursor + len(contentTokens) + 1
}

@ -40,7 +40,7 @@ func TestImageParser(t *testing.T) {
}
for _, test := range tests {
tokens := tokenizer.Tokenize(test.text)
node, _ := NewImageParser().Parse(tokens)
node, _ := NewImageParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.image}), restore.Restore([]ast.Node{node}))
}
}

@ -1,8 +1,6 @@
package parser
import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
@ -15,22 +13,20 @@ func NewItalicParser() *ItalicParser {
return &ItalicParser{}
}
func (*ItalicParser) Match(tokens []*tokenizer.Token) (int, bool) {
if len(tokens) < 3 {
return 0, false
func (p *ItalicParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
matchedTokens := tokenizer.GetFirstLine(tokens)
if len(matchedTokens) < 3 {
return nil, 0
}
prefixTokens := tokens[:1]
prefixTokens := matchedTokens[:1]
if prefixTokens[0].Type != tokenizer.Asterisk && prefixTokens[0].Type != tokenizer.Underscore {
return 0, false
return nil, 0
}
prefixTokenType := prefixTokens[0].Type
contentTokens := []*tokenizer.Token{}
matched := false
for _, token := range tokens[1:] {
if token.Type == tokenizer.Newline {
return 0, false
}
for _, token := range matchedTokens[1:] {
if token.Type == prefixTokenType {
matched = true
break
@ -38,22 +34,11 @@ func (*ItalicParser) Match(tokens []*tokenizer.Token) (int, bool) {
contentTokens = append(contentTokens, token)
}
if !matched || len(contentTokens) == 0 {
return 0, false
}
return len(contentTokens) + 2, true
}
func (p *ItalicParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
return nil, 0
}
prefixTokenType := tokens[0].Type
contentTokens := tokens[1 : size-1]
return &ast.Italic{
Symbol: prefixTokenType,
Content: tokenizer.Stringify(contentTokens),
}, nil
}, len(contentTokens) + 2
}

@ -44,7 +44,7 @@ func TestItalicParser(t *testing.T) {
for _, test := range tests {
tokens := tokenizer.Tokenize(test.text)
node, _ := NewItalicParser().Parse(tokens)
node, _ := NewItalicParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.italic}), restore.Restore([]ast.Node{node}))
}
}

@ -1,8 +1,6 @@
package parser
import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
@ -13,21 +11,12 @@ func NewLineBreakParser() *LineBreakParser {
return &LineBreakParser{}
}
func (*LineBreakParser) Match(tokens []*tokenizer.Token) (int, bool) {
func (p *LineBreakParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
if len(tokens) == 0 {
return 0, false
return nil, 0
}
if tokens[0].Type != tokenizer.Newline {
return 0, false
return nil, 0
}
return 1, true
}
func (p *LineBreakParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
}
return &ast.LineBreak{}, nil
return &ast.LineBreak{}, 1
}

@ -1,8 +1,6 @@
package parser
import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
@ -13,62 +11,44 @@ func NewLinkParser() *LinkParser {
return &LinkParser{}
}
func (*LinkParser) Match(tokens []*tokenizer.Token) (int, bool) {
if len(tokens) < 5 {
return 0, false
func (p *LinkParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
matchedTokens := tokenizer.GetFirstLine(tokens)
if len(matchedTokens) < 5 {
return nil, 0
}
if tokens[0].Type != tokenizer.LeftSquareBracket {
return 0, false
if matchedTokens[0].Type != tokenizer.LeftSquareBracket {
return nil, 0
}
textTokens := []*tokenizer.Token{}
for _, token := range tokens[1:] {
if token.Type == tokenizer.Newline {
return 0, false
}
for _, token := range matchedTokens[1:] {
if token.Type == tokenizer.RightSquareBracket {
break
}
textTokens = append(textTokens, token)
}
if len(textTokens)+4 >= len(tokens) {
return 0, false
if len(textTokens)+4 >= len(matchedTokens) {
return nil, 0
}
if tokens[2+len(textTokens)].Type != tokenizer.LeftParenthesis {
return 0, false
if matchedTokens[2+len(textTokens)].Type != tokenizer.LeftParenthesis {
return nil, 0
}
urlTokens := []*tokenizer.Token{}
for _, token := range tokens[3+len(textTokens):] {
if token.Type == tokenizer.Newline || token.Type == tokenizer.Space {
return 0, false
for _, token := range matchedTokens[3+len(textTokens):] {
if token.Type == tokenizer.Space {
return nil, 0
}
if token.Type == tokenizer.RightParenthesis {
break
}
urlTokens = append(urlTokens, token)
}
if 4+len(urlTokens)+len(textTokens) > len(tokens) {
return 0, false
}
return 4 + len(urlTokens) + len(textTokens), true
}
func (p *LinkParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
if 4+len(urlTokens)+len(textTokens) > len(matchedTokens) {
return nil, 0
}
textTokens := []*tokenizer.Token{}
for _, token := range tokens[1:] {
if token.Type == tokenizer.RightSquareBracket {
break
}
textTokens = append(textTokens, token)
}
urlTokens := tokens[2+len(textTokens)+1 : size-1]
return &ast.Link{
Text: tokenizer.Stringify(textTokens),
URL: tokenizer.Stringify(urlTokens),
}, nil
}, 4 + len(urlTokens) + len(textTokens)
}

@ -47,7 +47,7 @@ func TestLinkParser(t *testing.T) {
}
for _, test := range tests {
tokens := tokenizer.Tokenize(test.text)
node, _ := NewLinkParser().Parse(tokens)
node, _ := NewLinkParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.link}), restore.Restore([]ast.Node{node}))
}
}

@ -1,8 +1,6 @@
package parser
import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
@ -13,44 +11,29 @@ func NewMathParser() *MathParser {
return &MathParser{}
}
func (*MathParser) Match(tokens []*tokenizer.Token) (int, bool) {
if len(tokens) < 3 {
return 0, false
func (p *MathParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
matchedTokens := tokenizer.GetFirstLine(tokens)
if len(matchedTokens) < 3 {
return nil, 0
}
if tokens[0].Type != tokenizer.DollarSign {
return 0, false
if matchedTokens[0].Type != tokenizer.DollarSign {
return nil, 0
}
contentTokens := []*tokenizer.Token{}
for _, token := range tokens[1:] {
if token.Type == tokenizer.Newline {
return 0, false
}
matched := false
for _, token := range matchedTokens[1:] {
if token.Type == tokenizer.DollarSign {
matched = true
break
}
contentTokens = append(contentTokens, token)
}
if len(contentTokens) == 0 {
return 0, false
}
if len(contentTokens)+2 > len(tokens) {
return 0, false
}
if tokens[len(contentTokens)+1].Type != tokenizer.DollarSign {
return 0, false
if !matched || len(contentTokens) == 0 {
return nil, 0
}
return len(contentTokens) + 2, true
}
func (p *MathParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
}
return &ast.Math{
Content: tokenizer.Stringify(tokens[1 : size-1]),
}, nil
Content: tokenizer.Stringify(contentTokens),
}, len(contentTokens) + 2
}

@ -1,8 +1,6 @@
package parser
import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
@ -13,44 +11,40 @@ func NewMathBlockParser() *MathBlockParser {
return &MathBlockParser{}
}
func (*MathBlockParser) Match(tokens []*tokenizer.Token) (int, bool) {
if len(tokens) < 7 {
return 0, false
func (p *MathBlockParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
rows := tokenizer.Split(tokens, tokenizer.Newline)
if len(rows) < 3 {
return nil, 0
}
if tokens[0].Type != tokenizer.DollarSign || tokens[1].Type != tokenizer.DollarSign || tokens[2].Type != tokenizer.Newline {
return 0, false
firstRow := rows[0]
if len(firstRow) != 2 {
return nil, 0
}
if firstRow[0].Type != tokenizer.DollarSign || firstRow[1].Type != tokenizer.DollarSign {
return nil, 0
}
cursor := 3
contentRows := [][]*tokenizer.Token{}
matched := false
for ; cursor < len(tokens)-2; cursor++ {
if tokens[cursor].Type == tokenizer.Newline && tokens[cursor+1].Type == tokenizer.DollarSign && tokens[cursor+2].Type == tokenizer.DollarSign {
if cursor+2 == len(tokens)-1 {
cursor += 3
matched = true
break
} else if tokens[cursor+3].Type == tokenizer.Newline {
cursor += 3
matched = true
break
}
for _, row := range rows[1:] {
if len(row) == 2 && row[0].Type == tokenizer.DollarSign && row[1].Type == tokenizer.DollarSign {
matched = true
break
}
contentRows = append(contentRows, row)
}
if !matched {
return 0, false
return nil, 0
}
return cursor, true
}
func (p *MathBlockParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
contentTokens := []*tokenizer.Token{}
for _, row := range contentRows {
contentTokens = append(contentTokens, row...)
contentTokens = append(contentTokens, &tokenizer.Token{
Type: tokenizer.Newline,
})
}
return &ast.MathBlock{
Content: tokenizer.Stringify(tokens[3 : size-3]),
}, nil
Content: tokenizer.Stringify(contentTokens),
}, 3 + len(contentTokens) + 2
}

@ -30,7 +30,7 @@ func TestMathBlockParser(t *testing.T) {
}
for _, test := range tests {
tokens := tokenizer.Tokenize(test.text)
node, _ := NewMathBlockParser().Parse(tokens)
node, _ := NewMathBlockParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.link}), restore.Restore([]ast.Node{node}))
}
}

@ -24,7 +24,7 @@ func TestMathParser(t *testing.T) {
}
for _, test := range tests {
tokens := tokenizer.Tokenize(test.text)
node, _ := NewMathParser().Parse(tokens)
node, _ := NewMathParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.link}), restore.Restore([]ast.Node{node}))
}
}

@ -1,8 +1,6 @@
package parser
import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
@ -13,61 +11,37 @@ func NewOrderedListParser() *OrderedListParser {
return &OrderedListParser{}
}
func (*OrderedListParser) Match(tokens []*tokenizer.Token) (int, bool) {
if len(tokens) < 4 {
return 0, false
}
func (p *OrderedListParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
matchedTokens := tokenizer.GetFirstLine(tokens)
indent := 0
for _, token := range tokens {
for _, token := range matchedTokens {
if token.Type == tokenizer.Space {
indent++
} else {
break
}
}
corsor := indent
if tokens[corsor].Type != tokenizer.Number || tokens[corsor+1].Type != tokenizer.Dot || tokens[corsor+2].Type != tokenizer.Space {
return 0, false
if len(matchedTokens) < indent+3 {
return nil, 0
}
contentTokens := []*tokenizer.Token{}
for _, token := range tokens[corsor+3:] {
if token.Type == tokenizer.Newline {
break
}
contentTokens = append(contentTokens, token)
corsor := indent
if matchedTokens[corsor].Type != tokenizer.Number || matchedTokens[corsor+1].Type != tokenizer.Dot || matchedTokens[corsor+2].Type != tokenizer.Space {
return nil, 0
}
contentTokens := matchedTokens[corsor+3:]
if len(contentTokens) == 0 {
return 0, false
}
return indent + len(contentTokens) + 3, true
}
func (p *OrderedListParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
return nil, 0
}
indent := 0
for _, token := range tokens {
if token.Type == tokenizer.Space {
indent++
} else {
break
}
}
contentTokens := tokens[indent+3 : size]
children, err := ParseInline(contentTokens)
if err != nil {
return nil, err
return nil, 0
}
return &ast.OrderedList{
Number: tokens[indent].Value,
Number: matchedTokens[indent].Value,
Indent: indent,
Children: children,
}, nil
}, indent + 3 + len(contentTokens)
}

@ -65,7 +65,7 @@ func TestOrderedListParser(t *testing.T) {
for _, test := range tests {
tokens := tokenizer.Tokenize(test.text)
node, _ := NewOrderedListParser().Parse(tokens)
node, _ := NewOrderedListParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.node}), restore.Restore([]ast.Node{node}))
}
}

@ -1,8 +1,6 @@
package parser
import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
@ -15,31 +13,17 @@ func NewParagraphParser() *ParagraphParser {
return &ParagraphParser{}
}
func (*ParagraphParser) Match(tokens []*tokenizer.Token) (int, bool) {
contentTokens := []*tokenizer.Token{}
for _, token := range tokens {
if token.Type == tokenizer.Newline {
break
}
contentTokens = append(contentTokens, token)
}
if len(contentTokens) == 0 {
return 0, false
}
return len(contentTokens), true
}
func (p *ParagraphParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
func (p *ParagraphParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
matchedTokens := tokenizer.GetFirstLine(tokens)
if len(matchedTokens) == 0 {
return nil, 0
}
children, err := ParseInline(tokens[:size])
children, err := ParseInline(matchedTokens)
if err != nil {
return nil, err
return nil, 0
}
return &ast.Paragraph{
Children: children,
}, nil
}, len(matchedTokens)
}

@ -57,7 +57,7 @@ func TestParagraphParser(t *testing.T) {
for _, test := range tests {
tokens := tokenizer.Tokenize(test.text)
node, _ := NewParagraphParser().Parse(tokens)
node, _ := NewParagraphParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.paragraph}), restore.Restore([]ast.Node{node}))
}
}

@ -1,8 +1,6 @@
package parser
import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
@ -13,8 +11,7 @@ type Context struct {
}
type BaseParser interface {
Match(tokens []*tokenizer.Token) (int, bool)
Parse(tokens []*tokenizer.Token) (ast.Node, error)
Match(tokens []*tokenizer.Token) (ast.Node, int)
}
type InlineParser interface {
@ -53,13 +50,9 @@ func ParseBlockWithParsers(tokens []*tokenizer.Token, blockParsers []BlockParser
var prevNode ast.Node
for len(tokens) > 0 {
for _, blockParser := range blockParsers {
size, matched := blockParser.Match(tokens)
if matched {
node, err := blockParser.Parse(tokens)
if err != nil {
return nil, errors.New("parse error")
}
node, size := blockParser.Match(tokens)
if node != nil {
// Consume matched tokens.
tokens = tokens[size:]
if prevNode != nil {
prevNode.SetNextSibling(node)
@ -102,13 +95,9 @@ func ParseInlineWithParsers(tokens []*tokenizer.Token, inlineParsers []InlinePar
var prevNode ast.Node
for len(tokens) > 0 {
for _, inlineParser := range inlineParsers {
size, matched := inlineParser.Match(tokens)
if matched {
node, err := inlineParser.Parse(tokens)
if err != nil {
return nil, errors.New("parse error")
}
node, size := inlineParser.Match(tokens)
if node != nil {
// Consume matched tokens.
tokens = tokens[size:]
if prevNode != nil {
// Merge text nodes if possible.
@ -120,8 +109,8 @@ func ParseInlineWithParsers(tokens []*tokenizer.Token, inlineParsers []InlinePar
prevNode.SetNextSibling(node)
node.SetPrevSibling(prevNode)
}
nodes = append(nodes, node)
prevNode = node
nodes = append(nodes, node)
break
}
}

@ -1,8 +1,6 @@
package parser
import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
@ -13,39 +11,29 @@ func NewStrikethroughParser() *StrikethroughParser {
return &StrikethroughParser{}
}
func (*StrikethroughParser) Match(tokens []*tokenizer.Token) (int, bool) {
if len(tokens) < 5 {
return 0, false
func (p *StrikethroughParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
matchedTokens := tokenizer.GetFirstLine(tokens)
if len(matchedTokens) < 5 {
return nil, 0
}
if tokens[0].Type != tokenizer.Tilde || tokens[1].Type != tokenizer.Tilde {
return 0, false
if matchedTokens[0].Type != tokenizer.Tilde || matchedTokens[1].Type != tokenizer.Tilde {
return nil, 0
}
cursor, matched := 2, false
for ; cursor < len(tokens)-1; cursor++ {
token, nextToken := tokens[cursor], tokens[cursor+1]
if token.Type == tokenizer.Newline || nextToken.Type == tokenizer.Newline {
return 0, false
}
contentTokens := []*tokenizer.Token{}
matched := false
for cursor := 2; cursor < len(matchedTokens)-1; cursor++ {
token, nextToken := matchedTokens[cursor], matchedTokens[cursor+1]
if token.Type == tokenizer.Tilde && nextToken.Type == tokenizer.Tilde {
matched = true
break
}
contentTokens = append(contentTokens, token)
}
if !matched {
return 0, false
}
return cursor + 2, true
}
func (p *StrikethroughParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
if !matched || len(contentTokens) == 0 {
return nil, 0
}
contentTokens := tokens[2 : size-2]
return &ast.Strikethrough{
Content: tokenizer.Stringify(contentTokens),
}, nil
}, len(contentTokens) + 4
}

@ -41,7 +41,7 @@ func TestStrikethroughParser(t *testing.T) {
for _, test := range tests {
tokens := tokenizer.Tokenize(test.text)
node, _ := NewStrikethroughParser().Parse(tokens)
node, _ := NewStrikethroughParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.strikethrough}), restore.Restore([]ast.Node{node}))
}
}

@ -1,8 +1,6 @@
package parser
import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
@ -13,20 +11,18 @@ func NewSubscriptParser() *SubscriptParser {
return &SubscriptParser{}
}
func (*SubscriptParser) Match(tokens []*tokenizer.Token) (int, bool) {
if len(tokens) < 3 {
return 0, false
func (p *SubscriptParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
matchedTokens := tokenizer.GetFirstLine(tokens)
if len(matchedTokens) < 3 {
return nil, 0
}
if tokens[0].Type != tokenizer.Tilde {
return 0, false
if matchedTokens[0].Type != tokenizer.Tilde {
return nil, 0
}
contentTokens := []*tokenizer.Token{}
matched := false
for _, token := range tokens[1:] {
if token.Type == tokenizer.Newline {
return 0, false
}
for _, token := range matchedTokens[1:] {
if token.Type == tokenizer.Tilde {
matched = true
break
@ -34,20 +30,10 @@ func (*SubscriptParser) Match(tokens []*tokenizer.Token) (int, bool) {
contentTokens = append(contentTokens, token)
}
if !matched || len(contentTokens) == 0 {
return 0, false
}
return len(contentTokens) + 2, true
}
func (p *SubscriptParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
return nil, 0
}
contentTokens := tokens[1 : size-1]
return &ast.Subscript{
Content: tokenizer.Stringify(contentTokens),
}, nil
}, len(contentTokens) + 2
}

@ -41,7 +41,7 @@ func TestSubscriptParser(t *testing.T) {
for _, test := range tests {
tokens := tokenizer.Tokenize(test.text)
node, _ := NewSubscriptParser().Parse(tokens)
node, _ := NewSubscriptParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.subscript}), restore.Restore([]ast.Node{node}))
}
}

@ -1,8 +1,6 @@
package parser
import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
@ -13,20 +11,18 @@ func NewSuperscriptParser() *SuperscriptParser {
return &SuperscriptParser{}
}
func (*SuperscriptParser) Match(tokens []*tokenizer.Token) (int, bool) {
if len(tokens) < 3 {
return 0, false
func (p *SuperscriptParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
matchedTokens := tokenizer.GetFirstLine(tokens)
if len(matchedTokens) < 3 {
return nil, 0
}
if tokens[0].Type != tokenizer.Caret {
return 0, false
if matchedTokens[0].Type != tokenizer.Caret {
return nil, 0
}
contentTokens := []*tokenizer.Token{}
matched := false
for _, token := range tokens[1:] {
if token.Type == tokenizer.Newline {
return 0, false
}
for _, token := range matchedTokens[1:] {
if token.Type == tokenizer.Caret {
matched = true
break
@ -34,20 +30,10 @@ func (*SuperscriptParser) Match(tokens []*tokenizer.Token) (int, bool) {
contentTokens = append(contentTokens, token)
}
if !matched || len(contentTokens) == 0 {
return 0, false
}
return len(contentTokens) + 2, true
}
func (p *SuperscriptParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
return nil, 0
}
contentTokens := tokens[1 : size-1]
return &ast.Superscript{
Content: tokenizer.Stringify(contentTokens),
}, nil
}, len(contentTokens) + 2
}

@ -41,7 +41,7 @@ func TestSuperscriptParser(t *testing.T) {
for _, test := range tests {
tokens := tokenizer.Tokenize(test.text)
node, _ := NewSuperscriptParser().Parse(tokens)
node, _ := NewSuperscriptParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.superscript}), restore.Restore([]ast.Node{node}))
}
}

@ -1,8 +1,6 @@
package parser
import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
@ -13,138 +11,113 @@ func NewTableParser() *TableParser {
return &TableParser{}
}
func (*TableParser) Match(tokens []*tokenizer.Token) (int, bool) {
headerTokens := []*tokenizer.Token{}
for _, token := range tokens {
if token.Type == tokenizer.Newline {
break
}
headerTokens = append(headerTokens, token)
}
if len(headerTokens) < 5 || len(tokens) < len(headerTokens)+3 {
return 0, false
func (p *TableParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
rawRows := tokenizer.Split(tokens, tokenizer.Newline)
if len(rawRows) < 3 {
return nil, 0
}
delimiterTokens := []*tokenizer.Token{}
for _, token := range tokens[len(headerTokens)+1:] {
if token.Type == tokenizer.Newline {
break
}
delimiterTokens = append(delimiterTokens, token)
}
if len(delimiterTokens) < 5 || len(tokens) < len(headerTokens)+len(delimiterTokens)+3 {
return 0, false
headerTokens := rawRows[0]
if len(headerTokens) < 3 {
return nil, 0
}
rowTokens := []*tokenizer.Token{}
for index, token := range tokens[len(headerTokens)+len(delimiterTokens)+2:] {
temp := len(headerTokens) + len(delimiterTokens) + 2 + index
if token.Type == tokenizer.Newline {
if (temp == len(tokens)-1) || (temp+1 == len(tokens)-1 && tokens[temp+1].Type == tokenizer.Newline) {
break
}
}
rowTokens = append(rowTokens, token)
}
if len(rowTokens) < 5 {
return 0, false
delimiterTokens := rawRows[1]
if len(delimiterTokens) < 3 {
return nil, 0
}
// Check header.
if len(headerTokens) < 5 {
return 0, false
return nil, 0
}
headerCells, ok := matchTableCellTokens(headerTokens)
if headerCells == 0 || !ok {
return 0, false
return nil, 0
}
// Check delimiter.
if len(delimiterTokens) < 5 {
return 0, false
return nil, 0
}
delimiterCells, ok := matchTableCellTokens(delimiterTokens)
if delimiterCells != headerCells || !ok {
return 0, false
return nil, 0
}
for index, t := range tokenizer.Split(delimiterTokens, tokenizer.Pipe) {
if index == 0 || index == headerCells {
if len(t) != 0 {
return 0, false
return nil, 0
}
continue
}
// Each delimiter cell should be like ` --- `, ` :-- `, ` --: `, ` :-: `.
if len(t) < 5 {
return 0, false
return nil, 0
}
delimiterTokens := t[1 : len(t)-1]
if len(delimiterTokens) < 3 {
return 0, false
return nil, 0
}
if (delimiterTokens[0].Type != tokenizer.Colon && delimiterTokens[0].Type != tokenizer.Hyphen) || (delimiterTokens[len(delimiterTokens)-1].Type != tokenizer.Colon && delimiterTokens[len(delimiterTokens)-1].Type != tokenizer.Hyphen) {
return 0, false
if (delimiterTokens[0].Type != tokenizer.Colon &&
delimiterTokens[0].Type != tokenizer.Hyphen) ||
(delimiterTokens[len(delimiterTokens)-1].Type != tokenizer.Colon &&
delimiterTokens[len(delimiterTokens)-1].Type != tokenizer.Hyphen) {
return nil, 0
}
for _, token := range delimiterTokens[1 : len(delimiterTokens)-1] {
if token.Type != tokenizer.Hyphen {
return 0, false
return nil, 0
}
}
}
// Check rows.
if len(rowTokens) < 5 {
return 0, false
}
rows := tokenizer.Split(rowTokens, tokenizer.Newline)
if len(rows) == 0 {
return 0, false
}
for _, row := range rows {
cells, ok := matchTableCellTokens(row)
rows := rawRows[2:]
matchedRows := 0
for _, rowTokens := range rows {
cells, ok := matchTableCellTokens(rowTokens)
if cells != headerCells || !ok {
return 0, false
break
}
matchedRows++
}
return len(headerTokens) + len(delimiterTokens) + len(rowTokens) + 2, true
}
func (p *TableParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
if matchedRows == 0 {
return nil, 0
}
rows = rows[:matchedRows]
rawRows := tokenizer.Split(tokens[:size-1], tokenizer.Newline)
headerTokens := rawRows[0]
dilimiterTokens := rawRows[1]
rowTokens := rawRows[2:]
header := make([]string, 0)
delimiter := make([]string, 0)
rows := make([][]string, 0)
rowsStr := make([][]string, 0)
cols := len(tokenizer.Split(headerTokens, tokenizer.Pipe)) - 2
for _, t := range tokenizer.Split(headerTokens, tokenizer.Pipe)[1 : cols+1] {
header = append(header, tokenizer.Stringify(t[1:len(t)-1]))
}
for _, t := range tokenizer.Split(dilimiterTokens, tokenizer.Pipe)[1 : cols+1] {
for _, t := range tokenizer.Split(delimiterTokens, tokenizer.Pipe)[1 : cols+1] {
delimiter = append(delimiter, tokenizer.Stringify(t[1:len(t)-1]))
}
for _, row := range rowTokens {
for _, row := range rows {
cells := make([]string, 0)
for _, t := range tokenizer.Split(row, tokenizer.Pipe)[1 : cols+1] {
cells = append(cells, tokenizer.Stringify(t[1:len(t)-1]))
}
rows = append(rows, cells)
rowsStr = append(rowsStr, cells)
}
size := len(headerTokens) + len(delimiterTokens) + 2
for _, row := range rows {
size += len(row)
}
size = size + len(rows) - 1
return &ast.Table{
Header: header,
Delimiter: delimiter,
Rows: rows,
}, nil
Rows: rowsStr,
}, size
}
func matchTableCellTokens(tokens []*tokenizer.Token) (int, bool) {

@ -51,7 +51,7 @@ func TestTableParser(t *testing.T) {
for _, test := range tests {
tokens := tokenizer.Tokenize(test.text)
node, _ := NewTableParser().Parse(tokens)
node, _ := NewTableParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.table}), restore.Restore([]ast.Node{node}))
}
}

@ -1,8 +1,6 @@
package parser
import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
@ -13,35 +11,27 @@ func NewTagParser() *TagParser {
return &TagParser{}
}
func (*TagParser) Match(tokens []*tokenizer.Token) (int, bool) {
if len(tokens) < 2 {
return 0, false
func (p *TagParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
matchedTokens := tokenizer.GetFirstLine(tokens)
if len(matchedTokens) < 2 {
return nil, 0
}
if tokens[0].Type != tokenizer.PoundSign {
return 0, false
if matchedTokens[0].Type != tokenizer.PoundSign {
return nil, 0
}
contentTokens := []*tokenizer.Token{}
for _, token := range tokens[1:] {
if token.Type == tokenizer.Newline || token.Type == tokenizer.Space || token.Type == tokenizer.PoundSign {
for _, token := range matchedTokens[1:] {
if token.Type == tokenizer.Space || token.Type == tokenizer.PoundSign {
break
}
contentTokens = append(contentTokens, token)
}
if len(contentTokens) == 0 {
return 0, false
}
return len(contentTokens) + 1, true
}
func (p *TagParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
return nil, 0
}
contentTokens := tokens[1:size]
return &ast.Tag{
Content: tokenizer.Stringify(contentTokens),
}, nil
}, len(contentTokens) + 1
}

@ -39,7 +39,7 @@ func TestTagParser(t *testing.T) {
for _, test := range tests {
tokens := tokenizer.Tokenize(test.text)
node, _ := NewTagParser().Parse(tokens)
node, _ := NewTagParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.tag}), restore.Restore([]ast.Node{node}))
}
}

@ -1,8 +1,6 @@
package parser
import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
@ -13,71 +11,47 @@ func NewTaskListParser() *TaskListParser {
return &TaskListParser{}
}
func (*TaskListParser) Match(tokens []*tokenizer.Token) (int, bool) {
if len(tokens) < 7 {
return 0, false
}
func (p *TaskListParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
matchedTokens := tokenizer.GetFirstLine(tokens)
indent := 0
for _, token := range tokens {
for _, token := range matchedTokens {
if token.Type == tokenizer.Space {
indent++
} else {
break
}
}
symbolToken := tokens[indent]
if len(matchedTokens) < indent+6 {
return nil, 0
}
symbolToken := matchedTokens[indent]
if symbolToken.Type != tokenizer.Hyphen && symbolToken.Type != tokenizer.Asterisk && symbolToken.Type != tokenizer.PlusSign {
return 0, false
return nil, 0
}
if tokens[indent+1].Type != tokenizer.Space {
return 0, false
if matchedTokens[indent+1].Type != tokenizer.Space {
return nil, 0
}
if tokens[indent+2].Type != tokenizer.LeftSquareBracket || (tokens[indent+3].Type != tokenizer.Space && tokens[indent+3].Value != "x") || tokens[indent+4].Type != tokenizer.RightSquareBracket {
return 0, false
if matchedTokens[indent+2].Type != tokenizer.LeftSquareBracket || (matchedTokens[indent+3].Type != tokenizer.Space && matchedTokens[indent+3].Value != "x") || matchedTokens[indent+4].Type != tokenizer.RightSquareBracket {
return nil, 0
}
if tokens[indent+5].Type != tokenizer.Space {
return 0, false
if matchedTokens[indent+5].Type != tokenizer.Space {
return nil, 0
}
contentTokens := []*tokenizer.Token{}
for _, token := range tokens[indent+6:] {
if token.Type == tokenizer.Newline {
break
}
contentTokens = append(contentTokens, token)
}
contentTokens := matchedTokens[indent+6:]
if len(contentTokens) == 0 {
return 0, false
return nil, 0
}
return indent + len(contentTokens) + 6, true
}
func (p *TaskListParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
}
indent := 0
for _, token := range tokens {
if token.Type == tokenizer.Space {
indent++
} else {
break
}
}
symbolToken := tokens[indent]
contentTokens := tokens[indent+6 : size]
children, err := ParseInline(contentTokens)
if err != nil {
return nil, err
return nil, 0
}
return &ast.TaskList{
Symbol: symbolToken.Type,
Indent: indent,
Complete: tokens[indent+3].Value == "x",
Complete: matchedTokens[indent+3].Value == "x",
Children: children,
}, nil
}, indent + len(contentTokens) + 6
}

@ -65,7 +65,7 @@ func TestTaskListParser(t *testing.T) {
for _, test := range tests {
tokens := tokenizer.Tokenize(test.text)
node, _ := NewTaskListParser().Parse(tokens)
node, _ := NewTaskListParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.node}), restore.Restore([]ast.Node{node}))
}
}

@ -13,18 +13,11 @@ func NewTextParser() *TextParser {
return &TextParser{}
}
func (*TextParser) Match(tokens []*tokenizer.Token) (int, bool) {
func (*TextParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
if len(tokens) == 0 {
return 0, false
}
return 1, true
}
func (*TextParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
if len(tokens) == 0 {
return &ast.Text{}, nil
return nil, 0
}
return &ast.Text{
Content: tokens[0].String(),
}, nil
}, 1
}

@ -2,6 +2,7 @@ package tokenizer
type TokenType = string
// Special character tokens.
const (
Underscore TokenType = "_"
Asterisk TokenType = "*"
@ -29,6 +30,7 @@ const (
Space TokenType = " "
)
// Text based tokens.
const (
Number TokenType = "number"
Text TokenType = ""
@ -153,11 +155,29 @@ func Split(tokens []*Token, delimiter TokenType) [][]*Token {
return result
}
func Find(tokens []*Token, delimiter TokenType) (int, bool) {
for index, token := range tokens {
if token.Type == delimiter {
return index, true
func Find(tokens []*Token, target TokenType) int {
for i, token := range tokens {
if token.Type == target {
return i
}
}
return -1
}
func FindUnescaped(tokens []*Token, target TokenType) int {
for i, token := range tokens {
if token.Type == target && (i == 0 || (i > 0 && tokens[i-1].Type != Backslash)) {
return i
}
}
return 0, false
return -1
}
func GetFirstLine(tokens []*Token) []*Token {
for i, token := range tokens {
if token.Type == Newline {
return tokens[:i]
}
}
return tokens
}
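
The two helpers added above are the workhorses of the refactor: a matcher first bounds its scan to the current line with GetFirstLine, then finds the closing delimiter with FindUnescaped so backslash-escaped characters are skipped. An illustrative sketch of that pattern, mirroring the CodeParser hunk earlier (not part of the repo):

package parser

import "github.com/usememos/memos/plugin/gomark/parser/tokenizer"

// matchInlineCode bounds the scan to the first line, then finds the first
// unescaped closing backtick and returns the enclosed content plus the
// number of tokens a caller should consume.
func matchInlineCode(tokens []*tokenizer.Token) (string, int) {
    line := tokenizer.GetFirstLine(tokens)
    if len(line) < 3 || line[0].Type != tokenizer.Backtick {
        return "", 0
    }
    closing := tokenizer.FindUnescaped(line[1:], tokenizer.Backtick)
    if closing < 0 {
        return "", 0
    }
    matched := line[:1+closing+1] // opening backtick, content, closing backtick
    return tokenizer.Stringify(matched[1 : len(matched)-1]), len(matched)
}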

@ -1,8 +1,6 @@
package parser
import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
@ -13,62 +11,36 @@ func NewUnorderedListParser() *UnorderedListParser {
return &UnorderedListParser{}
}
func (*UnorderedListParser) Match(tokens []*tokenizer.Token) (int, bool) {
if len(tokens) < 3 {
return 0, false
}
func (p *UnorderedListParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
matchedTokens := tokenizer.GetFirstLine(tokens)
indent := 0
for _, token := range tokens {
for _, token := range matchedTokens {
if token.Type == tokenizer.Space {
indent++
} else {
break
}
}
corsor := indent
symbolToken := tokens[corsor]
if (symbolToken.Type != tokenizer.Hyphen && symbolToken.Type != tokenizer.Asterisk && symbolToken.Type != tokenizer.PlusSign) || tokens[corsor+1].Type != tokenizer.Space {
return 0, false
}
contentTokens := []*tokenizer.Token{}
for _, token := range tokens[corsor+2:] {
if token.Type == tokenizer.Newline {
break
}
contentTokens = append(contentTokens, token)
}
if len(contentTokens) == 0 {
return 0, false
if len(matchedTokens) < indent+2 {
return nil, 0
}
return indent + len(contentTokens) + 2, true
}
func (p *UnorderedListParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
symbolToken := matchedTokens[indent]
if (symbolToken.Type != tokenizer.Hyphen && symbolToken.Type != tokenizer.Asterisk && symbolToken.Type != tokenizer.PlusSign) || matchedTokens[indent+1].Type != tokenizer.Space {
return nil, 0
}
indent := 0
for _, token := range tokens {
if token.Type == tokenizer.Space {
indent++
} else {
break
}
contentTokens := matchedTokens[indent+2:]
if len(contentTokens) == 0 {
return nil, 0
}
symbolToken := tokens[indent]
contentTokens := tokens[indent+2 : size]
children, err := ParseInline(contentTokens)
if err != nil {
return nil, err
return nil, 0
}
return &ast.UnorderedList{
Symbol: symbolToken.Type,
Indent: indent,
Children: children,
}, nil
}, indent + len(contentTokens) + 2
}

@ -50,7 +50,7 @@ func TestUnorderedListParser(t *testing.T) {
for _, test := range tests {
tokens := tokenizer.Tokenize(test.text)
node, _ := NewUnorderedListParser().Parse(tokens)
node, _ := NewUnorderedListParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.node}), restore.Restore([]ast.Node{node}))
}
}
