From 65890bc257754a9d9bf89963c665cce603fa8f90 Mon Sep 17 00:00:00 2001
From: boojack
Date: Wed, 24 May 2023 00:31:37 +0800
Subject: [PATCH] feat: implement code block parser (#1727)

---
Note: this revision of the patch adds doc comments and switches content
accumulation to strings.Builder in code.go and code_block.go; the blob ids
on their index lines still refer to the originally committed contents.

 plugin/gomark/parser/code.go                | 52 +++++++++++++++
 plugin/gomark/parser/code_block.go          | 71 +++++++++++++++++++++
 plugin/gomark/parser/code_block_test.go     | 62 ++++++++++++++++++
 plugin/gomark/parser/code_test.go           | 36 +++++++++++
 plugin/gomark/parser/tokenizer/tokenizer.go |  3 +
 5 files changed, 224 insertions(+)
 create mode 100644 plugin/gomark/parser/code.go
 create mode 100644 plugin/gomark/parser/code_block.go
 create mode 100644 plugin/gomark/parser/code_block_test.go
 create mode 100644 plugin/gomark/parser/code_test.go

diff --git a/plugin/gomark/parser/code.go b/plugin/gomark/parser/code.go
new file mode 100644
index 000000000..6eae650c4
--- /dev/null
+++ b/plugin/gomark/parser/code.go
@@ -0,0 +1,52 @@
+package parser
+
+import (
+	"strings"
+
+	"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
+)
+
+// CodeParser matches an inline code span: a backtick, non-empty content that
+// contains no newline, and a closing backtick.
+type CodeParser struct {
+	// Content is the concatenated value of the tokens between the backticks.
+	Content string
+}
+
+// NewCodeParser returns an empty CodeParser whose Match method can be used to
+// parse a token stream.
+func NewCodeParser() *CodeParser {
+	return &CodeParser{}
+}
+
+// Match reports whether tokens begins with an inline code span. It returns a
+// CodeParser holding the span's content, or nil when there are fewer than
+// three tokens, the first token is not a backtick, a newline appears before
+// the closing backtick, no closing backtick is found, or the content is empty.
+func (*CodeParser) Match(tokens []*tokenizer.Token) *CodeParser {
+	if len(tokens) < 3 {
+		return nil
+	}
+	if tokens[0].Type != tokenizer.Backtick {
+		return nil
+	}
+
+	var content strings.Builder
+	matched := false
+	for _, token := range tokens[1:] {
+		if token.Type == tokenizer.Newline {
+			return nil
+		}
+		if token.Type == tokenizer.Backtick {
+			matched = true
+			break
+		}
+		content.WriteString(token.Value)
+	}
+	if !matched || content.Len() == 0 {
+		return nil
+	}
+	return &CodeParser{
+		Content: content.String(),
+	}
+}
diff --git a/plugin/gomark/parser/code_block.go b/plugin/gomark/parser/code_block.go
new file mode 100644
index 000000000..4bf4fcaca
--- /dev/null
+++ b/plugin/gomark/parser/code_block.go
@@ -0,0 +1,71 @@
+package parser
+
+import (
+	"strings"
+
+	"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
+)
+
+// CodeBlockParser matches a fenced code block: an opening line of three
+// backticks with an optional single-token language, the content lines, and a
+// closing line of three backticks.
+type CodeBlockParser struct {
+	// Language is the value of the token between the opening fence and the
+	// first newline, or "" when the fence is followed directly by a newline.
+	Language string
+	// Content is everything between the newline that ends the opening line
+	// and the newline that precedes the closing fence.
+	Content string
+}
+
+// NewCodeBlockParser returns an empty CodeBlockParser whose Match method can
+// be used to parse a token stream.
+func NewCodeBlockParser() *CodeBlockParser {
+	return &CodeBlockParser{}
+}
+
+// Match reports whether tokens begins with a fenced code block. It returns a
+// CodeBlockParser holding the language and content, or nil when there are
+// fewer than nine tokens, the opening fence is malformed, or no closing fence
+// is found. A closing fence only counts when it is the end of the input or is
+// followed by a newline.
+func (*CodeBlockParser) Match(tokens []*tokenizer.Token) *CodeBlockParser {
+	if len(tokens) < 9 {
+		return nil
+	}
+
+	if tokens[0].Type != tokenizer.Backtick || tokens[1].Type != tokenizer.Backtick || tokens[2].Type != tokenizer.Backtick {
+		return nil
+	}
+	if tokens[3].Type != tokenizer.Newline && tokens[4].Type != tokenizer.Newline {
+		return nil
+	}
+	cursor, language := 4, ""
+	if tokens[3].Type != tokenizer.Newline {
+		language = tokens[3].Value
+		cursor = 5
+	}
+
+	var content strings.Builder
+	matched := false
+	// Stop three tokens early so the look-ahead at cursor+1..cursor+3 stays in
+	// bounds.
+	for ; cursor < len(tokens)-3; cursor++ {
+		if tokens[cursor].Type == tokenizer.Newline && tokens[cursor+1].Type == tokenizer.Backtick && tokens[cursor+2].Type == tokenizer.Backtick && tokens[cursor+3].Type == tokenizer.Backtick {
+			// cursor+4 is only read when the fence is not the final token.
+			if cursor+3 == len(tokens)-1 || tokens[cursor+4].Type == tokenizer.Newline {
+				matched = true
+				break
+			}
+		}
+		content.WriteString(tokens[cursor].Value)
+	}
+	if !matched {
+		return nil
+	}
+
+	return &CodeBlockParser{
+		Language: language,
+		Content:  content.String(),
+	}
+}
diff --git a/plugin/gomark/parser/code_block_test.go b/plugin/gomark/parser/code_block_test.go
new file mode 100644
index 000000000..246c9704a
--- /dev/null
+++ b/plugin/gomark/parser/code_block_test.go
@@ -0,0 +1,62 @@
+package parser
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+	"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
+)
+
+func TestCodeBlockParser(t *testing.T) {
+	tests := []struct {
+		text      string
+		codeBlock *CodeBlockParser
+	}{
+		{
+			text:      "```Hello world!```",
+			codeBlock: nil,
+		},
+		{
+			text: "```\nHello\n```",
+			codeBlock: &CodeBlockParser{
+				Language: "",
+				Content:  "Hello",
+			},
+		},
+		{
+			text: "```\nHello world!\n```",
+			codeBlock: &CodeBlockParser{
+				Language: "",
+				Content:  "Hello world!",
+			},
+		},
+		{
+			text: "```java\nHello \n world!\n```",
+			codeBlock: &CodeBlockParser{
+				Language: "java",
+				Content:  "Hello \n world!",
+			},
+		},
+		{
+			text:      "```java\nHello \n world!\n```111",
+			codeBlock: nil,
+		},
+		{
+			text:      "```java\nHello \n world!\n``` 111",
+			codeBlock: nil,
+		},
+		{
+			text: "```java\nHello \n world!\n```\n123123",
+			codeBlock: &CodeBlockParser{
+				Language: "java",
+				Content:  "Hello \n world!",
+			},
+		},
+	}
+
+	for _, test := range tests {
+		tokens := tokenizer.Tokenize(test.text)
+		codeBlock := NewCodeBlockParser()
+		require.Equal(t, test.codeBlock, codeBlock.Match(tokens))
+	}
+}
diff --git a/plugin/gomark/parser/code_test.go b/plugin/gomark/parser/code_test.go
new file mode 100644
index 000000000..2a9a0cad2
--- /dev/null
+++ b/plugin/gomark/parser/code_test.go
@@ -0,0 +1,36 @@
+package parser
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+	"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
+)
+
+func TestCodeParser(t *testing.T) {
+	tests := []struct {
+		text string
+		code *CodeParser
+	}{
+		{
+			text: "`Hello world!",
+			code: nil,
+		},
+		{
+			text: "`Hello world!`",
+			code: &CodeParser{
+				Content: "Hello world!",
+			},
+		},
+		{
+			text: "`Hello \nworld!`",
+			code: nil,
+		},
+	}
+
+	for _, test := range tests {
+		tokens := tokenizer.Tokenize(test.text)
+		code := NewCodeParser()
+		require.Equal(t, test.code, code.Match(tokens))
+	}
+}
diff --git a/plugin/gomark/parser/tokenizer/tokenizer.go b/plugin/gomark/parser/tokenizer/tokenizer.go
index b277cc984..adf53a7d1 100644
--- a/plugin/gomark/parser/tokenizer/tokenizer.go
+++ b/plugin/gomark/parser/tokenizer/tokenizer.go
@@ -6,6 +6,7 @@ const (
 	Underline TokenType = "_"
 	Star      TokenType = "*"
 	Hash      TokenType = "#"
+	Backtick  TokenType = "`"
 	Newline   TokenType = "\n"
 	Space     TokenType = " "
 )
@@ -38,6 +39,8 @@ func Tokenize(text string) []*Token {
 			tokens = append(tokens, NewToken(Hash, "#"))
 		case '\n':
 			tokens = append(tokens, NewToken(Newline, "\n"))
+		case '`':
+			tokens = append(tokens, NewToken(Backtick, "`"))
 		case ' ':
 			tokens = append(tokens, NewToken(Space, " "))
 		default: