diff --git a/Turbulence.Core/ViewModels/Design/DesignMessagesViewModel.cs b/Turbulence.Core/ViewModels/Design/DesignMessagesViewModel.cs index f0d399c..1a9d399 100644 --- a/Turbulence.Core/ViewModels/Design/DesignMessagesViewModel.cs +++ b/Turbulence.Core/ViewModels/Design/DesignMessagesViewModel.cs @@ -52,6 +52,7 @@ public DesignMessagesViewModel() CurrentMessages.AddRange(new List { message1, CreateMessage("no", MessageType.DEFAULT, user2, now - new TimeSpan(0, 1, 0), message1), + CreateMessage("normal, ||spoiler|| *italic* ~~strike~~ **bold** **_mix_**\n```cs\ncodeblock```\n`inline`\nhttps://localhost.com\ne:weary: e<:salute:933778727547060234>\nu<@138397087229280257> c<#959372274299961344> r<@&985907053237268480>\n> quote1\n> quote2", MessageType.DEFAULT, user2, now - new TimeSpan(0, 2, 0), null) }); } diff --git a/Turbulence.Desktop/Converters/MessageContentConverter.cs b/Turbulence.Desktop/Converters/MessageContentConverter.cs index 633e1bd..f0b9fb5 100644 --- a/Turbulence.Desktop/Converters/MessageContentConverter.cs +++ b/Turbulence.Desktop/Converters/MessageContentConverter.cs @@ -4,6 +4,8 @@ using System.Globalization; using Turbulence.Discord; using Turbulence.Discord.Models.DiscordChannel; +using Turbulence.Discord.Utils; +using Turbulence.Discord.Utils.Parser; namespace Turbulence.Desktop.Converters; @@ -16,10 +18,58 @@ public class MessageContentConverter : IValueConverter if (value is not Message message) return null; - var res = new InlineCollection + var res = new InlineCollection(); + var content = _client.GetMessageContent(message); + var nodes = MessageParser.Parse(content); + + // Recursive function that turns nodes into avalonia inlines (run/span) + static Inline FromNode(Node node) + { + Inline ret; + switch (node.Type) + { + case NodeType.STRIKETHROUGH: + case NodeType.SPOILER: + case NodeType.BOLD: + case NodeType.UNDERLINE: + case NodeType.ITALIC: + case NodeType.CODE_INLINE: + case NodeType.CODE_BLOCK: + case NodeType.QUOTE_BLOCK: + ret = new Span(); + ret.Classes.Add(node.Type.ToString()); + if (node.Children != null) + foreach (var child in node.Children) + ((Span)ret).Inlines.Add(FromNode(child)); + break; + case NodeType.URL_WITH_PREVIEW: + case NodeType.URL_WITHOUT_PREVIEW: + ret = new Run(node.Url); + ret.Classes.Add("Url"); + break; + case NodeType.USER: + case NodeType.CHANNEL: + case NodeType.ROLE: + //TODO: mentions + ret = new Run($"@{node.Id}"); + break; + case NodeType.EMOJI_UNICODE_ENCODED: + case NodeType.EMOJI_CUSTOM: + //TODO: emojis + ret = new Run($":{node.Emoji}:"); + break; + case NodeType.TEXT: + default: + ret = new Run(node.Text); + break; + } + return ret; + } + // Add nodes + foreach (var node in nodes) { - _client.GetMessageContent(message), - }; + res.Add(FromNode(node)); + } if (message.EditedTimestamp != null) { var editRun = new Run(" [Edited]"); diff --git a/Turbulence.Desktop/Views/Main/MessageView.axaml b/Turbulence.Desktop/Views/Main/MessageView.axaml index e66c5a3..6501631 100644 --- a/Turbulence.Desktop/Views/Main/MessageView.axaml +++ b/Turbulence.Desktop/Views/Main/MessageView.axaml @@ -51,10 +51,44 @@ + + + + + + + + + + + + diff --git a/Turbulence.Discord.Test/MessageParser/LexerTests.cs b/Turbulence.Discord.Test/MessageParser/LexerTests.cs new file mode 100644 index 0000000..3d4e060 --- /dev/null +++ b/Turbulence.Discord.Test/MessageParser/LexerTests.cs @@ -0,0 +1,158 @@ +using Turbulence.Discord.Utils.Parser; + +namespace Turbulence.Discord.Test.MessageParser; + +public class LexerTests +{ + [SetUp] + public void Setup() + { + } + + private static void Test(string text, IEnumerable should) + { + var tokens = Lexer.Lex(text); + Assert.That(tokens, Is.Not.Null); + Assert.That(tokens, Is.Not.Empty); + foreach (var token in tokens) + { + Console.WriteLine(token); + } + Assert.That(tokens.Count(), Is.EqualTo(should.Count())); + for (var i = 0; i < tokens.Count(); i++) + { + var actual = tokens.ElementAt(i); + var expected = should.ElementAt(i); + Assert.Multiple(() => + { + Assert.That(actual.Type, Is.EqualTo(expected.Type)); + Assert.That(actual.Value, Is.EqualTo(expected.Value)); + }); + } + } + + [Test] + public void Text() + { + var text = "text, **bold**, *italics*, _italics_, ~~strikethrough~~, ||spoiler||"; + var tokens = new Token[] + { + new(TokenType.TEXT_INLINE, "text, "), + new(TokenType.STAR, "*"), + new(TokenType.STAR, "*"), + new(TokenType.TEXT_INLINE, "bold"), + new(TokenType.STAR, "*"), + new(TokenType.STAR, "*"), + new(TokenType.TEXT_INLINE, ", "), + new(TokenType.STAR, "*"), + new(TokenType.TEXT_INLINE, "italics"), + new(TokenType.STAR, "*"), + new(TokenType.TEXT_INLINE, ", "), + new(TokenType.UNDERSCORE, "_"), + new(TokenType.TEXT_INLINE, "italics"), + new(TokenType.UNDERSCORE, "_"), + new(TokenType.TEXT_INLINE, ", "), + new(TokenType.TILDE, "~"), + new(TokenType.TILDE, "~"), + new(TokenType.TEXT_INLINE, "strikethrough"), + new(TokenType.TILDE, "~"), + new(TokenType.TILDE, "~"), + new(TokenType.TEXT_INLINE, ", "), + new(TokenType.SPOILER_DELIMITER, "||"), + new(TokenType.TEXT_INLINE, "spoiler"), + new(TokenType.SPOILER_DELIMITER, "||"), + }; + Test(text, tokens); + } + + [Test] + public void Code() + { + var text = @"`code` +```cs +// *fake* +public void Real() { + Stuff(); +} +```".Replace("\r\n", "\n"); + var tokens = new Token[] + { + new(TokenType.CODE_INLINE_DELIMITER, "`"), + new(TokenType.TEXT_INLINE, "code"), + new(TokenType.CODE_INLINE_DELIMITER, "`"), + new(TokenType.NEWLINE, "\n"), + new(TokenType.CODE_BLOCK_DELIMITER, "```"), + new(TokenType.TEXT_INLINE, "cs"), + new(TokenType.NEWLINE, "\n"), + new(TokenType.TEXT_INLINE, "// "), + new(TokenType.STAR, "*"), + new(TokenType.TEXT_INLINE, "fake"), + new(TokenType.STAR, "*"), + new(TokenType.NEWLINE, "\n"), + new(TokenType.TEXT_INLINE, "public void Real() {"), + new(TokenType.NEWLINE, "\n"), + new(TokenType.TEXT_INLINE, " Stuff();"), + new(TokenType.NEWLINE, "\n"), + new(TokenType.TEXT_INLINE, "}"), + new(TokenType.NEWLINE, "\n"), + new(TokenType.CODE_BLOCK_DELIMITER, "```"), + }; + Test(text, tokens); + } + + [Test] + public void Mentions() + { + var text = "<@138397087229280257>\n<#959372274299961344>\n<@&985907053237268480>"; + var tokens = new Token[] + { + new(TokenType.USER_MENTION, "<@138397087229280257>"), + new(TokenType.NEWLINE, "\n"), + new(TokenType.CHANNEL_MENTION, "<#959372274299961344>"), + new(TokenType.NEWLINE, "\n"), + new(TokenType.ROLE_MENTION, "<@&985907053237268480>"), + }; + Test(text, tokens); + } + + [Test] + public void Emojis() + { + var text = ":weary:\n<:salute:933778727547060234>"; + var tokens = new Token[] + { + new(TokenType.EMOJI_UNICODE_ENCODED, ":weary:"), + new(TokenType.NEWLINE, "\n"), + new(TokenType.EMOJI_CUSTOM, "<:salute:933778727547060234>"), + }; + Test(text, tokens); + } + + [Test] + public void Links() + { + var text = "\nhttps://google.com"; + var tokens = new Token[] + { + new(TokenType.URL_WITHOUT_PREVIEW, ""), + new(TokenType.NEWLINE, "\n"), + new(TokenType.URL_WITH_PREVIEW, "https://google.com"), + }; + Test(text, tokens); + } + + [Test] + public void Quote() + { + var text = "> line1\n> line2"; + var tokens = new Token[] + { + new(TokenType.QUOTE_LINE_PREFIX, "> "), + new(TokenType.TEXT_INLINE, "line1"), + new(TokenType.NEWLINE, "\n"), + new(TokenType.QUOTE_LINE_PREFIX, "> "), + new(TokenType.TEXT_INLINE, "line2"), + }; + Test(text, tokens); + } +} \ No newline at end of file diff --git a/Turbulence.Discord.Test/MessageParser/ParserTests.cs b/Turbulence.Discord.Test/MessageParser/ParserTests.cs new file mode 100644 index 0000000..21fc1e2 --- /dev/null +++ b/Turbulence.Discord.Test/MessageParser/ParserTests.cs @@ -0,0 +1,179 @@ +using Turbulence.Discord.Utils.Parser; + +namespace Turbulence.Discord.Test.MessageParser; + +public class ParserTests +{ + [SetUp] + public void Setup() + { + } + + private static void Test(string text, IEnumerable should) + { + var parts = Utils.MessageParser.Parse(text); + Assert.That(parts, Is.Not.Empty); + + static void print(Node node, int depth = 0) + { + Console.WriteLine($"{new string('-', depth)}{node}"); + if (node.Children != null) + { + foreach (var c in node.Children) + { + print(c, depth + 1); + } + } + } + foreach (var part in parts) + { + print(part); + } + + // asserts that two nodes (and its children) are the same + static void assert(Node should, Node actual) + { + // Properties + Assert.Multiple(() => + { + Assert.That(actual.Type, Is.EqualTo(should.Type)); + Assert.That(actual.Text, Is.EqualTo(should.Text)); + Assert.That(actual.Id, Is.EqualTo(should.Id)); + Assert.That(actual.Emoji, Is.EqualTo(should.Emoji)); + Assert.That(actual.CodeLanguage, Is.EqualTo(should.CodeLanguage)); + Assert.That(actual.Url, Is.EqualTo(should.Url)); + }); + // Children + if (actual.Children == null) + Assert.That(should.Children, Is.Null); + else + { + Assert.That(should.Children, Is.Not.Null); + Assert.That(actual.Children.Count(), Is.EqualTo(should.Children!.Count())); + for (var i = 0; i < actual.Children.Count(); i++) + { + var a = actual.Children.ElementAt(i); + var s = should.Children!.ElementAt(i); + assert(s, a); + } + } + } + + Assert.That(parts, Has.Count.EqualTo(should.Count())); + for (var i = 0; i < parts.Count; i++) + { + var a = parts[i]; + var s = should.ElementAt(i); + assert(s, a); + } + } + [Test] + public void Text() + { + var text = "text, **bold**, *italics*, _italics_, ~~strikethrough~~, ||spoiler||"; + var should = new Node[] + { + new(NodeType.TEXT, Text: "text, "), + new(NodeType.BOLD, Children: new List(){ + new(NodeType.TEXT, Text: "bold") + }), + new(NodeType.TEXT, Text: ", "), + new(NodeType.ITALIC, Children: new List(){ + new(NodeType.TEXT, Text: "italics") + }), + new(NodeType.TEXT, Text: ", "), + new(NodeType.ITALIC, Children: new List(){ + new(NodeType.TEXT, Text: "italics") + }), + new(NodeType.TEXT, Text: ", "), + new(NodeType.STRIKETHROUGH, Children: new List(){ + new(NodeType.TEXT, Text: "strikethrough") + }), + new(NodeType.TEXT, Text: ", "), + new(NodeType.SPOILER, Children: new List(){ + new(NodeType.TEXT, Text: "spoiler") + }), + }; + Test(text, should); + } + + [Test] + public void Code() + { + var text = @"`code` +```cs +// *fake* +public void Real() { + Stuff(); +} +```".Replace("\r\n", "\n"); + var should = new Node[] + { + new(NodeType.CODE_INLINE, Children: new List() + { + new(NodeType.TEXT, Text: "code") + }), + new(NodeType.TEXT, Text: "\n"), + new(NodeType.CODE_BLOCK, CodeLanguage: "cs", Children: new List() + { + new(NodeType.TEXT, Text: "// *fake*\npublic void Real() {\n Stuff();\n}\n") + }) + }; + Test(text, should); + } + + [Test] + public void Mentions() + { + var text = "<@138397087229280257>\n<#959372274299961344>\n<@&985907053237268480>"; + var should = new Node[] + { + new(NodeType.USER, Id: new(138397087229280257)), + new(NodeType.TEXT, Text: "\n"), + new(NodeType.CHANNEL, Id: new(959372274299961344)), + new(NodeType.TEXT, Text: "\n"), + new(NodeType.ROLE, Id: new(985907053237268480)), + }; + Test(text, should); + } + + [Test] + public void Emojis() + { + var text = ":weary:\n<:salute:933778727547060234>"; + var should = new Node[] + { + new(NodeType.EMOJI_UNICODE_ENCODED, Emoji: "weary"), + new(NodeType.TEXT, Text: "\n"), + new(NodeType.EMOJI_CUSTOM, Id: new(933778727547060234), Emoji: "salute"), + }; + Test(text, should); + } + + [Test] + public void Links() + { + var text = "\nhttps://google.com"; + var should = new Node[] + { + new(NodeType.URL_WITHOUT_PREVIEW, Url: "https://google.com"), + new(NodeType.TEXT, Text: "\n"), + new(NodeType.URL_WITH_PREVIEW, Url: "https://google.com"), + }; + Test(text, should); + } + + [Test] + public void Quote() + { + var text = "> line1\n> line2"; + var should = new Node[] + { + new(NodeType.QUOTE_BLOCK, Children: new List() + { + new(NodeType.TEXT, Text: "line1\nline2") + }), + }; + Test(text, should); + } +} \ No newline at end of file diff --git a/Turbulence.Discord.Test/Turbulence.Discord.Test.csproj b/Turbulence.Discord.Test/Turbulence.Discord.Test.csproj new file mode 100644 index 0000000..a980f40 --- /dev/null +++ b/Turbulence.Discord.Test/Turbulence.Discord.Test.csproj @@ -0,0 +1,24 @@ + + + + net7.0 + enable + enable + + false + true + + + + + + + + + + + + + + + diff --git a/Turbulence.Discord.Test/Usings.cs b/Turbulence.Discord.Test/Usings.cs new file mode 100644 index 0000000..cefced4 --- /dev/null +++ b/Turbulence.Discord.Test/Usings.cs @@ -0,0 +1 @@ +global using NUnit.Framework; \ No newline at end of file diff --git a/Turbulence.Discord/Utils/MessageParser.cs b/Turbulence.Discord/Utils/MessageParser.cs new file mode 100644 index 0000000..dc3aa91 --- /dev/null +++ b/Turbulence.Discord/Utils/MessageParser.cs @@ -0,0 +1,16 @@ + + +using Turbulence.Discord.Utils.Parser; + +namespace Turbulence.Discord.Utils; + +public static class MessageParser +{ + public static List Parse(string text) + { + var tokens = Lexer.Lex(text); + if (tokens == null) + return new List(); + return Parser.Parser.ParseTokens(tokens.ToArray()); + } +} diff --git a/Turbulence.Discord/Utils/Parser/Lexer.cs b/Turbulence.Discord/Utils/Parser/Lexer.cs new file mode 100644 index 0000000..473910c --- /dev/null +++ b/Turbulence.Discord/Utils/Parser/Lexer.cs @@ -0,0 +1,150 @@ +using System.Text.RegularExpressions; + +namespace Turbulence.Discord.Utils.Parser; + +public enum TokenType +{ + TEXT_INLINE = 1, + NEWLINE, + STAR, + UNDERSCORE, + TILDE, + SPOILER_DELIMITER, + USER_MENTION, + ROLE_MENTION, + CHANNEL_MENTION, + EMOJI_CUSTOM, + EMOJI_UNICODE_ENCODED, + URL_WITH_PREVIEW, + URL_WITHOUT_PREVIEW, + QUOTE_LINE_PREFIX, + CODE_INLINE_DELIMITER, + CODE_BLOCK_DELIMITER +} + +public record Token(TokenType Type, string Value, GroupCollection? Groups = null); + +public static partial class Lexer +{ + public static IEnumerable? Lex(string input) + { + var seenSimpleText = ""; + + while (true) + { + if (input.Length == 0) + { + if (seenSimpleText.Length > 0) + { + yield return new(TokenType.TEXT_INLINE, seenSimpleText); + } + // finished + yield break; + } + + LexingRule? matchingRule = null; + Match? match = null; + foreach (var rule in Rules) + { + match = rule.Pattern.Match(input); + if (match.Success) + { + matchingRule = rule; + break; + } + } + + if (matchingRule == null) + { + seenSimpleText += input[0]; + input = input[1..]; + continue; // don't yield a token in this run + } + + // cut off matched part + input = input[match!.Captures[0].Length..]; + + // yield inline text if we have some left + if (seenSimpleText.Length > 0) + { + yield return new(TokenType.TEXT_INLINE, seenSimpleText); + seenSimpleText = ""; + } + + GroupCollection? groups = null; + if (match.Groups.Count > 0) + { + groups = match.Groups; + } + + yield return new(matchingRule.Type, match.Groups[0].Value, groups); + } + } + + private record LexingRule(TokenType Type, Regex Pattern); + private static readonly LexingRule[] Rules = + { + new(TokenType.USER_MENTION, UserMentionRegex()), + new(TokenType.ROLE_MENTION, RoleMentionRegex()), + new(TokenType.CHANNEL_MENTION, ChannelMentionRegex()), + new(TokenType.EMOJI_CUSTOM, CustomEmojiRegex()), + new(TokenType.EMOJI_UNICODE_ENCODED, UnicodeEmojiRegex()), + new(TokenType.URL_WITHOUT_PREVIEW, URLRegex()), + new(TokenType.URL_WITH_PREVIEW, URLPreviewRegex()), + new(TokenType.QUOTE_LINE_PREFIX, QuoteLineRegex()), + new(TokenType.TILDE, TildeRegex()), + new(TokenType.STAR, StarRegex()), + new(TokenType.UNDERSCORE, UnderscoreRegex()), + new(TokenType.SPOILER_DELIMITER, SpoilerRegex()), + new(TokenType.CODE_BLOCK_DELIMITER, CodeBlockRegex()), + new(TokenType.CODE_INLINE_DELIMITER, CodeInlineRegex()), + new(TokenType.NEWLINE, NewlineRegex()), + }; + + // https://discord.com/developers/docs/reference#message-formatting + private const string URL_REGEX = "http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"; + [GeneratedRegex("^<@!?([0-9]+)>", RegexOptions.Compiled)] + private static partial Regex UserMentionRegex(); + + [GeneratedRegex("^<@&([0-9]+)>", RegexOptions.Compiled)] + private static partial Regex RoleMentionRegex(); + + [GeneratedRegex("^<#([0-9]+)>", RegexOptions.Compiled)] + private static partial Regex ChannelMentionRegex(); + + [GeneratedRegex("^<:([a-zA-Z0-9_]{2,}):([0-9]+)>", RegexOptions.Compiled)] + private static partial Regex CustomEmojiRegex(); + + [GeneratedRegex("^:([a-zA-Z0-9_]+):", RegexOptions.Compiled)] + private static partial Regex UnicodeEmojiRegex(); + + [GeneratedRegex($"^<{URL_REGEX}>", RegexOptions.Compiled)] + private static partial Regex URLRegex(); + + [GeneratedRegex($"^{URL_REGEX}", RegexOptions.Compiled)] + private static partial Regex URLPreviewRegex(); + + [GeneratedRegex("^(>>)?> ", RegexOptions.Compiled)] + private static partial Regex QuoteLineRegex(); + + [GeneratedRegex("^~", RegexOptions.Compiled)] + private static partial Regex TildeRegex(); + + [GeneratedRegex("^\\*", RegexOptions.Compiled)] + private static partial Regex StarRegex(); + + [GeneratedRegex("^_", RegexOptions.Compiled)] + private static partial Regex UnderscoreRegex(); + + [GeneratedRegex("^\\|\\|", RegexOptions.Compiled)] + private static partial Regex SpoilerRegex(); + + [GeneratedRegex("^```", RegexOptions.Compiled)] + private static partial Regex CodeBlockRegex(); + + [GeneratedRegex("^`", RegexOptions.Compiled)] + private static partial Regex CodeInlineRegex(); + + [GeneratedRegex("^\n", RegexOptions.Compiled)] + private static partial Regex NewlineRegex(); +} \ No newline at end of file diff --git a/Turbulence.Discord/Utils/Parser/Parser.cs b/Turbulence.Discord/Utils/Parser/Parser.cs new file mode 100644 index 0000000..05b24f1 --- /dev/null +++ b/Turbulence.Discord/Utils/Parser/Parser.cs @@ -0,0 +1,427 @@ +using System.Text.RegularExpressions; +using Turbulence.Discord.Models; + +namespace Turbulence.Discord.Utils.Parser; + +public enum NodeType +{ + TEXT = 1, + ITALIC, + BOLD, + UNDERLINE, + STRIKETHROUGH, + SPOILER, + USER, + ROLE, + CHANNEL, + EMOJI_CUSTOM, + EMOJI_UNICODE_ENCODED, + URL_WITH_PREVIEW, + URL_WITHOUT_PREVIEW, + QUOTE_BLOCK, + CODE_BLOCK, + CODE_INLINE +} + +public record Node(NodeType Type, string? Text = null, Snowflake? Id = null, string? Emoji = null, string? CodeLanguage = null, string? Url = null, IEnumerable? Children = null) +{ + public string? Text { get; set; } = Text; + public IEnumerable? Children { get; set; } = Children; +} + +public static partial class Parser +{ + public static List ParseTokens(Token[] tokens) + { + return MergeTextNodes(ParseTokensGenerator(tokens)); + } + + + public static List MergeTextNodes(IEnumerable subtree) + { + var compressedTree = new List(); + Node? prevTextNode = null; + foreach (var node in subtree) + { + if (node.Type == NodeType.TEXT) + { + if (prevTextNode is null) + { + prevTextNode = node; + } + else + { + prevTextNode.Text += node.Text; + continue; // don't store this node + } + } + else + { + + prevTextNode = null; + } + + if (node.Children is not null) + { + node.Children = MergeTextNodes(node.Children); + } + + compressedTree.Add(node); + } + return compressedTree; + } + + private static readonly Dictionary _textModifiers = new() + { + { new TokenType[]{ TokenType.STAR, TokenType.STAR }, NodeType.BOLD }, + { new TokenType[]{ TokenType.UNDERSCORE, TokenType.UNDERSCORE }, NodeType.UNDERLINE }, + { new TokenType[]{ TokenType.TILDE, TokenType.TILDE }, NodeType.STRIKETHROUGH }, + { new TokenType[]{ TokenType.STAR }, NodeType.ITALIC }, + { new TokenType[]{ TokenType.UNDERSCORE }, NodeType.ITALIC }, + { new TokenType[]{ TokenType.SPOILER_DELIMITER }, NodeType.SPOILER }, + { new TokenType[]{ TokenType.CODE_INLINE_DELIMITER }, NodeType.CODE_INLINE }, + }; + [GeneratedRegex("^([a-zA-Z0-9-]*)(.*)$", RegexOptions.Compiled)] + private static partial Regex CodeLanguageRegex(); + public static IEnumerable ParseTokensGenerator(Token[] tokens, bool inQuote = false) + { + var i = 0; + while (i < tokens.Length) + { + Token current = tokens[i]; + + // === simple node types without children + // just continue; once any of them match + + // text + if (current.Type == TokenType.TEXT_INLINE) + { + yield return new Node(NodeType.TEXT, Text: current.Value); + i += 1; + continue; + } + + // user mentions + if (current.Type == TokenType.USER_MENTION) + { + yield return new Node(NodeType.USER, Id: new(ulong.Parse(current.Groups![1].Value))); + i += 1; + continue; + } + + // role mentions + if (current.Type == TokenType.ROLE_MENTION) + { + yield return new Node(NodeType.ROLE, Id: new(ulong.Parse(current.Groups![1].Value))); + i += 1; + continue; + } + + // channel mentions + if (current.Type == TokenType.CHANNEL_MENTION) + { + yield return new Node(NodeType.CHANNEL, Id: new(ulong.Parse(current.Groups![1].Value))); + i += 1; + continue; + } + + // custom emoji + if (current.Type == TokenType.EMOJI_CUSTOM) + { + yield return new Node( + NodeType.EMOJI_CUSTOM, + Id: new(ulong.Parse(current.Groups![2].Value)), + Emoji: current.Groups[1].Value + ); + i += 1; + continue; + } + + // unicode emoji (when it's encoded as :name: and not just written as unicode) + if (current.Type == TokenType.EMOJI_UNICODE_ENCODED) + { + yield return new Node( + NodeType.EMOJI_UNICODE_ENCODED, + Emoji: current.Groups![1].Value + ); + i += 1; + continue; + }; + + // URL with preview + if (current.Type == TokenType.URL_WITH_PREVIEW) + { + yield return new Node(NodeType.URL_WITH_PREVIEW, Url: current.Value); + i += 1; + continue; + } + + // URL without preview + if (current.Type == TokenType.URL_WITHOUT_PREVIEW) + { + yield return new Node(NodeType.URL_WITHOUT_PREVIEW, Url: current.Value[1..^1]); + i += 1; + continue; + } + + // === text modifiers + // these just modify the look of the text (bold, italic, inline code, ...), + // can appear everywhere (outside of code blocks) and can span all other + // elements (including code blocks) and can span across newlines. + // they must have at least one child token. + // note, however, that text modifiers (and all other nodes with children), + // can not overlap partially: + // strikethrough is completely inside italic, works: + // *a~~b~~c*d = abcd + // strikethrough only partially overlaps italic, strikethrough is ignored + // *a~~bc*d~~ = a~~bc~~d + // + // known issue: + // we don't account for the fact that spoilers can't wrap code blocks + { + Node? node = null; + int? consumedTokenCount = null; + foreach ((var delimiter, var nodeType) in _textModifiers) + { + var res = TryParseNodeWithChildren( + tokens[i..], delimiter, delimiter, nodeType, inQuote + ); + node = res.Item1; + consumedTokenCount = res.Item2; + + if (node != null) + break; + } + + + if (node != null) + { + i += consumedTokenCount!.Value; + yield return node; + continue; + } + } + + // === code blocks + // these are similar to text modifiers but have some additional twists + // - code blocks only contain inline text, all other markdown rules are disabled + // inside code blocks + // - the first line can be a language specifier for syntax highlighting. + // - the LS starts immediately after the code block delimiter and is + // immediately followed by a newline, otherwise it is treated as normal + // text content of the code block. + // - if the language specifier is omitted completely, i.e., the code block + // delimiter is immediately followed by a newline, then that newline is + // removed: + // ``` + // test + // ``` + // is, in HTML, test
+ // and not
test
+ + if (current.Type == TokenType.CODE_BLOCK_DELIMITER) + { + var (childrenToken, consumedTokenCount) = SearchForCloser( + tokens[(i + 1)..], new TokenType[] { TokenType.CODE_BLOCK_DELIMITER } + ); + if (childrenToken != null) + { + var childrenContent = ""; + // treat all children token as inline text + foreach (var child_token in childrenToken) + { + childrenContent += child_token.Value; + } + + // check for a language specifier + var lines = childrenContent.Split("\n"); + // there must be at least one other non-empty line + // (the content doesn't matter, there just has to be one) + var nonEmptyLineFound = false; + + string? lang = null; + for (var lineIndex = 1; lineIndex < lines.Length; lineIndex++) + { + if (lines[lineIndex].Length > 0) + { + nonEmptyLineFound = true; + break; + } + } + if (nonEmptyLineFound) + { + var match = CodeLanguageRegex().Match(lines[0]); + //if there is any behind the lang spec, then it is normal text + // otherwise, it is either a lang spec (gets removed from the + // displayed text) or it is empty (newline gets removed) + if (match.Groups[2].Length == 0) + { + lines = lines[1..]; // remove first line from code block + if (match.Groups[1].Length > 0) + { + lang = match.Groups[1].Value; + } + } + } + + + childrenContent = string.Join("\n", lines); + var child_node = new Node(NodeType.TEXT, Text: childrenContent); + yield return new Node(NodeType.CODE_BLOCK, CodeLanguage: lang, Children: new List() { child_node }); + i += 1 + consumedTokenCount!.Value; + continue; + } + } + + // === quote blocks + // these are a bit trickier. essentially, quote blocks are also + // "just another text modifier" but with a few more complicated rules + // - quote blocks always have "> " at the very beginning of every line + // - quote blocks can span multiple lines, meaning that if multiple consecutive + // lines start with "> ", then they belong to the same quote block + // - quote blocks can't be nested. any quote delimiters inside a quote block + // are just inline text. all other elements can appear inside a quote block + // - text modifiers + List? childrenTokenInQuoteBlock = new(); + // note that in_quote won't change during the while-loop, we're just reducing + // the level of indentation here by including it in the condition instead of + // making an additional if statement around the while loop + while ( + !inQuote && + i < tokens.Length && + tokens[i].Type == TokenType.QUOTE_LINE_PREFIX) + { + // scan until next newline + var found = false; + for (var j = i; j < tokens.Length; j++) + { + if (tokens[j].Type == TokenType.NEWLINE) + { + // add everything from the quote line prefix (non-inclusive) + // to the newline (inclusive) as children token + childrenTokenInQuoteBlock.AddRange(tokens[(i + 1)..(j + 1)]); + i = j + 1; // move to the token after the newline + found = true; + break; + } + } + if (!found) + { + // this is the last line, + // all remaining tokens are part of the quote block + childrenTokenInQuoteBlock.AddRange(tokens[(i + 1)..]); + i = tokens.Length; // move to the end + break; + } + } + + + if (childrenTokenInQuoteBlock.Count > 0) + { + // tell the inner parse function that it's now inside a quote block + var childrenNodes = ParseTokensGenerator(childrenTokenInQuoteBlock.ToArray(), inQuote = true); + yield return new Node(NodeType.QUOTE_BLOCK, Children: childrenNodes.ToList()); + continue; + } + + //if we get all the way here, than whatever token we're currently sitting on + // is not an inline text token but also failed to match any of our parsing rules. + // this happens when a special character, such as ">" or "*" is used as part of + // normal text. + // in this case, we just treat it as normal text. + // + // TODO: + // note that we don't combine multiple text nodes here. + // we *could* do it similar to what we do in the lexer but + // - remembering inline text into future loop iterations would require adding + // a check to every yield return-continue; combo in this function, which would be quite + // ugly + // - we can't change previous segments without dropping the generator + // functionality (even though that *is* the current workaround) + // - we can't look ahead without simulating this entire function on future tokens + //if you know how to do this *without adding ugly code*: help is appreciated. + // until then, this is a case of "we'll cross that bridge when we get there", + // i.e., we'll fix it if anyone comes along that actually needs it + yield return new Node(NodeType.TEXT, current.Value); + i += 1; + } + } + + + private static (Node?, int?) TryParseNodeWithChildren( + Token[] tokens, + TokenType[] opener, + TokenType[] closer, + NodeType nodeType, + bool in_quote + ) + { + //if there aren't enough tokens to match this node type, abort immediately + // +1 because there needs to be at least one child token + if (tokens.Length < opener.Length + 1 + closer.Length) + return (null, null); + + // check if the opener matches + for (var openerIndex = 0; openerIndex < opener.Length; openerIndex++) + { + if (tokens[openerIndex].Type != opener[openerIndex]) + { + return (null, null); + } + } + + // try finding the matching closer and consume as few tokens as possible + // (skip the first token as that has to be a child token) + // TODO: edge case ***bold and italic*** doesn't work + var (childrenToken, consumedTokenCount) = SearchForCloser( + tokens[(opener.Length + 1)..], closer + ); + + if (childrenToken == null) + { + // closer not found, abort trying to parse as the selected node type + return (null, null); + } + + // put first child token back in + childrenToken = new Token[] { tokens[opener.Length] }.Concat(childrenToken!).ToArray(); + //children_token = (tokens[opener.Length], *children_token); + consumedTokenCount += opener.Length + 1; + + return ( + new Node( + nodeType, + Children: ParseTokensGenerator(childrenToken, in_quote).ToList() + ), + consumedTokenCount); + } + + + private static (Token[]?, int?) SearchForCloser(Token[] tokens, TokenType[] closer) + { + // iterate over tokens + for (var tokenIndex = 0; tokenIndex < tokens.Length - closer.Length + 1; tokenIndex++) + { + var matches = true; + // try matching the closer to the current position by iterating over the closer + for (var closerIndex = 0; closerIndex < closer.Length; closerIndex++) + { + if (tokens[tokenIndex + closerIndex].Type != closer[closerIndex]) + { + matches = false; + break; + } + } + + // closer matched + if (matches) + { + return (tokens[..tokenIndex], tokenIndex + closer.Length); + } + } + // closer didn't match, try next token_index + + // closer was not found + return (null, null); + } +} diff --git a/Turbulence.sln b/Turbulence.sln index 51f72a0..63bcaa6 100644 --- a/Turbulence.sln +++ b/Turbulence.sln @@ -9,11 +9,11 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Turbulence.ModelGenerator", EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Turbulence.Discord", "Turbulence.Discord\Turbulence.Discord.csproj", "{03A741E6-770F-4013-A26D-1CC2FB7BF9CC}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Turbulence.TGUI", "Turbulence.TGUI\Turbulence.TGUI.csproj", "{5C9C116F-CC5F-4FCD-BE4C-BC5DCA992B87}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Turbulence.TGUI", "Turbulence.TGUI\Turbulence.TGUI.csproj", "{5C9C116F-CC5F-4FCD-BE4C-BC5DCA992B87}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Turbulence.Core", "Turbulence.Core\Turbulence.Core.csproj", "{E4F0AD5F-74AF-40DE-9D40-A63E602C2636}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Turbulence.Core", "Turbulence.Core\Turbulence.Core.csproj", "{E4F0AD5F-74AF-40DE-9D40-A63E602C2636}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Turbulence.Desktop", "Turbulence.Desktop\Turbulence.Desktop.csproj", "{EE593AB4-B2BA-4872-A33B-E695BBF2B232}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Turbulence.Desktop", "Turbulence.Desktop\Turbulence.Desktop.csproj", "{EE593AB4-B2BA-4872-A33B-E695BBF2B232}" EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Platforms", "Platforms", "{83FBB5FD-A549-4A21-A5D4-E68F027A0DD1}" EndProject @@ -21,6 +21,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Utilities", "Utilities", "{ EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Frontend", "Frontend", "{C0D654EF-8CDC-4090-B40C-485E645355E1}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Turbulence.Discord.Test", "Turbulence.Discord.Test\Turbulence.Discord.Test.csproj", "{7285C15D-D7DF-4EED-BACA-B19B13454E83}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -51,18 +53,23 @@ Global {EE593AB4-B2BA-4872-A33B-E695BBF2B232}.Debug|Any CPU.Build.0 = Debug|Any CPU {EE593AB4-B2BA-4872-A33B-E695BBF2B232}.Release|Any CPU.ActiveCfg = Release|Any CPU {EE593AB4-B2BA-4872-A33B-E695BBF2B232}.Release|Any CPU.Build.0 = Release|Any CPU + {7285C15D-D7DF-4EED-BACA-B19B13454E83}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {7285C15D-D7DF-4EED-BACA-B19B13454E83}.Debug|Any CPU.Build.0 = Debug|Any CPU + {7285C15D-D7DF-4EED-BACA-B19B13454E83}.Release|Any CPU.ActiveCfg = Release|Any CPU + {7285C15D-D7DF-4EED-BACA-B19B13454E83}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - SolutionGuid = {80B3CED7-2126-4425-A0E4-833CCC5F9649} - EndGlobalSection GlobalSection(NestedProjects) = preSolution + {DA085BF7-18C0-4FEF-AB89-75E7E3F503FD} = {C0D654EF-8CDC-4090-B40C-485E645355E1} {615DD5FC-B1EE-4132-BA29-02EE988C59BB} = {47047DC2-0CF4-4ECA-B5A7-A536F3EDEFA4} + {03A741E6-770F-4013-A26D-1CC2FB7BF9CC} = {83FBB5FD-A549-4A21-A5D4-E68F027A0DD1} {5C9C116F-CC5F-4FCD-BE4C-BC5DCA992B87} = {C0D654EF-8CDC-4090-B40C-485E645355E1} {EE593AB4-B2BA-4872-A33B-E695BBF2B232} = {C0D654EF-8CDC-4090-B40C-485E645355E1} - {03A741E6-770F-4013-A26D-1CC2FB7BF9CC} = {83FBB5FD-A549-4A21-A5D4-E68F027A0DD1} - {DA085BF7-18C0-4FEF-AB89-75E7E3F503FD} = {C0D654EF-8CDC-4090-B40C-485E645355E1} + {7285C15D-D7DF-4EED-BACA-B19B13454E83} = {83FBB5FD-A549-4A21-A5D4-E68F027A0DD1} + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {80B3CED7-2126-4425-A0E4-833CCC5F9649} EndGlobalSection EndGlobal