diff --git a/src/CodeFormatter.cs b/src/CodeFormatter.cs
index 1e89ee22a9..f39ab21441 100644
--- a/src/CodeFormatter.cs
+++ b/src/CodeFormatter.cs
@@ -24,6 +24,7 @@ internal static class CodeFormatter
             new WhitespaceFormatter(),
             new FinalNewlineFormatter(),
             new EndOfLineFormatter(),
+            new CharsetFormatter(),
         }.ToImmutableArray();
 
         public static async Task FormatWorkspaceAsync(
diff --git a/src/Formatters/CharsetFormatter.cs b/src/Formatters/CharsetFormatter.cs
new file mode 100644
index 0000000000..f58959c0fa
--- /dev/null
+++ b/src/Formatters/CharsetFormatter.cs
@@ -0,0 +1,88 @@
+// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
+
+using System.Text;
+using System.Threading;
+using System.Threading.Tasks;
+using Microsoft.CodeAnalysis.Options;
+using Microsoft.CodeAnalysis.Text;
+using Microsoft.Extensions.Logging;
+using Microsoft.VisualStudio.CodingConventions;
+
+namespace Microsoft.CodeAnalysis.Tools.Formatters
+{
+    /// <summary>
+    /// Re-encodes a document when its encoding differs from the "charset" coding convention.
+    /// </summary>
+    internal sealed class CharsetFormatter : DocumentFormatter
+    {
+        protected override string FormatWarningDescription => Resources.Fix_file_encoding;
+
+        // Created once and reused instead of allocating or looking up an encoding on every access.
+        private static readonly Encoding Utf8 = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);
+        private static readonly Encoding Latin1 = Encoding.GetEncoding("iso-8859-1");
+
+        /// <summary>
+        /// Returns <paramref name="sourceText"/> unchanged when no "charset" convention is present or its
+        /// encoding already equals the configured one; otherwise returns the same characters with the configured encoding.
+        /// </summary>
+        protected override Task FormatFileAsync(
+            Document document,
+            SourceText sourceText,
+            OptionSet options,
+            ICodingConventionsSnapshot codingConventions,
+            FormatOptions formatOptions,
+            ILogger logger,
+            CancellationToken cancellationToken)
+        {
+            return Task.Run(() =>
+            {
+                // Call Equals on the configured encoding: GetCharset never returns null, whereas
+                // sourceText.Encoding may be null and would throw if it were the receiver.
+                if (!TryGetCharset(codingConventions, out var encoding)
+                    || encoding.Equals(sourceText.Encoding))
+                {
+                    return sourceText;
+                }
+
+                return SourceText.From(sourceText.ToString(), encoding);
+            }, cancellationToken);
+        }
+
+        /// <summary>
+        /// Reads the "charset" convention value and maps it to an encoding via <see cref="GetCharset"/>.
+        /// </summary>
+        /// <returns>true when the convention is present; otherwise false, with <paramref name="encoding"/> set to null.</returns>
+        private static bool TryGetCharset(ICodingConventionsSnapshot codingConventions, out Encoding encoding)
+        {
+            if (codingConventions.TryGetConventionValue("charset", out string charsetOption))
+            {
+                encoding = GetCharset(charsetOption);
+                return true;
+            }
+
+            encoding = null;
+            return false;
+        }
+
+        /// <summary>
+        /// Maps a charset option value to an encoding; any unrecognized value maps to UTF-8 without a BOM.
+        /// </summary>
+        public static Encoding GetCharset(string charsetOption)
+        {
+            switch (charsetOption)
+            {
+                case "latin1":
+                    return Latin1;
+                case "utf-8-bom":
+                    return Encoding.UTF8; // UTF-8 with BOM Marker
+                case "utf-16be":
+                    return Encoding.BigEndianUnicode; // Big Endian with BOM Marker
+                case "utf-16le":
+                    return Encoding.Unicode; // Little Endian with BOM Marker
+                case "utf-8":
+                default:
+                    return Utf8;
+            }
+        }
+    }
+}
diff --git a/src/Formatters/DocumentFormatter.cs b/src/Formatters/DocumentFormatter.cs
index 98cc3c81c8..dad9b889ae 100644
--- a/src/Formatters/DocumentFormatter.cs
+++ b/src/Formatters/DocumentFormatter.cs
@@ -81,7 +81,7 @@ protected abstract Task FormatFileAsync(
             var originalSourceText = await document.GetTextAsync(cancellationToken).ConfigureAwait(false);
             var formattedSourceText = await FormatFileAsync(document, originalSourceText, options, codingConventions, formatOptions, logger, cancellationToken).ConfigureAwait(false);
 
-            return !formattedSourceText.ContentEquals(originalSourceText)
+            return !formattedSourceText.ContentEquals(originalSourceText) || !object.Equals(formattedSourceText.Encoding, originalSourceText.Encoding) // static Equals tolerates a null Encoding on either side
                 ? (originalSourceText, formattedSourceText)
                 : (originalSourceText, null);
         }
@@ -117,7 +117,7 @@ private async Task ApplyFileChangesAsync(
                     LogFormattingChanges(formatOptions.WorkspaceFilePath, document.FilePath, originalText, formattedText, formatOptions.ChangesAreErrors, logger);
                 }
 
-                formattedSolution = formattedSolution.WithDocumentText(document.Id, formattedText);
+                formattedSolution = formattedSolution.WithDocumentText(document.Id, formattedText, PreservationMode.PreserveIdentity);
             }
 
             return formattedSolution;
diff --git a/src/Resources.resx b/src/Resources.resx
index 33ae19f4cf..2a86be8ca3 100644
--- a/src/Resources.resx
+++ b/src/Resources.resx
@@ -201,4 +201,7 @@
 
     Fix whitespace formatting.
 
+
+    Fix file encoding.
+
 
\ No newline at end of file
diff --git a/src/xlf/Resources.cs.xlf b/src/xlf/Resources.cs.xlf
index c7b24acc92..48fc81ae18 100644
--- a/src/xlf/Resources.cs.xlf
+++ b/src/xlf/Resources.cs.xlf
@@ -47,6 +47,11 @@
         Fix end of line marker.
 
 
+
+        Fix file encoding.
+        Fix file encoding.
+
+
 
         Fix whitespace formatting.
         Fix whitespace formatting.
diff --git a/src/xlf/Resources.de.xlf b/src/xlf/Resources.de.xlf
index c3bf63a593..75716acc73 100644
--- a/src/xlf/Resources.de.xlf
+++ b/src/xlf/Resources.de.xlf
@@ -47,6 +47,11 @@
         Fix end of line marker.
 
 
+
+        Fix file encoding.
+        Fix file encoding.
+
+
 
         Fix whitespace formatting.
         Fix whitespace formatting.
diff --git a/src/xlf/Resources.es.xlf b/src/xlf/Resources.es.xlf
index 165d8bd43c..c292b95efb 100644
--- a/src/xlf/Resources.es.xlf
+++ b/src/xlf/Resources.es.xlf
@@ -47,6 +47,11 @@
         Fix end of line marker.
 
 
+
+        Fix file encoding.
+        Fix file encoding.
+
+
 
         Fix whitespace formatting.
         Fix whitespace formatting.
diff --git a/src/xlf/Resources.fr.xlf b/src/xlf/Resources.fr.xlf
index a3934f8bd7..528b9962ee 100644
--- a/src/xlf/Resources.fr.xlf
+++ b/src/xlf/Resources.fr.xlf
@@ -47,6 +47,11 @@
         Fix end of line marker.
 
 
+
+        Fix file encoding.
+        Fix file encoding.
+
+
 
         Fix whitespace formatting.
         Fix whitespace formatting.
diff --git a/src/xlf/Resources.it.xlf b/src/xlf/Resources.it.xlf index 0c7d782f97..d5ec868f7c 100644 --- a/src/xlf/Resources.it.xlf +++ b/src/xlf/Resources.it.xlf @@ -47,6 +47,11 @@ Fix end of line marker. + + Fix file encoding. + Fix file encoding. + + Fix whitespace formatting. Fix whitespace formatting. diff --git a/src/xlf/Resources.ja.xlf b/src/xlf/Resources.ja.xlf index 9d6ab61da4..509ee5ad9a 100644 --- a/src/xlf/Resources.ja.xlf +++ b/src/xlf/Resources.ja.xlf @@ -47,6 +47,11 @@ Fix end of line marker. + + Fix file encoding. + Fix file encoding. + + Fix whitespace formatting. Fix whitespace formatting. diff --git a/src/xlf/Resources.ko.xlf b/src/xlf/Resources.ko.xlf index 9b900c5da9..4ba7dc9fad 100644 --- a/src/xlf/Resources.ko.xlf +++ b/src/xlf/Resources.ko.xlf @@ -47,6 +47,11 @@ Fix end of line marker. + + Fix file encoding. + Fix file encoding. + + Fix whitespace formatting. Fix whitespace formatting. diff --git a/src/xlf/Resources.pl.xlf b/src/xlf/Resources.pl.xlf index f206586bc2..653d8745ac 100644 --- a/src/xlf/Resources.pl.xlf +++ b/src/xlf/Resources.pl.xlf @@ -47,6 +47,11 @@ Fix end of line marker. + + Fix file encoding. + Fix file encoding. + + Fix whitespace formatting. Fix whitespace formatting. diff --git a/src/xlf/Resources.pt-BR.xlf b/src/xlf/Resources.pt-BR.xlf index 2134767bc3..47ee54d398 100644 --- a/src/xlf/Resources.pt-BR.xlf +++ b/src/xlf/Resources.pt-BR.xlf @@ -47,6 +47,11 @@ Fix end of line marker. + + Fix file encoding. + Fix file encoding. + + Fix whitespace formatting. Fix whitespace formatting. diff --git a/src/xlf/Resources.ru.xlf b/src/xlf/Resources.ru.xlf index 0f178f1850..dcf0dfa05a 100644 --- a/src/xlf/Resources.ru.xlf +++ b/src/xlf/Resources.ru.xlf @@ -47,6 +47,11 @@ Fix end of line marker. + + Fix file encoding. + Fix file encoding. + + Fix whitespace formatting. Fix whitespace formatting. 
diff --git a/src/xlf/Resources.tr.xlf b/src/xlf/Resources.tr.xlf
index e25bd5fa63..fb42711278 100644
--- a/src/xlf/Resources.tr.xlf
+++ b/src/xlf/Resources.tr.xlf
@@ -47,6 +47,11 @@
         Fix end of line marker.
 
 
+
+        Fix file encoding.
+        Fix file encoding.
+
+
 
         Fix whitespace formatting.
         Fix whitespace formatting.
diff --git a/src/xlf/Resources.zh-Hans.xlf b/src/xlf/Resources.zh-Hans.xlf
index 14d975c76a..4f19172e6e 100644
--- a/src/xlf/Resources.zh-Hans.xlf
+++ b/src/xlf/Resources.zh-Hans.xlf
@@ -47,6 +47,11 @@
         Fix end of line marker.
 
 
+
+        Fix file encoding.
+        Fix file encoding.
+
+
 
         Fix whitespace formatting.
         Fix whitespace formatting.
diff --git a/src/xlf/Resources.zh-Hant.xlf b/src/xlf/Resources.zh-Hant.xlf
index a33d2fd8b4..5f60ebdcc6 100644
--- a/src/xlf/Resources.zh-Hant.xlf
+++ b/src/xlf/Resources.zh-Hant.xlf
@@ -47,6 +47,11 @@
         Fix end of line marker.
 
 
+
+        Fix file encoding.
+        Fix file encoding.
+
+
 
         Fix whitespace formatting.
         Fix whitespace formatting.
diff --git a/tests/Formatters/AbstractFormatterTests.cs b/tests/Formatters/AbstractFormatterTests.cs
index 7274d4f14d..7ab9635ef0 100644
--- a/tests/Formatters/AbstractFormatterTests.cs
+++ b/tests/Formatters/AbstractFormatterTests.cs
@@ -5,6 +5,7 @@
 using System.Collections.Immutable;
 using System.IO;
 using System.Linq;
+using System.Text;
 using System.Threading;
 using System.Threading.Tasks;
 using Microsoft.CodeAnalysis.Host.Mef;
@@ -66,50 +67,67 @@ protected AbstractFormatterTest()
         ///
         public abstract string Language { get; }
 
-        private string TestCode
-        {
-            set
-            {
-                if (value != null)
-                {
-                    TestState.Sources.Add(value);
-                }
-            }
-        }
-
         private static ILogger Logger => new TestLogger();
         private static EditorConfigOptionsApplier OptionsApplier => new EditorConfigOptionsApplier();
 
         public SolutionState TestState { get; }
 
-        private protected async Task TestAsync(string testCode, string expectedCode, IReadOnlyDictionary editorConfig)
+        private protected Task TestAsync(string testCode, string expectedCode, IReadOnlyDictionary editorConfig)
+        {
+            return TestAsync(testCode, expectedCode, editorConfig, Encoding.UTF8); // overload without an encoding: the source defaults to Encoding.UTF8
+        }
+
+        private protected async Task TestAsync(string testCode, string expectedCode, IReadOnlyDictionary editorConfig, Encoding encoding)
         {
-            TestCode = testCode;
+            var text = SourceText.From(testCode, encoding); // attach the requested encoding to the test source
+            TestState.Sources.Add(text);
 
             var solution = GetSolution(TestState.Sources.ToArray(), TestState.AdditionalFiles.ToArray(), TestState.AdditionalReferences.ToArray());
             var project = solution.Projects.Single();
             var document = project.Documents.Single();
-            var options = (OptionSet)await document.GetOptionsAsync();
             var formatOptions = new FormatOptions(
                 workspaceFilePath: project.FilePath,
                 isSolution: false,
                 logLevel: LogLevel.Trace,
                 saveFormattedFiles: false,
                 changesAreErrors: false,
-                filesToFormat: ImmutableHashSet.Create(document.FilePath));
-
-            ICodingConventionsSnapshot codingConventions = new TestCodingConventionsSnapshot(editorConfig);
-            options = OptionsApplier.ApplyConventions(options, codingConventions, Language);
+                filesToFormat: ImmutableHashSet.Create(document.FilePath));
 
-            var filesToFormat = new[] { (document, options, codingConventions) }.ToImmutableArray();
+            var filesToFormat = await GetOnlyFileToFormatAsync(solution, editorConfig);
             var formattedSolution = await Formatter.FormatAsync(solution, filesToFormat, formatOptions, Logger, default);
-            var formattedDocument = formattedSolution.Projects.Single().Documents.Single();
+            var formattedDocument = GetOnlyDocument(formattedSolution);
             var formattedText = await formattedDocument.GetTextAsync();
 
             Assert.Equal(expectedCode, formattedText.ToString());
+
+            return formattedText; // returned so callers can assert on more than the text, e.g. its Encoding
         }
 
+        /// <summary>
+        /// Gets the only <see cref="Document"/> along with related options and conventions.
+        /// </summary>
+        /// <param name="solution">A Solution containing a single Project containing a single Document.</param>
+        /// <param name="editorConfig">The editorconfig to apply to the documents options set.</param>
+        /// <returns>The document contained within <paramref name="solution"/> along with option set and coding conventions.</returns>
+        protected async Task> GetOnlyFileToFormatAsync(Solution solution, IReadOnlyDictionary editorConfig)
+        {
+            var document = GetOnlyDocument(solution);
+            var options = (OptionSet)await document.GetOptionsAsync();
+
+            ICodingConventionsSnapshot codingConventions = new TestCodingConventionsSnapshot(editorConfig);
+            options = OptionsApplier.ApplyConventions(options, codingConventions, Language);
+
+            return new[] { (document, options, codingConventions) }.ToImmutableArray(); // single-element array: the one document to format
+        }
+
+        /// <summary>
+        /// Gets the only <see cref="Document"/> contained within the only <see cref="Project"/> within the <see cref="Solution"/>.
+        /// </summary>
+        /// <param name="solution">A Solution containing a single Project containing a single Document.</param>
+        /// <returns>The document contained within <paramref name="solution"/>.</returns>
+        public Document GetOnlyDocument(Solution solution) => solution.Projects.Single().Documents.Single();
+
         ///
         /// Gets the collection of inputs to provide to the XML documentation resolver.
         ///
@@ -125,13 +143,6 @@ private protected async Task TestAsync(string testCode, string expectedCode, IRe
         ///
         public List> OptionsTransforms { get; } = new List>();
 
-        public Document GetTestDocument(string testCode)
-        {
-            TestCode = testCode;
-            var solution = GetSolution(TestState.Sources.ToArray(), TestState.AdditionalFiles.ToArray(), TestState.AdditionalReferences.ToArray());
-            return solution.Projects.Single().Documents.Single();
-        }
-
         ///
         /// Given an array of strings as sources and a language, turn them into a and return the
         /// solution.
diff --git a/tests/Formatters/CharsetFormatterTests.cs b/tests/Formatters/CharsetFormatterTests.cs
new file mode 100644
index 0000000000..7d370670ff
--- /dev/null
+++ b/tests/Formatters/CharsetFormatterTests.cs
@@ -0,0 +1,70 @@
+// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
+
+using System.Collections.Generic;
+using System.Text;
+using System.Threading.Tasks;
+using Microsoft.CodeAnalysis.Tools.Formatters;
+using Xunit;
+
+namespace Microsoft.CodeAnalysis.Tools.Tests.Formatters
+{
+    public class CharsetFormatterTests : CSharpFormatterTests
+    {
+        private protected override ICodeFormatter Formatter => new CharsetFormatter();
+
+        [Theory] // the 20 cases below are every ordered pair of the five distinct charset names
+        [InlineData("latin1", "utf-8")]
+        [InlineData("latin1", "utf-8-bom")]
+        [InlineData("latin1", "utf-16be")]
+        [InlineData("latin1", "utf-16le")]
+        [InlineData("utf-8", "latin1")]
+        [InlineData("utf-8", "utf-8-bom")]
+        [InlineData("utf-8", "utf-16be")]
+        [InlineData("utf-8", "utf-16le")]
+        [InlineData("utf-8-bom", "latin1")]
+        [InlineData("utf-8-bom", "utf-8")]
+        [InlineData("utf-8-bom", "utf-16be")]
+        [InlineData("utf-8-bom", "utf-16le")]
+        [InlineData("utf-16be", "latin1")]
+        [InlineData("utf-16be", "utf-8")]
+        [InlineData("utf-16be", "utf-8-bom")]
+        [InlineData("utf-16be", "utf-16le")]
+        [InlineData("utf-16le", "latin1")]
+        [InlineData("utf-16le", "utf-8")]
+        [InlineData("utf-16le", "utf-8-bom")]
+        [InlineData("utf-16le", "utf-16be")]
+        public async Task TestCharsetWrong_CharsetFixed(string codeValue, string expectedValue)
+        {
+            var codeEncoding = CharsetFormatter.GetCharset(codeValue);
+            var expectedEncoding = CharsetFormatter.GetCharset(expectedValue);
+
+            var testCode = "class C { }";
+
+            var editorConfig = new Dictionary()
+            {
+                ["charset"] = expectedValue,
+            };
+
+            var formattedText = await TestAsync(testCode, testCode, editorConfig, codeEncoding); // expected text equals the input: only the encoding should change
+
+            Assert.Equal(expectedEncoding, formattedText.Encoding);
+        }
+
+        [Fact]
+        public async Task TestCharsetNotSpecified_NoChange()
+        {
+            // This encoding is not supported by .editorconfig
+            var codeEncoding = Encoding.UTF32;
+
+            var testCode = "class C { }";
+
+            var editorConfig = new Dictionary()
+            {
+            }; // intentionally empty: no "charset" convention is supplied
+
+            var formattedText = await TestAsync(testCode, testCode, editorConfig, codeEncoding); // text and encoding are both expected to stay as they were
+
+            Assert.Equal(codeEncoding, formattedText.Encoding);
+        }
+    }
+}