Skip to content

Commit

Permalink
Adds Analyze Single vs Multi-thread benchmark (#215)
Browse files Browse the repository at this point in the history
* Add benchmarks

* Delete defaultRulesPkd.json

* Switch to the thread-safe ConcurrentDictionary instead of the non-thread-safe HashSet

* Improve multithreaded reliability

Switch to ConcurrentStack instead of List internally.

* Cleanup

* Update MultiExtractor to the current OSSGadget version

Dramatically improves speed and reliability.

* Fix two edge cases in MultiExtractor.

* No need to check for null; Extractor.ExtractFile no longer returns nulls.

* Add note about path for benchmarks
  • Loading branch information
gfs authored May 21, 2020
1 parent 82b996c commit e976a5c
Show file tree
Hide file tree
Showing 25 changed files with 2,147 additions and 17,466 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -265,3 +265,4 @@ __pycache__/
/UnitTest.Commands/output
/RulesPacker/log.txt
/AppInspector/ApplicationInspector.Commands.xml
AppInspector/Resources/defaultRulesPkd.json
8 changes: 4 additions & 4 deletions AppInspector.CLI/Writers/AnalyzeHtmlWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ public void PopulateTagGroups()
{
foreach (TagSearchPattern pattern in tagGroup.Patterns)
{
pattern.Detected = _appMetaData.UniqueTags.Any(v => v.Contains(pattern.SearchPattern));
pattern.Detected = _appMetaData.UniqueTags.Any(v => v.Key.Contains(pattern.SearchPattern));
//create dynamic "category" groups of tags with pattern relationship established from TagReportGroups.json
//that can be used to populate reports with various attributes for each tag detected
if (pattern.Detected)
Expand Down Expand Up @@ -382,7 +382,7 @@ private List<TagInfo> GetAllMatchingTagInfoList(TagGroup tagGroup, bool addNotFo
/// <returns></returns>
private List<TagInfo> GetTagInfoListByName()
{
List<string> orderedTags = _appMetaData.UniqueTags.ToList<string>();
List<string> orderedTags = _appMetaData.UniqueTags.Keys.ToList<string>();
orderedTags.Sort();
HashSet<string> dupCheck = new HashSet<string>();
List<TagInfo> result = new List<TagInfo>();
Expand Down Expand Up @@ -427,7 +427,7 @@ private List<TagInfo> GetTagInfoListByConfidence()
RulesEngine.Confidence[] confidences = { Confidence.High, Confidence.Medium, Confidence.Low };


foreach (string tag in _appMetaData.UniqueTags)
foreach (string tag in _appMetaData.UniqueTags.Keys)
{
var searchPattern = new Regex(tag, RegexOptions.IgnoreCase);
foreach (Confidence test in confidences)
Expand Down Expand Up @@ -470,7 +470,7 @@ private List<TagInfo> GetTagInfoListBySeverity()
RulesEngine.Severity[] severities = { Severity.Critical, Severity.Important, Severity.Moderate, Severity.BestPractice, Severity.ManualReview };


foreach (string tag in _appMetaData.UniqueTags)
foreach (string tag in _appMetaData.UniqueTags.Keys)
{
// TODO: How frequently are these generated? Cache?
var searchPattern = new Regex(tag, RegexOptions.IgnoreCase);
Expand Down
2 changes: 1 addition & 1 deletion AppInspector.CLI/Writers/AnalyzeJsonWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ public override void WriteResults(Result result, CLICommandOptions commandOption

if (cLIAnalyzeCmdOptions.SimpleTagsOnly)
{
List<string> keys = new List<string>(analyzeResult.Metadata.UniqueTags);
List<string> keys = new List<string>(analyzeResult.Metadata.UniqueTags.Keys);
keys.Sort();
TagsFile tags = new TagsFile();
tags.Tags = keys.ToArray();
Expand Down
18 changes: 5 additions & 13 deletions AppInspector.CLI/Writers/AnalyzeTextWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ public override void WriteResults(Result result, CLICommandOptions commandOption

if (cLIAnalyzeCmdOptions.SimpleTagsOnly)
{
List<string> keys = new List<string>(analyzeResult.Metadata.UniqueTags);
List<string> keys = new List<string>(analyzeResult.Metadata.UniqueTags.Keys);
keys.Sort();

foreach (string tag in keys)
Expand Down Expand Up @@ -66,17 +66,9 @@ public AnalyzeTextWriter(string formatString)

#region helpers

private string StringList(HashSet<string> data)
private string StringList(ConcurrentDictionary<string, byte> data)
{
StringBuilder build = new StringBuilder();

foreach (string s in data)
{
build.Append(s);
build.Append(" ");
}

return build.ToString();
return string.Join(' ', data.Keys);
}

private string StringList(Dictionary<string, int> data)
Expand Down Expand Up @@ -166,7 +158,7 @@ public void WriteAppMeta(MetaData metaData)
WriteOnce.General(string.Format("Unique matches: {0}", metaData.UniqueMatchesCount));

WriteOnce.General(MakeHeading("UniqueTags"));
List<string> orderedTags = metaData.UniqueTags.ToList<string>();
List<string> orderedTags = metaData.UniqueTags.Keys.ToList<string>();
orderedTags.Sort();

foreach (string tag in orderedTags)
Expand Down Expand Up @@ -205,7 +197,7 @@ private void WriteDependencies(MetaData metaData)
{
WriteOnce.General(MakeHeading("Dependencies"));

foreach (string s in metaData.UniqueDependencies)
foreach (string s in metaData.UniqueDependencies.Keys)
{
WriteOnce.General(s);
}
Expand Down
18 changes: 12 additions & 6 deletions AppInspector.sln
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AppInspector.RulesEngine", "RulesEngine\AppInspector.RulesEngine.csproj", "{C19A98D2-629D-4F4D-87E4-3154416970BA}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "MultiExtractor", "MultiExtractor\MultiExtractor.csproj", "{7C07A2A2-508E-4BBE-873F-F60F9FB4A9D9}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AppInspector.CLI", "AppInspector.CLI\AppInspector.CLI.csproj", "{824ED27E-A4CF-46A6-A01F-98B0821EB61C}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "RulesPacker", "RulesPacker", "{C464D0CE-5254-4EA5-87C9-C0C96E40C3CB}"
Expand All @@ -39,6 +37,10 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "RulesPacker", "RulesPacker"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "UnitTest.Commands", "UnitTest.Commands\UnitTest.Commands.csproj", "{181BD826-A428-41D9-8BEC-0D8EB2288DF5}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Benchmarks", "Benchmarks\Benchmarks.csproj", "{F031887C-EA60-4390-9940-765E99E69B8F}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Shared.MultiExtractor", "MultiExtractor\Shared.MultiExtractor.csproj", "{9D6C861B-845F-4ADC-86ED-2F1E7BB4A229}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand All @@ -53,10 +55,6 @@ Global
{C19A98D2-629D-4F4D-87E4-3154416970BA}.Debug|Any CPU.Build.0 = Debug|Any CPU
{C19A98D2-629D-4F4D-87E4-3154416970BA}.Release|Any CPU.ActiveCfg = Release|Any CPU
{C19A98D2-629D-4F4D-87E4-3154416970BA}.Release|Any CPU.Build.0 = Release|Any CPU
{7C07A2A2-508E-4BBE-873F-F60F9FB4A9D9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{7C07A2A2-508E-4BBE-873F-F60F9FB4A9D9}.Debug|Any CPU.Build.0 = Debug|Any CPU
{7C07A2A2-508E-4BBE-873F-F60F9FB4A9D9}.Release|Any CPU.ActiveCfg = Release|Any CPU
{7C07A2A2-508E-4BBE-873F-F60F9FB4A9D9}.Release|Any CPU.Build.0 = Release|Any CPU
{824ED27E-A4CF-46A6-A01F-98B0821EB61C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{824ED27E-A4CF-46A6-A01F-98B0821EB61C}.Debug|Any CPU.Build.0 = Debug|Any CPU
{824ED27E-A4CF-46A6-A01F-98B0821EB61C}.Release|Any CPU.ActiveCfg = Release|Any CPU
Expand All @@ -65,6 +63,14 @@ Global
{181BD826-A428-41D9-8BEC-0D8EB2288DF5}.Debug|Any CPU.Build.0 = Debug|Any CPU
{181BD826-A428-41D9-8BEC-0D8EB2288DF5}.Release|Any CPU.ActiveCfg = Release|Any CPU
{181BD826-A428-41D9-8BEC-0D8EB2288DF5}.Release|Any CPU.Build.0 = Release|Any CPU
{F031887C-EA60-4390-9940-765E99E69B8F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{F031887C-EA60-4390-9940-765E99E69B8F}.Debug|Any CPU.Build.0 = Debug|Any CPU
{F031887C-EA60-4390-9940-765E99E69B8F}.Release|Any CPU.ActiveCfg = Release|Any CPU
{F031887C-EA60-4390-9940-765E99E69B8F}.Release|Any CPU.Build.0 = Release|Any CPU
{9D6C861B-845F-4ADC-86ED-2F1E7BB4A229}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{9D6C861B-845F-4ADC-86ED-2F1E7BB4A229}.Debug|Any CPU.Build.0 = Debug|Any CPU
{9D6C861B-845F-4ADC-86ED-2F1E7BB4A229}.Release|Any CPU.ActiveCfg = Release|Any CPU
{9D6C861B-845F-4ADC-86ED-2F1E7BB4A229}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down
6 changes: 1 addition & 5 deletions AppInspector/AppInspector.Commands.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,7 @@
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\MultiExtractor\MultiExtractor.csproj">
<ReferenceOutputAssembly>true</ReferenceOutputAssembly>
<IncludeAssets>MultiExtractor.dll</IncludeAssets>
</ProjectReference>

<ProjectReference Include="..\MultiExtractor\Shared.MultiExtractor.csproj"/>
<ProjectReference Include="..\RulesEngine\AppInspector.RulesEngine.csproj">
<ReferenceOutputAssembly>true</ReferenceOutputAssembly>
<IncludeAssets>ApplicationInspector.RulesEngine.dll</IncludeAssets>
Expand Down
29 changes: 16 additions & 13 deletions AppInspector/Commands/AnalyzeCommand.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// Licensed under the MIT License. See LICENSE.txt in the project root for license information.

using Microsoft.ApplicationInspector.RulesEngine;
using MultiExtractor;
using Microsoft.CST.OpenSource.MultiExtractor;
using Newtonsoft.Json;
using NLog;
using System;
Expand All @@ -11,6 +11,7 @@
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace Microsoft.ApplicationInspector.Commands
{
Expand Down Expand Up @@ -289,7 +290,6 @@ private void ConfigRules()
public AnalyzeResult GetResult()
{
WriteOnce.SafeLog("AnalyzeCommand::Run", LogLevel.Trace);

WriteOnce.Operation(MsgHelp.FormatString(MsgHelp.ID.CMD_RUNNING, "Analyze"));
AnalyzeResult analyzeResult = new AnalyzeResult()
{
Expand Down Expand Up @@ -339,7 +339,7 @@ public AnalyzeResult GetResult()
}
else
{
_srcfileList.AsParallel().ForAll(filename => ProcessFile(filename));
Parallel.ForEach(_srcfileList, filename => ProcessFile(filename));
}

WriteOnce.General("\r" + MsgHelp.FormatString(MsgHelp.ID.ANALYZE_FILES_PROCESSED_PCNT, 100));
Expand Down Expand Up @@ -385,7 +385,7 @@ private void ProcessAsFile(string filename)
if (FileChecksPassed(filename, ref languageInfo))
{
LastUpdated = File.GetLastWriteTime(filename);
_metaDataHelper.Metadata.PackageTypes.Add(MsgHelp.GetString(MsgHelp.ID.ANALYZE_UNCOMPRESSED_FILETYPE));
_ = _metaDataHelper.Metadata.PackageTypes.TryAdd(MsgHelp.GetString(MsgHelp.ID.ANALYZE_UNCOMPRESSED_FILETYPE),0);

string fileText = File.ReadAllText(filename);
ProcessInMemory(filename, fileText, languageInfo);
Expand Down Expand Up @@ -602,7 +602,7 @@ private string ExtractDependency(string text, int startIndex, SearchPattern patt
}

string finalResult = rawResult.Replace(";", "");
_metaDataHelper.Metadata.UniqueDependencies.Add(finalResult);
_ = _metaDataHelper.Metadata.UniqueDependencies.TryAdd(finalResult,0);

return System.Net.WebUtility.HtmlEncode(finalResult);
}
Expand Down Expand Up @@ -645,11 +645,12 @@ private void UnZipAndProcess(string filePath, ArchiveFileType archiveFileType, b
}

LastUpdated = File.GetLastWriteTime(filePath);
_metaDataHelper.Metadata.PackageTypes.Add(MsgHelp.GetString(MsgHelp.ID.ANALYZE_COMPRESSED_FILETYPE));
_ = _metaDataHelper.Metadata.PackageTypes.TryAdd(MsgHelp.GetString(MsgHelp.ID.ANALYZE_COMPRESSED_FILETYPE),0);

try
{
IEnumerable<FileEntry> files = Extractor.ExtractFile(filePath).Where(x => x != null);
var extractor = new Extractor();
IEnumerable<FileEntry> files = extractor.ExtractFile(filePath,!_options.SingleThread);

if (_options.SingleThread)
{
Expand All @@ -661,8 +662,9 @@ private void UnZipAndProcess(string filePath, ArchiveFileType archiveFileType, b
LanguageInfo languageInfo = new LanguageInfo();
if (FileChecksPassed(file.FullPath, ref languageInfo, file.Content.Length))
{
byte[] streamByteArray = file.Content.ToArray();
ProcessInMemory(file.FullPath, Encoding.UTF8.GetString(streamByteArray, 0, streamByteArray.Length), languageInfo);
var streamByteArray = new byte[file.Content.Length];
file.Content.Read(streamByteArray);
ProcessInMemory(file.FullPath, Encoding.UTF8.GetString(streamByteArray), languageInfo);
}
}
catch (Exception)
Expand All @@ -673,16 +675,17 @@ private void UnZipAndProcess(string filePath, ArchiveFileType archiveFileType, b
}
else
{
files.AsParallel().ForAll(file =>
Parallel.ForEach(files, file =>
{
try
{
//check uncompressed file passes standard checks
LanguageInfo languageInfo = new LanguageInfo();
if (FileChecksPassed(file.FullPath, ref languageInfo, file.Content.Length))
{
byte[] streamByteArray = file.Content.ToArray();
ProcessInMemory(file.FullPath, Encoding.UTF8.GetString(streamByteArray, 0, streamByteArray.Length), languageInfo);
var streamByteArray = new byte[file.Content.Length];
file.Content.Read(streamByteArray);
ProcessInMemory(file.FullPath, Encoding.UTF8.GetString(streamByteArray), languageInfo);
}
}
catch (Exception)
Expand Down Expand Up @@ -712,7 +715,7 @@ private void UnZipAndProcess(string filePath, ArchiveFileType archiveFileType, b
/// <returns></returns>
private bool FileChecksPassed(string filePath, ref LanguageInfo languageInfo, long fileLength = 0)
{
_metaDataHelper.Metadata.FileExtensions.Add(Path.GetExtension(filePath).Replace('.', ' ').TrimStart());
_ = _metaDataHelper.Metadata.FileExtensions.TryAdd(Path.GetExtension(filePath).Replace('.', ' ').TrimStart(),0);

// 1. Skip files written in unknown language
if (!Language.FromFileName(filePath, ref languageInfo))
Expand Down
4 changes: 2 additions & 2 deletions AppInspector/Commands/TagDiffCommand.cs
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ public TagDiffResult GetResult()
int sizeTags1 = analyze1.Metadata.UniqueTags.Count;
string[] file1Tags = new string[sizeTags1];

foreach (string tag in analyze1.Metadata.UniqueTags.ToList<string>())
foreach (string tag in analyze1.Metadata.UniqueTags.Keys.ToList<string>())
{
file1Tags[count1++] = tag;
}
Expand All @@ -231,7 +231,7 @@ public TagDiffResult GetResult()
int sizeTags2 = analyze2.Metadata.UniqueTags.Count;
string[] file2Tags = new string[sizeTags2];

foreach (string tag in analyze2.Metadata.UniqueTags.ToList<string>())
foreach (string tag in analyze2.Metadata.UniqueTags.Keys.ToList<string>())
{
file2Tags[count2++] = tag;
}
Expand Down
2 changes: 1 addition & 1 deletion AppInspector/Commands/TagTestCommand.cs
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ public TagTestResult GetResult()
int sizeTags = analyze1.Metadata.UniqueTags.Count;
string[] tagsFound = new string[sizeTags];

foreach (string tag in analyze1.Metadata.UniqueTags.ToList<string>())
foreach (string tag in analyze1.Metadata.UniqueTags.Keys.ToList<string>())
{
tagsFound[count++] = tag;
}
Expand Down
Loading

0 comments on commit e976a5c

Please sign in to comment.