From 544b75fd7f36e430df99caa85ae33641055e67bd Mon Sep 17 00:00:00 2001
From: Kirill <g4s8.public@gmail.com>
Date: Mon, 5 Apr 2021 15:02:58 +0300
Subject: [PATCH 1/2] Added GitHub Actions workflow to reproduce races

For #9 - to reproduce the data races in `licensedb.Detect()`,
added a workflow that runs the tests with the `-race` flag enabled.
---
 .github/workflows/race.yml | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 .github/workflows/race.yml

diff --git a/.github/workflows/race.yml b/.github/workflows/race.yml
new file mode 100644
index 0000000..6d3d8e2
--- /dev/null
+++ b/.github/workflows/race.yml
@@ -0,0 +1,14 @@
+on: [push, pull_request]
+name: Race
+jobs:
+  race:
+    runs-on: ubuntu-latest
+    steps:
+    - name: Install Go
+      uses: actions/setup-go@v1
+      with:
+        go-version: 1.14.x
+    - name: Checkout code
+      uses: actions/checkout@v2
+    - name: Test race
+      run: go test -v -race ./...

From b748bccfe75ed456b9da9fa43cd46f6aa43539cd Mon Sep 17 00:00:00 2001
From: Kirill <g4s8.public@gmail.com>
Date: Thu, 15 Apr 2021 18:44:13 +0300
Subject: [PATCH 2/2] Fixes #9 - added two mutexes for problem calls

Added a mutex for the prose chunk processor, which uses a single regex
instance in a non-thread-safe way: the chunk.Chunk() call is now wrapped
in readmeChunks(), which holds the mutex for the duration of the call.
Added a mutex for the rst-to-markdown processor, which the race detector
also reports.
---
 licensedb/internal/nlp.go               | 13 +++++++++++--
 licensedb/internal/processors/markup.go |  7 +++++++
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/licensedb/internal/nlp.go b/licensedb/internal/nlp.go
index 11afd62..6162ad8 100644
--- a/licensedb/internal/nlp.go
+++ b/licensedb/internal/nlp.go
@@ -4,6 +4,7 @@ import (
 	"regexp"
 	"sort"
 	"strings"
+	"sync"
 
 	"github.com/jdkato/prose/chunk"
 	"github.com/jdkato/prose/tag"
@@ -18,7 +19,8 @@ var (
 	digitsRe            = regexp.MustCompile(`[0-9]+`)
 	disabledNamePartsRe = regexp.MustCompile(`clause|or|only|deprecated|later`)
 
-	tagger = tag.NewPerceptronTagger()
+	tagger    = tag.NewPerceptronTagger()
+	chunkLock sync.Mutex
 )
 
 // investigateReadmeFile uses NER to match license name mentions.
@@ -64,7 +66,8 @@ func investigateReadmeFile(
 	}
 	suspectedText := text[beginIndex:endIndex]
 	suspectedWords := tokenize.TextToWords(suspectedText)
-	for _, entity := range chunk.Chunk(tagger.Tag(suspectedWords), chunk.TreebankNamedEntities) {
+	chunks := readmeChunks(tagger.Tag(suspectedWords))
+	for _, entity := range chunks {
 		if garbageReadmeRe.MatchString(entity) {
 			continue
 		}
@@ -113,6 +116,12 @@ func investigateReadmeFile(
 	return candidates
 }
 
+func readmeChunks(tokens []tag.Token) []string {
+	chunkLock.Lock()
+	defer chunkLock.Unlock()
+	return chunk.Chunk(tokens, chunk.TreebankNamedEntities)
+}
+
 func splitLicenseName(name string) []substring {
 	counts := map[string]int{}
 	parts := licenseNamePartRe.FindAllString(strings.ToLower(name), -1)
diff --git a/licensedb/internal/processors/markup.go b/licensedb/internal/processors/markup.go
index a863578..691afef 100644
--- a/licensedb/internal/processors/markup.go
+++ b/licensedb/internal/processors/markup.go
@@ -2,11 +2,16 @@ package processors
 
 import (
 	"bytes"
+	"sync"
 
 	rst "github.com/hhatto/gorst"
 	"github.com/russross/blackfriday/v2"
 )
 
+var (
+	parserLock sync.Mutex
+)
+
 // Markdown converts Markdown to plain text. It tries to revert all the decorations.
 func Markdown(text []byte) []byte {
 	html := blackfriday.Run(text)
@@ -17,6 +22,8 @@ func Markdown(text []byte) []byte {
 // RestructuredText converts ReStructuredText to plain text.
 // It tries to revert all the decorations.
 func RestructuredText(text []byte) []byte {
+	parserLock.Lock()
+	defer parserLock.Unlock()
 	parser := rst.NewParser(nil)
 	input := bytes.NewBuffer(text)
 	output := &bytes.Buffer{}