Skip to content

Commit

Permalink
Merge pull request #10 from g4s8/9-race-workflow
Browse files Browse the repository at this point in the history
Added GitHub Action to reproduce races and fixed them
  • Loading branch information
bzz authored Apr 22, 2021
2 parents 79d765a + b748bcc commit 29d8148
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 2 deletions.
14 changes: 14 additions & 0 deletions .github/workflows/race.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Race workflow: runs the Go tests with the race detector enabled.
# Triggered on every push and every pull request.
on: [push, pull_request]
name: Race
jobs:
race:
runs-on: ubuntu-latest
steps:
- name: Install Go
uses: actions/setup-go@v1
with:
go-version: 1.14.x
- name: Checkout code
uses: actions/checkout@v2
- name: Test race
# -race enables Go's data race detector; ./... covers every package in the module.
run: go test -v -race ./...
13 changes: 11 additions & 2 deletions licensedb/internal/nlp.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"regexp"
"sort"
"strings"
"sync"

"github.com/jdkato/prose/chunk"
"github.com/jdkato/prose/tag"
Expand All @@ -18,7 +19,8 @@ var (
digitsRe = regexp.MustCompile(`[0-9]+`)
disabledNamePartsRe = regexp.MustCompile(`clause|or|only|deprecated|later`)

tagger = tag.NewPerceptronTagger()
tagger = tag.NewPerceptronTagger()
chunkLock sync.Mutex
)

// investigateReadmeFile uses NER to match license name mentions.
Expand Down Expand Up @@ -64,7 +66,8 @@ func investigateReadmeFile(
}
suspectedText := text[beginIndex:endIndex]
suspectedWords := tokenize.TextToWords(suspectedText)
for _, entity := range chunk.Chunk(tagger.Tag(suspectedWords), chunk.TreebankNamedEntities) {
chunks := readmeChunks(tagger.Tag(suspectedWords))
for _, entity := range chunks {
if garbageReadmeRe.MatchString(entity) {
continue
}
Expand Down Expand Up @@ -113,6 +116,12 @@ func investigateReadmeFile(
return candidates
}

// readmeChunks runs chunk.Chunk with chunk.TreebankNamedEntities over the
// already-tagged tokens and returns its result unchanged.
//
// The call is made while holding chunkLock, so at most one goroutine runs
// chunk.Chunk through this helper at a time.
// NOTE(review): presumably chunk.Chunk is not safe for concurrent use (this
// change targets failures under `go test -race`) — confirm against the prose
// library.
func readmeChunks(tokens []tag.Token) []string {
chunkLock.Lock()
// Deferred so the lock is released even if chunk.Chunk panics.
defer chunkLock.Unlock()
return chunk.Chunk(tokens, chunk.TreebankNamedEntities)
}

func splitLicenseName(name string) []substring {
counts := map[string]int{}
parts := licenseNamePartRe.FindAllString(strings.ToLower(name), -1)
Expand Down
7 changes: 7 additions & 0 deletions licensedb/internal/processors/markup.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,16 @@ package processors

import (
"bytes"
"sync"

rst "github.com/hhatto/gorst"
"github.com/russross/blackfriday/v2"
)

var (
// parserLock is held for the whole of RestructuredText, so only one
// goroutine at a time creates and uses an rst parser there.
parserLock sync.Mutex
)

// Markdown converts Markdown to plain text. It tries to revert all the decorations.
func Markdown(text []byte) []byte {
html := blackfriday.Run(text)
Expand All @@ -17,6 +22,8 @@ func Markdown(text []byte) []byte {
// RestructuredText converts ReStructuredText to plain text.
// It tries to revert all the decorations.
func RestructuredText(text []byte) []byte {
parserLock.Lock()
defer parserLock.Unlock()
parser := rst.NewParser(nil)
input := bytes.NewBuffer(text)
output := &bytes.Buffer{}
Expand Down

0 comments on commit 29d8148

Please sign in to comment.