-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathbackground.js
153 lines (138 loc) · 5.96 KB
/
background.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
/*
korporize for korpora (Apache 2.0) by https://github.com/gnonio
Copyright 2020 Pedro SOARES
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/**
 * Injects the content-page stylesheet and scripts into a tab.
 *
 * The guard script "/js/injected.js" is executed first; the stylesheet and the
 * remaining scripts are only injected when that first call fulfills. If any
 * injection rejects, the rejection propagates to the caller and the remaining
 * files are not injected.
 *
 * @param {number} tabId - id of the tab to inject into
 * @returns {Promise<undefined>} fulfills once every file has been injected
 */
async function conditionalCPInject( tabId ) {
    // Injection into the content page only happens at user demand. The user may
    // navigate away and back to an already injected page, and the state of the
    // injected scripts is hard to determine from the background page, so the
    // guard script doubles as the check: not erring out is the condition itself
    // to proceed with the real injection.
    await browser.tabs.executeScript( tabId, {file: "/js/injected.js"} )

    // the current time is appended to the stylesheet path as a query string
    await browser.tabs.insertCSS( tabId, {file: `/js/content.css?${Date.now()}`} )

    // CRITICAL: https://developer.mozilla.org/en-US/docs/Mozilla/Add-ons/WebExtensions/API/tabs/executeScript#Return_value
    // ALL scripts but the last require some return value as a workaround: ie. <string> "name.js"
    // the last script is executing an async with a return value itself
    const contentScripts = [
        "/js/utils.js",
        "/js/common.js",
        "/js/languages.js",
        "/js/content.js",
        "/js/content-ui.js"
    ]
    // strictly one after the other: the injection order is preserved
    for ( const file of contentScripts ) {
        await browser.tabs.executeScript( tabId, {file: file} )
    }
}
/**
 * Dispatches a runtime message according to its `method` field.
 *
 * - "BG_extractTextLoadedImage": completes `message.data` (in place) with the
 *   sender's tab id and the korporizeLogger function, then hands it to
 *   OCRLoadedImage.
 * - "BG_kOptions": opens the extension's options page.
 * Any other method is ignored.
 *
 * @param {object} message - received message; `method` selects the action, `data` carries its payload
 * @param {object} sender - message sender; `sender.tab.id` is read for OCR requests
 * @param {function} sendResponse - unused
 * @returns {undefined}
 */
function handleMessage(message, sender, sendResponse) {
    const method = message.method

    /*
    CONTENT PAGE COMMUNICATION
    */
    if ( method === "BG_extractTextLoadedImage" ) {
        const ocrConfig = message.data
        ocrConfig.tabId = sender.tab.id
        ocrConfig.logger = korporizeLogger
        OCRLoadedImage( ocrConfig )
        return
    }

    if ( method === "BG_kOptions" ) {
        browser.runtime.openOptionsPage()
    }
}
// Register handleMessage as the listener for messages arriving through browser.runtime.onMessage
browser.runtime.onMessage.addListener( handleMessage )
/**
 * Forwards a log message to a tab as a "CP_korporizeLogger" message.
 *
 * The send is fire-and-forget: nothing is returned to the caller. A rejected
 * send is reported with console.warn instead of being left as an unhandled
 * promise rejection.
 *
 * @param {number} tabId - id of the tab that receives the message
 * @param {*} msg - payload forwarded as the message's `data`
 * @returns {undefined}
 */
function korporizeLogger(tabId, msg) {
    browser.tabs.sendMessage( tabId, {method: "CP_korporizeLogger", data: msg} )
        .catch( error => console.warn("korporizeLogger", error) )
}
/**
 * Runs OCR for an image request and sends the outcome to the requesting tab
 * as a "CP_showOCRResult" message.
 *
 * The request is handed to cron( extractTextImage, [ config ] ). When that
 * promise fulfills, its value is sent as the message's `data`. When it rejects,
 * the error is logged and an error-shaped result
 * ({data: {language, text: "Error", confidence: 0}}) is sent instead, so the
 * tab still gets an answer. A failing send is logged as well.
 *
 * Requests missing `tabId`, `image` or `language` (or a missing config) are
 * only logged with console.warn.
 *
 * @param {object} config - OCR request; `tabId`, `image` and `language` are required here
 * @returns {undefined}
 */
function OCRLoadedImage(config) {
    if ( !( config && config.tabId && config.image && config.language ) ) {
        console.warn("OCRLoadedImage", config)
        return
    }

    const showResult = data => browser.tabs.sendMessage( config.tabId, {
        method: "CP_showOCRResult", data: data } )

    cron( extractTextImage, [ config ] )
        // a then() callback only ever receives the fulfilled value; rejections
        // need their own handler (second argument)
        .then( showResult, error => {
            console.warn("OCRLoadedImage", error)
            return showResult( {data: { language: config.language, text: "Error", confidence: 0 }} )
        } )
        // the send itself failed
        .catch( error => console.warn("OCRLoadedImage", error) )
}
/**
 * Recognizes the text of an image with a Tesseract worker.
 *
 * A worker is created per call, loaded and initialized for the requested
 * language, run on `config.image`, and then terminated. Termination happens
 * whether or not recognition succeeded, and a failing termination does not
 * replace an already obtained result.
 *
 * @param {object} config - OCR request
 * @param {number} config.tabId - passed as first argument to `config.logger`
 * @param {function} config.logger - called as logger( tabId, message ) for worker log and error events
 * @param {*} config.element - copied onto the result as `data.element`
 * @param {*} config.image - image handed to worker.recognize(); copied onto the result as `data.image`
 * @param {string} [config.language] - language to load; 'eng' when falsy
 * @param {string} [config.quality] - traineddata variant; "4.0.0_fast" when falsy
 * @param {string} [config.psm] - key into Tesseract.PSM; SINGLE_BLOCK when falsy
 * @returns {Promise<object>} the value of worker.recognize() with `data.image`,
 *   `data.element` and `data.language` added, or, when any step throws,
 *   {data: {language, text: "Error", confidence: 0}}
 */
async function extractTextImage( config ) {
    //https://github.com/naptha/tesseract.js/blob/master/docs/api.md
    const createWorker = Tesseract.createWorker

    //https://github.com/naptha/tessdata
    let language = config.language ? config.language : 'eng'

    // 'lib/lang-data/' | 'https://tessdata.projectnaptha.com/'
    let datapath = 'https://tessdata.projectnaptha.com/'

    // 4.0.0 | 4.0.0_best | 4.0.0_fast
    let traindata = config.quality ? config.quality : "4.0.0_fast"

    // write | readOnly | refresh | none
    let cachedata = 'write'

    // AUTO | AUTO_OSD | SINGLE_BLOCK
    let PSM = config.psm ? Tesseract.PSM[config.psm] : Tesseract.PSM.SINGLE_BLOCK

    let options = {
        workerPath: 'lib/worker.min.js',
        corePath: 'lib/tesseract-core.wasm.js', // .asm.js = SLOWER
        langPath: datapath + traindata,
        cachePath: traindata,
        cacheMethod: cachedata,
        // CRITICAL (Content Security Policy): workerBlobURL must be set to false
        // The page's settings blocked the loading of a resource at blob:moz-extension:// .../... ("script-src").
        // Check: spawnWorker.js
        // https://github.com/naptha/tesseract.js/issues/219
        // > https://github.com/naptha/tesseract.js/pull/322
        workerBlobURL: false,
        logger: m => config.logger( config.tabId, m ),
        errorHandler: e => config.logger( config.tabId, e )
    }

    let parameters = {
        tessedit_pageseg_mode: PSM,
    }

    if ( DEBUG ) console.log( language, options, parameters )

    let worker = createWorker( options )
    let result
    try {
        await worker.load()
        await worker.loadLanguage( language )
        await worker.initialize( language )
        await worker.setParameters( parameters )
        result = await worker.recognize( config.image )
        result.data.image = config.image
        result.data.element = config.element
        result.data.language = config.language
    } catch (e) {
        console.warn(e)
        result = {data: { language: language, text: "Error", confidence: 0 }}
    } finally {
        // always release the worker, also after a failed step; a failing
        // terminate() must not discard the result obtained above
        try {
            await worker.terminate()
        } catch (e) {
            console.warn(e)
        }
    }
    return result
}