-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #16 from KEKDATA/feature/Auth-profile
Feature/auth profile
- Loading branch information
Showing
53 changed files
with
4,857 additions
and
1,535 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,21 @@ | ||
Диаграмма процессов API, парсинга | ||
<img src="https://d.radikal.ru/d32/2005/e4/57263917df9b.jpg" /> | ||
|
||
Download Node.js https://nodejs.org/en/download/ | ||
Download Python 3 https://www.python.org/downloads/ | ||
|
||
Open the `backend` directory | ||
|
||
```bash | ||
$ npm i | ||
``` | ||
```bash | ||
$ pip install dostoevsky | ||
``` | ||
```bash | ||
$ python -m dostoevsky download fasttext-social-network-model | ||
``` | ||
```bash | ||
$ npm run start | ||
``` | ||
|
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
/** Language code used for texts detected as Russian. */
export const RU = 'ru';

/** Language code used for texts detected as English. */
export const ENG = 'eng';
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,38 @@ | ||
import { aposToLexForm } from '../lex_form_convert/apos_to_lex_form'; | ||
import { ENG, RU } from '../../constants/language'; | ||
|
||
/**
 * Normalizes a text for sentiment analysis and guesses its language.
 *
 * The text is passed through `aposToLexForm`, lower-cased and stripped of
 * `http(s)://` links. Two candidates are then built from it: one keeping only
 * Cyrillic letters, digits and whitespace, and one keeping only Latin letters,
 * digits and whitespace. The longer candidate wins; on a tie the text is
 * treated as English.
 *
 * @param text Raw input text.
 * @returns The winning candidate as `text` and its language code (`RU` or
 *   `ENG`) as `language`.
 */
export const getTextWithAlphaOnly = (
  text: string,
): { text: string; language: string } => {
  const loweredText = aposToLexForm(text).toLowerCase();

  // Links are removed first so that their Latin characters do not count
  // towards the English candidate.
  const withoutLinks = loweredText.replace(/(https?:\/\/[^\s]+)/g, '');

  // `ё`/`Ё` are listed explicitly: they lie outside the `а-я`/`А-Я` ranges
  // and would otherwise be deleted from Russian words.
  const russianText = withoutLinks.replace(/[^а-яёА-ЯЁ0-9\s]+/g, '');
  const englishText = withoutLinks.replace(/[^a-zA-Z0-9\s]+/g, '');

  // Digits and whitespace survive in both candidates, so the length
  // difference is determined by the letters alone.
  if (russianText.length > englishText.length) {
    return { text: russianText, language: RU };
  }

  return { text: englishText, language: ENG };
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
``` | ||
@inproceedings{rogers-etal-2018-rusentiment, | ||
title = "{R}u{S}entiment: An Enriched Sentiment Analysis Dataset for Social Media in {R}ussian", | ||
author = "Rogers, Anna and | ||
Romanov, Alexey and | ||
Rumshisky, Anna and | ||
Volkova, Svitlana and | ||
Gronas, Mikhail and | ||
Gribov, Alex", | ||
booktitle = "Proceedings of the 27th International Conference on Computational Linguistics", | ||
month = aug, | ||
year = "2018", | ||
address = "Santa Fe, New Mexico, USA", | ||
publisher = "Association for Computational Linguistics", | ||
url = "https://www.aclweb.org/anthology/C18-1064", | ||
pages = "755--763", | ||
} | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
import { spawn } from 'child_process'; | ||
|
||
/**
 * Computes sentiment coefficients for Russian texts by running the
 * `sentiment_coefficient.py` helper script in a `python3` child process.
 *
 * Texts that are empty or whitespace-only are never sent to the script and
 * always receive a coefficient of `0`. For every other text the coefficient is
 * derived from the prediction returned for it:
 *   - `-1 * negative` when the prediction contains a `negative` score,
 *   - otherwise `positive` when it contains a `positive` score,
 *   - otherwise the first score listed in the prediction.
 *
 * @param data Texts to score together with the index each result is stored under.
 * @returns `dataWithSentiments`, mapping every `textIndex` to its coefficient,
 *   and `countOfSentimentCoefficients`, the sum of all coefficients.
 * @throws Error when the process cannot be started, exits with a non-zero
 *   code, or prints output that is not one prediction per non-empty text.
 */
export const getRuSentiment = async (
  data: Array<{ text: string; textIndex: number }>,
): Promise<{
  dataWithSentiments: { [key: string]: number };
  countOfSentimentCoefficients: number;
}> => {
  const isBlank = (text: string) => text.replace(/\s/g, '').length === 0;

  // The library does not accept empty values and crashes on them.
  const notEmptyData = data
    .map(({ text }) => text)
    .filter(text => !isBlank(text));

  const dataWithSentiments: { [key: string]: number } = {};

  // Nothing to score: skip starting the interpreter altogether.
  if (notEmptyData.length === 0) {
    data.forEach(({ textIndex }) => {
      dataWithSentiments[textIndex] = 0;
    });

    return { dataWithSentiments, countOfSentimentCoefficients: 0 };
  }

  const rawOutput = await new Promise<string>((resolve, reject) => {
    const pythonProcess = spawn('python3', [
      'src/lib/ru_social_sentiment/sentiment_coefficient.py',
      JSON.stringify(notEmptyData),
    ]);

    let stdout = '';
    let stderr = '';

    // stdout may arrive in several chunks, so it is accumulated and only
    // parsed once the process has closed its streams.
    pythonProcess.stdout.setEncoding('utf8');
    pythonProcess.stdout.on('data', (chunk: string) => {
      stdout += chunk;
    });

    // stderr is drained as well; it is only reported when the script fails.
    pythonProcess.stderr.setEncoding('utf8');
    pythonProcess.stderr.on('data', (chunk: string) => {
      stderr += chunk;
    });

    // Emitted, for example, when `python3` cannot be spawned.
    pythonProcess.on('error', reject);

    pythonProcess.on('close', code => {
      if (code !== 0) {
        reject(
          new Error(
            `sentiment_coefficient.py exited with code ${code}: ${stderr.trim()}`,
          ),
        );

        return;
      }

      resolve(stdout);
    });
  });

  let predictions: Array<{ [key: string]: number }>;

  try {
    // The script may print a Python literal with single-quoted keys;
    // swapping the quotes turns it into valid JSON.
    const parsed: unknown = JSON.parse(rawOutput.replace(/'/g, '"'));

    if (!Array.isArray(parsed) || parsed.length !== notEmptyData.length) {
      throw new Error('expected one prediction per non-empty text');
    }

    predictions = parsed;
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);

    throw new Error(`Unexpected sentiment_coefficient.py output: ${reason}`);
  }

  let indexOfSentiment = 0;
  let countOfSentimentCoefficients = 0;

  data.forEach(({ text, textIndex }) => {
    let coefficient = 0;

    if (!isBlank(text)) {
      // Predictions come back in the same order as the non-empty texts.
      const sentiment = predictions[indexOfSentiment];
      const sentimentKeys = Object.keys(sentiment);

      if (sentimentKeys.includes('negative')) {
        coefficient = -1 * sentiment.negative;
      } else if (sentimentKeys.includes('positive')) {
        coefficient = sentiment.positive;
      } else {
        // Neither polarity was predicted: fall back to the first listed score.
        const [firstScore = 0] = Object.values(sentiment);

        coefficient = firstScore;
      }

      indexOfSentiment++;
      countOfSentimentCoefficients += coefficient;
    }

    dataWithSentiments[textIndex] = coefficient;
  });

  return { dataWithSentiments, countOfSentimentCoefficients };
};
21 changes: 21 additions & 0 deletions
21
backend/src/lib/ru_social_sentiment/sentiment_coefficient.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# The dostoevsky library ships a model trained on a private dataset for
# social-network sentiment, which showed decent results.
# That is why this Python 3 script was added to the project.
# It is a quick-and-dirty bridge.
#
# Usage: sentiment_coefficient.py '<JSON array of texts>'
# Prints one JSON array to stdout holding, for every input text, an object
# that maps each predicted label to its score.

import json
import sys

from dostoevsky.models import FastTextSocialNetworkModel
from dostoevsky.tokenization import RegexTokenizer

if len(sys.argv) < 2:
    sys.exit("usage: sentiment_coefficient.py '<JSON array of texts>'")

texts = json.loads(sys.argv[1])

if texts:
    tokenizer = RegexTokenizer()

    model = FastTextSocialNetworkModel(tokenizer=tokenizer)

    # k=2 asks the model for two labels per text.
    results = model.predict(texts, k=2)
else:
    # Nothing to predict, so the model does not need to be loaded.
    results = []

# Emit real JSON instead of the Python repr of the list. Scores are converted
# to plain floats so the output does not depend on how the model's numeric
# type prints or serializes. Label order within each prediction is kept.
serializable = [
    {label: float(score) for label, score in prediction.items()}
    for prediction in results
]

print(json.dumps(serializable))

sys.stdout.flush()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.