Merge pull request #16 from KEKDATA/feature/Auth-profile
Feature/auth profile
TchernyavskyDaniil authored Jun 2, 2020
2 parents 62bdd3d + 594f670 commit c24bb4e
Showing 53 changed files with 4,857 additions and 1,535 deletions.
3 changes: 3 additions & 0 deletions README.md
@@ -3,6 +3,9 @@
1) frontend - Directory for the web client
2) backend - Directory for the console application (or not strictly console, since it can also be run via Electron) that currently parses Twitter, plus the WebSocket API

TODO:
1) Error handlers

## SUPER KEKSPONSOR

<table>
19 changes: 19 additions & 0 deletions backend/README.md
@@ -1,2 +1,21 @@
Process diagram for the API and parsing
<img src="https://d.radikal.ru/d32/2005/e4/57263917df9b.jpg" />

Download Node.js: https://nodejs.org/en/download/
Download Python 3: https://www.python.org/downloads/

Open the backend directory and run:

```bash
$ npm i
```
```bash
$ pip install dostoevsky
```
```bash
$ python -m dostoevsky download fasttext-social-network-model
```
```bash
$ npm run start
```
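
To sanity-check the Python side before starting the server, here is a minimal sketch (a hypothetical helper, not part of the repo; it assumes `python3` is on your PATH and the fasttext model was downloaded above):

```ts
// verify_dostoevsky.ts (illustrative only, not part of the repo)
import { spawnSync } from 'child_process';

// Try to import dostoevsky from the same interpreter the backend will spawn.
const probe = spawnSync('python3', ['-c', "import dostoevsky; print('ok')"]);

console.log(
  probe.status === 0
    ? 'dostoevsky is importable'
    : `dostoevsky missing: ${probe.stderr.toString()}`,
);
```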

2,306 changes: 1,809 additions & 497 deletions backend/package-lock.json

Large diffs are not rendered by default.

28 changes: 14 additions & 14 deletions backend/package.json
@@ -21,33 +21,33 @@
"dependencies": {
"bull": "^3.14.0",
"cheerio": "^1.0.0-rc.3",
"effector": "^20.15.6",
"effector": "^20.15.8",
"env-cmd": "^10.1.0",
"foreman": "^3.0.1",
"nanoid": "^3.1.9",
"natural": "^2.1.5",
"playwright": "^1.0.2",
"ramda": "^0.27.0",
"redis": "^3.0.2",
"sequelize": "^5.21.10",
"sequelize": "^5.21.11",
"spelling-corrector": "^3.0.0",
"sqlite3": "^4.2.0",
"stopword": "^1.0.1",
"ws": "^7.3.0"
},
"devDependencies": {
"@babel/cli": "^7.8.4",
"@babel/core": "^7.9.6",
"@babel/node": "^7.8.7",
"@babel/plugin-transform-typescript": "^7.9.6",
"@babel/preset-env": "^7.9.6",
"@babel/preset-typescript": "^7.9.0",
"@babel/cli": "^7.10.1",
"@babel/core": "^7.10.2",
"@babel/node": "^7.10.1",
"@babel/plugin-transform-typescript": "^7.10.1",
"@babel/preset-env": "^7.10.2",
"@babel/preset-typescript": "^7.10.1",
"@types/bull": "^3.13.0",
"@types/node": "^14.0.5",
"@types/redis": "^2.8.21",
"@types/node": "^14.0.6",
"@types/redis": "^2.8.22",
"@types/sequelize": "^4.28.9",
"@types/ws": "^7.2.4",
"@typescript-eslint/parser": "^3.0.0",
"@typescript-eslint/parser": "^3.1.0",
"babel-eslint": "^10.1.0",
"eslint": "^7.1.0",
"eslint-config-prettier": "^6.11.0",
@@ -56,14 +56,14 @@
"eslint-plugin-node": "^11.1.0",
"eslint-plugin-prettier": "^3.1.3",
"husky": "^4.2.5",
"lint-staged": "^10.2.6",
"lint-staged": "^10.2.7",
"prettier": "1.19.1",
"ts-node": "^8.10.1",
"ts-node": "^8.10.2",
"typescript": "^3.9.3",
"@types/cheerio": "^0.22.18",
"@types/natural": "^0.6.3",
"@types/ramda": "^0.27.6",
"electron": "^9.0.0"
"electron": "^9.0.1"
},
"husky": {
"hooks": {
2 changes: 2 additions & 0 deletions backend/src/constants/language.ts
@@ -0,0 +1,2 @@
export const RU = 'ru';
export const ENG = 'eng';
33 changes: 31 additions & 2 deletions backend/src/lib/normalizers/alphabet.ts
@@ -1,9 +1,38 @@
import { aposToLexForm } from '../lex_form_convert/apos_to_lex_form';
+import { ENG, RU } from '../../constants/language';

export const getTextWithAlphaOnly = (text: string) => {
const textLexicalForm = aposToLexForm(text);
const casedText = textLexicalForm.toLowerCase();
-const textWithAlphaOnly = casedText.replace(/[^a-zA-Z\s]+/g, '');
+const withoutLinks = casedText.replace(/(https?:\/\/[^\s]+)/g, '');
+const russianText = withoutLinks.replace(/[^а-яА-Я0-9\s]+/g, '');
+const englishText = withoutLinks.replace(/[^a-zA-Z0-9\s]+/g, '');

-return textWithAlphaOnly;
+let language = '';
+
+if (russianText.length > englishText.length) {
+language = RU;
+} else {
+language = ENG;
+}
+
+let normalizedText = '';
+
+switch (language) {
+case RU: {
+normalizedText = russianText;
+break;
+}
+
+case ENG: {
+normalizedText = englishText;
+break;
+}
+
+default: {
+break;
+}
+}
+
+return { text: normalizedText, language };
};
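
For reference, a minimal usage sketch of the new return shape (import path and sample string are illustrative):

```ts
// Hypothetical consumer of the updated normalizer.
import { getTextWithAlphaOnly } from './lib/normalizers/alphabet';

const { text, language } = getTextWithAlphaOnly('Привет, мир! https://t.co/abc');

// More characters survive the Cyrillic filter than the Latin one,
// so language === 'ru'; `text` is lowercased with the link stripped.
console.log(language, text); // ru привет мир
```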
19 changes: 19 additions & 0 deletions backend/src/lib/ru_social_sentiment/README.md
@@ -0,0 +1,19 @@
```
@inproceedings{rogers-etal-2018-rusentiment,
title = "{R}u{S}entiment: An Enriched Sentiment Analysis Dataset for Social Media in {R}ussian",
author = "Rogers, Anna and
Romanov, Alexey and
Rumshisky, Anna and
Volkova, Svitlana and
Gronas, Mikhail and
Gribov, Alex",
booktitle = "Proceedings of the 27th International Conference on Computational Linguistics",
month = aug,
year = "2018",
address = "Santa Fe, New Mexico, USA",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/C18-1064",
pages = "755--763",
}
```
82 changes: 82 additions & 0 deletions backend/src/lib/ru_social_sentiment/index.ts
@@ -0,0 +1,82 @@
import { spawn } from 'child_process';

export const getRuSentiment = async (
data: Array<{ text: string; textIndex: number }>,
) => {
// The library does not accept empty values and crashes on them
const notEmptyData = data
.map(({ text }) => text)
.filter(text => {
const withoutSpaces = text.replace(/\s/g, '');

return withoutSpaces.length > 0;
});

let indexOfSentiment = 0;

const pythonProcess = spawn('python3', [
'src/lib/ru_social_sentiment/sentiment_coefficient.py',
JSON.stringify(notEmptyData),
]);

const getSentiments = () => {
return new Promise<{
dataWithSentiments: { [key: string]: number };
countOfSentimentCoefficients: number;
}>((resolve, reject) => {
pythonProcess.stdout.on('data', (sentiments: ArrayBuffer) => {
const result = sentiments.toString();

const normalizedSentiments: { [key: string]: number } = {};

let countOfSentimentCoefficients = 0;

if (result !== undefined && result.length > 0) {
const parsedResult = JSON.parse(result.replace(/'/g, '"'));

data.forEach(({ text, textIndex }) => {
const withoutSpaces = text.replace(/\s/g, '');
let coefficient = 0;

if (withoutSpaces.length > 0) {
const sentiment: { [key: string]: number } =
parsedResult[indexOfSentiment];

const sentimentKeys = Object.keys(sentiment);
const isNegative = sentimentKeys.includes('negative');
const isPositive = sentimentKeys.includes('positive');

if (isNegative) {
coefficient = -1 * sentiment.negative;
} else if (isPositive) {
coefficient = sentiment.positive;
} else {
const [_, valueSentiment] = Object.entries(sentiment)[0];

coefficient = valueSentiment;
}

indexOfSentiment++;

countOfSentimentCoefficients =
countOfSentimentCoefficients + coefficient;
}

normalizedSentiments[textIndex] = coefficient;
});
}

resolve({
dataWithSentiments: normalizedSentiments,
countOfSentimentCoefficients,
});

pythonProcess.kill('SIGTERM');
});
});
};

const result = await getSentiments();

return result;
};
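
A usage sketch for the new helper (tweet texts invented; the import path follows the diff above but is illustrative):

```ts
import { getRuSentiment } from './src/lib/ru_social_sentiment';

const tweets = [
  { text: 'Отличный день!', textIndex: 0 },
  { text: '   ', textIndex: 1 }, // whitespace-only: filtered out, coefficient stays 0
];

getRuSentiment(tweets).then(({ dataWithSentiments, countOfSentimentCoefficients }) => {
  // dataWithSentiments maps textIndex -> signed coefficient
  // (negative probabilities are negated, positive kept as-is).
  console.log(dataWithSentiments, countOfSentimentCoefficients);
});
```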
21 changes: 21 additions & 0 deletions backend/src/lib/ru_social_sentiment/sentiment_coefficient.py
@@ -0,0 +1,21 @@
# This library ships a proprietary dataset tuned to social-network sentiment, which has shown decent results.
# Hence this Python 3 script was dropped in.
# "Quick-and-dirty hack"

from dostoevsky.tokenization import RegexTokenizer
from dostoevsky.models import FastTextSocialNetworkModel

import sys
import json

tokenizer = RegexTokenizer()

model = FastTextSocialNetworkModel(tokenizer=tokenizer)

texts = json.loads(sys.argv[1])

results = model.predict(texts, k=2)

print(results)

sys.stdout.flush()
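
For orientation, the TypeScript consumer above expects `print(results)` to emit a Python list of label-to-score dicts, which `index.ts` rewrites to JSON by swapping the single quotes before `JSON.parse`. A sketch of the parsed shape (scores invented; `negative`, `positive`, `neutral`, `skip`, and `speech` are dostoevsky's labels):

```ts
// Hypothetical shape of parsedResult in index.ts after JSON.parse.
type SentimentPrediction = { [label: string]: number };

const parsedResult: SentimentPrediction[] = [
  { positive: 0.81, neutral: 0.12 },
  { negative: 0.64, speech: 0.2 },
];
```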
@@ -7,24 +7,25 @@ import {
//@ts-ignore
import stopword from 'stopword';

-export const getTextWithSentimentAnalysis = (data: Array<string>) => {
+export const getTextWithSentimentAnalysis = (
+data: Array<{ text: string; textIndex: number }>,
+) => {
const tokenizer = new WordTokenizer();
const analyzer = new SentimentAnalyzer('English', PorterStemmer, 'afinn');

-const dataWithSentiments = [];
+const dataWithSentiments: { [key: string]: number } = {};

let countOfSentimentCoefficients = 0;
-let lengthOfData = data.length;

-for (let i = 0; i < lengthOfData; i++) {
-const elementOfData = data[i];
+for (let i = 0; i < data.length; i++) {
+const { text, textIndex } = data[i];

-const tokenizedData = tokenizer.tokenize(elementOfData);
+const tokenizedData = tokenizer.tokenize(text);

const dataWithoutStopWords = stopword.removeStopwords(tokenizedData);

if (dataWithoutStopWords.length === 0) {
-dataWithSentiments.push(0);
+dataWithSentiments[textIndex] = 0;
continue;
}

@@ -35,10 +35,8 @@ export const getTextWithSentimentAnalysis = (data: Array<string>) => {
countOfSentimentCoefficients =
sentimentCoefficient + countOfSentimentCoefficients;

-dataWithSentiments.push(sentimentCoefficient);
+dataWithSentiments[textIndex] = sentimentCoefficient;
}

-const meanSentiment = countOfSentimentCoefficients / lengthOfData;
-
-return { dataWithSentiments, meanSentiment };
+return { dataWithSentiments, countOfSentimentCoefficients };
};
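
A matching usage sketch for the English analyzer (module path assumed, since the file header for this diff is missing above; sample data invented):

```ts
// Hypothetical call site for the reworked analyzer.
import { getTextWithSentimentAnalysis } from './text_analysis/sentiment';

const { dataWithSentiments, countOfSentimentCoefficients } = getTextWithSentimentAnalysis([
  { text: 'I love this library', textIndex: 0 },
  { text: 'the', textIndex: 1 }, // only stop words, so removeStopwords leaves nothing -> 0
]);

// Results are now keyed by textIndex instead of positional order,
// e.g. { '0': 0.75, '1': 0 } (values illustrative).
console.log(dataWithSentiments, countOfSentimentCoefficients);
```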
4 changes: 2 additions & 2 deletions backend/src/queues/bayes.ts
@@ -7,7 +7,7 @@ import { OPTIONS, MAX_JOBS_PER_WORKER } from './config';
console.info('Bayes connected');

const bayesQueue = new Queue('bayes', OPTIONS);
-const callbackQueue = new Queue('callback', OPTIONS);
+const mergeQueue = new Queue('merge', OPTIONS);

bayesQueue.process(MAX_JOBS_PER_WORKER, job => {
const {
@@ -19,7 +19,7 @@ bayesQueue.process(MAX_JOBS_PER_WORKER, job => {
normalizedTweetsForAnalysis,
);

-callbackQueue.add({
+mergeQueue.add({
jobId: id,
options: { tweetsWithBayesClassifier, isBayes: true },
});
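
With the rename, Bayes results now land in the shared `merge` queue instead of a dedicated callback queue. A sketch of the enqueued payload (connection options and the job id are invented; `OPTIONS` in the diff presumably carries the real Redis config):

```ts
import Queue from 'bull';

// Illustrative: mirrors what the bayes worker enqueues for the merge step.
const mergeQueue = new Queue('merge', { redis: { host: '127.0.0.1', port: 6379 } });

mergeQueue.add({
  jobId: 'job-42', // id of the originating parse job (invented)
  options: { tweetsWithBayesClassifier: {}, isBayes: true },
});
```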
6 changes: 4 additions & 2 deletions backend/src/queues/merge.ts
@@ -6,13 +6,15 @@ import { insertionSentimentTweetsSort } from '../twitter/tweets/lib/insertion_se
import { OPTIONS, MAX_JOBS_PER_WORKER } from './config';
import { Send } from '../types';

-const callbackQueue = new Queue('callback', OPTIONS);
+console.info('Merge connected');
+
+const mergeQueue = new Queue('merge', OPTIONS);
const webQueue = new Queue('web', OPTIONS);

// TODO: Clean this up over time, or once the relevant jobs have finished.
const jobsProgress = new Map();

-callbackQueue.process(MAX_JOBS_PER_WORKER, job => {
+mergeQueue.process(MAX_JOBS_PER_WORKER, job => {
const { jobId, options }: { jobId: string; options: Send } = job.data;

const jobOptions = jobsProgress.get(jobId);
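
The `jobsProgress` map accumulates partial results per `jobId` until every analyzer has reported, after which the merged payload can move on to `webQueue`. A minimal sketch of that accumulation pattern (field names beyond `isBayes` are assumptions):

```ts
// Simplified model of the jobsProgress bookkeeping in merge.ts.
const jobsProgress = new Map<string, { isBayes?: boolean; isSentiment?: boolean }>();

function markProgress(
  jobId: string,
  update: { isBayes?: boolean; isSentiment?: boolean },
) {
  const next = { ...(jobsProgress.get(jobId) ?? {}), ...update };
  jobsProgress.set(jobId, next);

  // Once all analyzers have reported, drop the entry:
  // one way to address the TODO about cleaning the map.
  if (next.isBayes && next.isSentiment) {
    jobsProgress.delete(jobId);
  }
}
```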