-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 5c3499f
Showing
15 changed files
with
1,389 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Byte-compiled | ||
__pycache__/ | ||
|
||
# Environments | ||
venv/ | ||
|
||
# Configs | ||
category_dump.json | ||
config.ini | ||
|
||
# Logs | ||
error.log |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# CM Parser | ||
|
||
>🚬🚬🚬 | ||
CM Parser is designed for parsing product attributes. | ||
|
||
## Quick Start | ||
|
||
```bash | ||
pip install -r requirements.txt | ||
cp config.ini.example parser/config.ini | ||
cp category_dump.json.example parser/category_dump.json | ||
``` | ||
|
||
### Server | ||
|
||
```bash | ||
python parser/server.py | ||
``` | ||
|
||
### CLI | ||
|
||
```bash | ||
python parser/cli.py | ||
``` | ||
|
||
## Global Dependencies | ||
|
||
- [Python](https://www.python.org/downloads) (tested on v3.11.4) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
[ | ||
{ | ||
"id": "1", | ||
"name": "Мониторы", | ||
"attributes": [ | ||
{ | ||
"name": "Модель" | ||
}, | ||
{ | ||
"name": "Диагональ экрана" | ||
}, | ||
{ | ||
"name": "Частота обновления экрана" | ||
}, | ||
{ | ||
"name": "Яркость" | ||
}, | ||
{ | ||
"name": "Время отклика" | ||
}, | ||
{ | ||
"name": "Максимальное разрешение" | ||
}, | ||
{ | ||
"name": "Глубина цвета" | ||
}, | ||
{ | ||
"name": "Контрастность" | ||
}, | ||
{ | ||
"name": "Веб-камера" | ||
}, | ||
{ | ||
"name": "Плотность пикселей" | ||
}, | ||
{ | ||
"name": "Поддержка HDR" | ||
}, | ||
{ | ||
"name": "Подсветка" | ||
}, | ||
{ | ||
"name": "Покрытие экрана" | ||
}, | ||
{ | ||
"name": "Соотношение сторон" | ||
}, | ||
{ | ||
"name": "Тип подсветки матрицы" | ||
}, | ||
{ | ||
"name": "Углы обзора" | ||
}, | ||
{ | ||
"name": "Технология защиты зрения" | ||
} | ||
] | ||
} | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
[DUCK] | ||
region = ru-ru | ||
fetch_timeout_sec = 13.5 | ||
link_count_limit = 150 | ||
|
||
[BRAIN] | ||
fetch_timeout_sec = 9.1 | ||
kv_len_range = (2, 85) | ||
k_threshold = 71 | ||
|
||
[SERVER] | ||
port = 8123 | ||
debug = false | ||
reload = true | ||
|
||
[API] | ||
secret = |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
#-*- coding: utf-8 -*- | ||
from configparser import ConfigParser | ||
|
||
from src.utils.sugar import nonstop | ||
from src.green import Green | ||
from src.duck import Duck | ||
from src.brain import Brain | ||
|
||
import asyncio, sys, os | ||
|
||
|
||
# Both paths are relative: the __main__ guard at the bottom of this script
# switches the working directory to the script's own folder before use.
_CONFIG_FILE_PATH = './config.ini'  # INI settings read via ConfigParser
_CATEGORY_DUMP_FILE_PATH = './category_dump.json'  # dump file handed to Green
|
||
@nonstop(2)
def _get_product_model() -> str:
    """Prompt on stdin for the product model and return it stripped.

    Raises:
        AssertionError: ('no product model') when the stripped input is empty.

    NOTE(review): ``nonstop`` comes from ``src.utils.sugar`` and is not
    visible here; presumably it reacts to the raised exception -- confirm.
    """
    product_model = input('[*] input product model: ').strip()
    # Explicit raise instead of `assert`: assert statements are removed under
    # `python -O`, which would let an empty model slip through. The exception
    # type is kept as AssertionError so existing handlers still match.
    if not product_model:
        raise AssertionError('no product model')

    return product_model
|
||
@nonstop(2)
def _get_category_id() -> int:
    """Prompt on stdin for the category id and return it as an int.

    Raises:
        ValueError: when the input cannot be converted by ``int()``.
        AssertionError: ('no category id') when the parsed id is 0.

    NOTE(review): ``nonstop`` comes from ``src.utils.sugar`` and is not
    visible here; presumably it reacts to the raised exception -- confirm.
    """
    category_id = int(input('[*] input category id: '))
    # Explicit raise instead of `assert`: assert statements are removed under
    # `python -O`, which would silently accept a zero id. The exception type
    # is kept as AssertionError so existing handlers still match.
    if not category_id:
        raise AssertionError('no category id')

    return category_id
|
||
async def _main() -> None:
    """Run one interactive parse and print the resulting summary.

    Steps: read the INI config, ask the user for a product model and a
    category id, load the category dump, collect the attribute names of that
    category, fetch product links through ``Duck`` and hand everything to
    ``Brain.get_product_summary``.

    Raises:
        AssertionError: when the config file cannot be read, when the
            category yields no attribute names, or when no product links
            are returned.
    """
    def _parse_int_tuple(raw: str) -> tuple[int, ...]:
        # '(2, 85)' -> (2, 85)
        return tuple(int(item.strip()) for item in raw.strip('()').split(','))

    # The 'tupleint' converter is exposed as confdad.gettupleint(...).
    confdad = ConfigParser(converters={'tupleint': _parse_int_tuple})

    # The checks below raise explicitly instead of using `assert`: under
    # `python -O` assert statements are removed entirely, which would also
    # drop the read() call itself and leave the config empty. AssertionError
    # is kept so the exception type seen by callers does not change.
    if not confdad.read(_CONFIG_FILE_PATH):
        raise AssertionError(f'{_CONFIG_FILE_PATH} not found')

    product_model = _get_product_model()
    product_category_id = _get_category_id()

    green = Green(_CATEGORY_DUMP_FILE_PATH)
    await green.load_category_dump()

    attribute_names = green.get_attribute_fields('name', product_category_id)
    if not attribute_names:
        raise AssertionError('no attributes found')

    duck = Duck(confdad.get('DUCK', 'region'))
    product_links = duck.get_links(
        product_model,
        timeout_sec=confdad.getfloat('DUCK', 'fetch_timeout_sec'),
        count_limit=confdad.getint('DUCK', 'link_count_limit'),
    )
    if not product_links:
        raise AssertionError('no product links')

    summary = await Brain.get_product_summary(
        product_links,
        product_model=product_model,
        attribute_names=attribute_names,
        fetch_timeout_sec=confdad.getfloat('BRAIN', 'fetch_timeout_sec'),
        kv_len_range=confdad.gettupleint('BRAIN', 'kv_len_range'),
        k_threshold=confdad.getint('BRAIN', 'k_threshold')
    )
    print(f'\n[result]: {summary}')
|
||
if __name__ == '__main__':
    # Work from the script's own directory so the relative config and dump
    # paths resolve no matter where the CLI was launched from. dirname() is
    # empty when the script is started by bare file name; nothing to do then.
    ROOT = os.path.dirname(sys.argv[0])
    if ROOT:
        os.chdir(ROOT)

    asyncio.run(_main())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
#-*- coding: utf-8 -*- | ||
from configparser import ConfigParser | ||
|
||
from fastapi import ( | ||
FastAPI, APIRouter, Depends, Request, HTTPException | ||
) | ||
from fastapi.middleware.cors import CORSMiddleware | ||
from pydantic import BaseModel | ||
import uvicorn | ||
|
||
from src.utils.sugar import datetime | ||
from src.green import Green | ||
from src.duck import Duck | ||
from src.brain import ( | ||
Brain, LinkSummary | ||
) | ||
|
||
import random, time, sys, os | ||
|
||
|
||
# Work from the script's own directory so the relative paths below resolve
# no matter where the server was launched from. dirname() is empty when the
# script is started by bare file name; in that case the cwd is left alone.
_ROOT = os.path.dirname(sys.argv[0])
if _ROOT:
    os.chdir(_ROOT)

_CONFIG_FILE_PATH = './config.ini'  # INI settings read via ConfigParser
_CATEGORY_DUMP_FILE_PATH = './category_dump.json'  # dump file handed to Green
|
||
class _APIface:
    # Namespace grouping the request/response schemas of the HTTP API.
    # Plain comments are used instead of docstrings so that the schema
    # descriptions generated from these models stay exactly as they were.

    class _AuthorizableRequest(BaseModel):
        # Base for request bodies that must carry the shared API secret.
        secret: str

    class ParseRequest(_AuthorizableRequest):
        # Body accepted by POST /parse.
        product_model: str  # stripped and rejected when empty by the handler
        category_id: int  # category whose attribute names are looked up

    class ParseResponse(BaseModel):
        # Body returned by POST /parse.
        summary: list[LinkSummary]
        elapsed_time_sec: int  # whole seconds, truncated with int()
        parsers_used: int  # the handler currently always reports 1
|
||
def _register_routes(
    app_router: APIRouter,
    confdad: ConfigParser
) -> list[Depends]:
    """Attach the API routes to *app_router* and build the shared dependencies.

    Args:
        app_router: router that receives the ``POST /parse`` route.
        confdad: parsed configuration; the DUCK, BRAIN and API sections are
            read here (BRAIN/kv_len_range through the ``tupleint`` converter).

    Returns:
        A one-element list holding the secret-checking dependency, meant to
        be passed as ``dependencies=`` when the router is included.
    """
    import hmac  # function-scope import: constant-time secret comparison

    green = Green(_CATEGORY_DUMP_FILE_PATH)
    duck = Duck(confdad.get('DUCK', 'region'))

    # NOTE: the handler deliberately has no docstring -- FastAPI would
    # publish it as the operation description in the generated docs.
    @app_router.post('/parse')
    async def _(request: _APIface.ParseRequest) -> _APIface.ParseResponse:
        request.product_model = request.product_model.strip()
        if not request.product_model:
            raise HTTPException(400, detail='empty product model')

        start_time = time.time()

        # Load the dump on first use, and re-load it on roughly one request
        # in thirteen (randint(0, 12) is inclusive on both ends).
        if not green.is_category_dump_loaded or random.randint(0, 12) == 0:
            await green.load_category_dump()

        attribute_names = green.get_attribute_fields('name', request.category_id)
        if not attribute_names:
            raise HTTPException(424, detail=f'no attributes found ({request.category_id})')

        product_links = duck.get_links(
            request.product_model,
            timeout_sec=confdad.getfloat('DUCK', 'fetch_timeout_sec'),
            count_limit=confdad.getint('DUCK', 'link_count_limit')
        )
        if not product_links:
            raise HTTPException(424, detail='failed to get links to sites')

        # A falsy summary is normalised to an empty list for the response model.
        summary = await Brain.get_product_summary(
            product_links,
            product_model=request.product_model,
            attribute_names=attribute_names,
            fetch_timeout_sec=confdad.getfloat('BRAIN', 'fetch_timeout_sec'),
            kv_len_range=confdad.gettupleint('BRAIN', 'kv_len_range'),
            k_threshold=confdad.getint('BRAIN', 'k_threshold')
        ) or []

        return _APIface.ParseResponse(
            summary=summary,
            elapsed_time_sec=int(time.time() - start_time),
            parsers_used=1
        )

    async def assert_secret(request: Request) -> None:
        # Reject with 401 unless the JSON body carries the configured secret.
        # A body that is not valid JSON, or is not a JSON object, cannot carry
        # a secret: answer 401 instead of failing with an unhandled error.
        try:
            payload = await request.json()
        except ValueError:
            raise HTTPException(401) from None

        provided = payload.get('secret') if isinstance(payload, dict) else None
        if not isinstance(provided, str):
            raise HTTPException(401)

        expected = confdad.get('API', 'secret')
        # Constant-time comparison so response timing does not leak how much
        # of the secret matched. Bytes are compared because compare_digest()
        # only accepts ASCII str; 'surrogatepass' keeps encode() from raising
        # on lone surrogates that JSON escapes can produce.
        # NOTE(review): an empty configured secret is matched by an empty
        # "secret" field, exactly as before -- set API/secret in production.
        if not hmac.compare_digest(
            provided.encode('utf-8', 'surrogatepass'),
            expected.encode('utf-8', 'surrogatepass')
        ):
            raise HTTPException(401)

    return [Depends(assert_secret)]
|
||
# Module-level configuration and application objects. They are created at
# import time because uvicorn's reload mode is given the import string
# '__main__:_app' in the __main__ guard below.
_confdad = ConfigParser(converters={
    # '(2, 85)' -> (2, 85); exposed as _confdad.gettupleint(section, option)
    'tupleint': lambda raw: tuple(
        int(item.strip()) for item in raw.strip('()').split(',')
    )
})
# read() must not sit inside an `assert`: under `python -O` the whole
# statement -- including the read itself -- is removed and the config would
# stay empty. AssertionError is kept so the failure type does not change.
if not _confdad.read(_CONFIG_FILE_PATH):
    raise AssertionError(f'{_CONFIG_FILE_PATH} not found')

_app = FastAPI(
    title='CM Parser API',
    description='CM Parser API',
    version='1.0.0',
    debug=_confdad.getboolean('SERVER', 'debug'),
    redoc_url='/'  # serve the ReDoc page at the site root
)
_app.add_middleware(
    CORSMiddleware,
    allow_origins=['*'],
    allow_methods=['POST']
)

# _register_routes() adds the routes to the router and returns the
# dependency list applied to all of them when the router is included.
_app_router = APIRouter()
_app.include_router(_app_router,
    dependencies=_register_routes(_app_router, _confdad)
)
|
||
if __name__ == '__main__':
    # Start uvicorn; any exception that escapes is appended to error.log,
    # echoed to the console, and the process exits with status 1 once the
    # user confirms with Enter.
    try:
        reload_enabled = _confdad.getboolean('SERVER', 'reload')
        port = _confdad.getint('SERVER', 'port')

        if reload_enabled:
            # Reload mode is given an import string rather than the app
            # object, and additionally watches config and dump files.
            uvicorn.run(
                '__main__:_app',
                port=port,
                reload=True,
                reload_includes=['*.ini', '*.json']
            )
        else:
            uvicorn.run(_app, port=port)
    except Exception as e:
        e_message = f'[{datetime()}][unhandled]: {str(e) or "@empty"}'

        with open('error.log', 'a', encoding='utf-8') as log_file:
            log_file.write(f'{e_message}\n')

        print(f'\n{e_message}')

        input('\n- Press Enter to exit...')
        sys.exit(1)
Oops, something went wrong.