Skip to content

Commit

Permalink
Merge pull request #33 from milobella/fixes/various_around_init
Browse files Browse the repository at this point in the history
Various fixes around initialization and verify PLAY_MOVIE feasibility
  • Loading branch information
celian-garcia authored Oct 9, 2022
2 parents 33be9f6 + b6e7bc0 commit 225f5dd
Show file tree
Hide file tree
Showing 9 changed files with 792 additions and 81 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,4 @@ RUN pip install --upgrade pip && \
RUN python -m spacy download fr_core_news_md

# Build the main command
CMD ["sanic", "cerebro.server.app"]
CMD ["sanic", "cerebro.server.app", "--host=0.0.0.0", "--port=9444"]
7 changes: 7 additions & 0 deletions Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,13 @@ $ sanic cerebro.server.app --dev

## Upload and train a model
When the server is running, this is not over. You have two tasks to perform to make it work.
To facilitate these steps, scripts have been provided in the [scripts](scripts) folder.
```bash
cd scripts
./upload_model.sh
./train_model.sh
./poll_model.sh # To repeat until the 503 message disappears /!\ It can take several minutes
```

#### Upload the model (not necessary if the model is already in database)
```bash
Expand Down
13 changes: 9 additions & 4 deletions cerebro.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
cerebro:

features:
use_mongo: false
use_spacy: true

spacy:
model: fr_core_news_md
iterations: 70
iterations: 80
min_score: 0.1
chunk_size: 1000

features:
use_mongo: false
use_spacy: true
mongodb:
url: "mongodb://root:example@mongo:27017/?authSource=admin"
database: cerebro
34 changes: 34 additions & 0 deletions cerebro/repository/nlp_repository_memory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from typing import List, Dict

from cerebro.repository.nlp_repository import Repository


class NLPRepositoryMemory(Repository):
    """Repository implementation that keeps everything in process memory.

    Samples, categories and entity names are stored per model id in plain
    dictionaries; nothing is persisted, so the content is lost when the
    process stops.
    """

    def __init__(self):
        # All three mappings are keyed by model id.
        self._samples: Dict[str, List[Dict]] = {}
        self._categories: Dict[str, List[str]] = {}
        self._entities: Dict[str, List[str]] = {}

    def get_samples(self, model_id: str, start: int, limit: int) -> List[Dict]:
        """Return at most ``limit`` samples of ``model_id`` from index ``start``.

        An unknown ``model_id`` yields an empty list.
        """
        return self._samples.get(model_id, [])[start: start + limit]

    def get_categories(self, model_id: str) -> List[str]:
        """Return the distinct categories of ``model_id`` (empty if unknown).

        A new list is returned on every call so callers cannot alter the
        stored state by mutating the result.
        """
        return list(self._categories.get(model_id, []))

    def get_entities(self, model_id: str) -> List[str]:
        """Return the distinct entity names of ``model_id`` (empty if unknown).

        A new list is returned on every call so callers cannot alter the
        stored state by mutating the result.
        """
        return list(self._entities.get(model_id, []))

    def update(self, model_id: str, samples: List[Dict]):
        """Replace everything stored for ``model_id`` with ``samples``.

        Each sample must have a ``"categories"`` iterable and may have an
        ``"entities"`` iterable of dicts carrying a ``"name"`` key.

        Raises:
            KeyError: if a sample has no ``"categories"`` key or an entity
                has no ``"name"`` key. Nothing is stored in that case.
        """
        samples = list(samples)

        # dict.fromkeys de-duplicates while keeping first-seen order, which
        # gives real lists (as the getters are annotated) with a
        # deterministic order, unlike a set.
        categories = list(dict.fromkeys(
            category
            for sample in samples
            for category in sample["categories"]
        ))
        entities = list(dict.fromkeys(
            entity["name"]
            for sample in samples
            for entity in sample.get("entities", ())
        ))

        # Assign only once everything has been computed, so a malformed
        # sample cannot leave the model half-updated.
        self._categories[model_id] = categories
        self._entities[model_id] = entities
        self._samples[model_id] = samples

    def clear(self, model_id: str):
        """Forget everything stored for ``model_id``.

        Clearing a model that was never stored is a no-op.
        """
        self._categories.pop(model_id, None)
        self._entities.pop(model_id, None)
        self._samples.pop(model_id, None)
17 changes: 14 additions & 3 deletions cerebro/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@
from sanic import Sanic
from sanic.config import Config

from cerebro.repository.nlp_repository import Repository
from cerebro.repository.nlp_repository_fake import NLPRepositoryFake
from cerebro.repository.nlp_repository_memory import NLPRepositoryMemory
from cerebro.repository.nlp_repository_mongo import NLPRepositoryMongo
from cerebro.spacy.spacy_manager import SpaCyModelManager
from cerebro.spacy.spacy_request_service import SpaCyRequestService
Expand Down Expand Up @@ -43,6 +42,15 @@ def _to_uppercase(self, obj: Dict[str, Any]) -> Dict[str, Any]:
return retval


# Initialize logger
# Record layout: [timestamp] pid-LEVEL module::function():l<line>: message
logging_format = (
    "[%(asctime)s] %(process)d-%(levelname)s "
    "%(module)s::%(funcName)s():l%(lineno)d: "
    "%(message)s"
)
logging.basicConfig(level=logging.DEBUG, format=logging_format)

# Root logger, shared by the rest of this module.
logger = logging.getLogger()

# Initialize the sanic app
Expand All @@ -55,7 +63,7 @@ def _to_uppercase(self, obj: Dict[str, Any]) -> Dict[str, Any]:
database=config["CEREBRO"]["MONGODB"]["DATABASE"]
)
else:
repository = NLPRepositoryFake()
repository = NLPRepositoryMemory()

app.add_route(HtmlView.as_view(), '/')
app.add_route(SamplesView.as_view(repository), '/models/<model_id:str>/samples')
Expand Down Expand Up @@ -85,3 +93,6 @@ def _to_uppercase(self, obj: Dict[str, Any]) -> Dict[str, Any]:
"\n\tuse_spacy = true"
"\n ================="
"\n################################################")

if __name__ == "__main__":
app.run()
8 changes: 8 additions & 0 deletions cerebro/spacy/spacy_model_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,18 @@ def load_model(self):
if PIPE_ENTITY not in self._nlp.pipe_names:
self._nlp.add_pipe(PIPE_ENTITY, last=True)

entity_pipe = self._nlp.get_pipe(PIPE_ENTITY)
for entity in self._entities:
entity_pipe.add_label(entity)

# === Load categories ===
if PIPE_INTENT not in self._nlp.pipe_names:
self._nlp.add_pipe(PIPE_INTENT, last=True)

intent_pipe = self._nlp.get_pipe(PIPE_INTENT)
for intent in self._intents:
intent_pipe.add_label(intent)

def register_samples(self, samples: List[Dict]):
# Build the train data
train_intent_data = []
Expand Down
3 changes: 3 additions & 0 deletions scripts/poll_model.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/usr/bin/env bash

# Send a GET request to the train endpoint of the "default" model on the
# local server, printing the response headers and verbose transfer details.
curl --include --verbose 'http://localhost:9444/models/default/train'
Loading

0 comments on commit 225f5dd

Please sign in to comment.