-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsetup.py
134 lines (112 loc) · 4.38 KB
/
setup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#!/usr/bin/env python3
"""
Setup script for YoutubeNLP that handles dependencies, model downloads, and configuration.
"""
import subprocess
import sys
import os
import logging
from pathlib import Path
from utils.nltk_setup import setup_nltk_resources, verify_nltk_resources
# Configure logging: INFO and above, timestamped and level-tagged, sent to a
# console stream handler. The named logger below is shared by every setup step.
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger("YoutubeNLP-Setup")
def install_requirements() -> bool:
    """
    Run ``pip install -r requirements.txt`` using the current interpreter.

    The requirements file name is passed to pip as a relative path, so it is
    looked up from the current working directory.

    Returns:
        bool: True if pip exited with status 0; False if it exited with a
        non-zero status (the error is logged).
    """
    pip_command = [sys.executable, "-m", "pip", "install", "-r", "requirements.txt"]

    logger.info("Installing requirements from requirements.txt...")
    try:
        subprocess.check_call(pip_command)
    except subprocess.CalledProcessError as e:
        logger.error(f"Error installing requirements: {e}")
        return False

    logger.info("Requirements installed successfully.")
    return True
def install_spacy_models() -> bool:
    """
    Download the spaCy ``en_core_web_sm`` model using the current interpreter.

    A failed download is logged as an error followed by a warning that some
    entity detection features may be limited; it is reported to the caller
    through the return value rather than by raising.

    Returns:
        bool: True if the download command exited with status 0; False if it
        exited with a non-zero status.
    """
    download_command = [sys.executable, "-m", "spacy", "download", "en_core_web_sm"]

    logger.info("Installing spaCy English model...")
    try:
        subprocess.check_call(download_command)
    except subprocess.CalledProcessError as e:
        logger.error(f"Error installing spaCy model: {e}")
        logger.warning("Some entity detection features may be limited.")
        return False

    logger.info("SpaCy model installed successfully.")
    return True
def create_directories():
    """Make sure the project's working directories exist, creating missing ones"""
    required_dirs = (
        'models/cache',    # Cache for downloaded models and analysis results
        'data/raw',        # Raw downloaded comments
        'data/processed',  # Processed analysis results
    )

    for dir_path in required_dirs:
        target = Path(dir_path)
        # Anything already present at this path is left alone (and not logged).
        if target.exists():
            continue
        logger.info(f"Creating directory: {dir_path}")
        # parents=True also creates the intermediate 'models' / 'data' folders.
        target.mkdir(parents=True, exist_ok=True)
def check_google_credentials() -> bool:
    """
    Check if Google Cloud credentials are properly configured.

    The checks run in order, and the first failure is logged and ends the
    check:

    1. The GOOGLE_APPLICATION_CREDENTIALS environment variable is set to a
       non-empty value (otherwise setup instructions are logged).
    2. That value names an existing regular file.
    3. The ``google.cloud.language_v1`` package can be imported.
    4. A ``LanguageServiceClient`` can be constructed.

    Returns:
        bool: True if every check passes, False otherwise.
    """
    creds_var = 'GOOGLE_APPLICATION_CREDENTIALS'

    # An empty value is treated like an unset variable: neither can name a
    # key file, and the setup instructions are the useful response to both.
    creds_path = os.environ.get(creds_var)
    if not creds_path:
        logger.warning(f"{creds_var} environment variable not set")
        logger.info("To enable Google Cloud NLP features:")
        logger.info("1. Create a service account key in Google Cloud Console")
        logger.info(f"2. Export {creds_var}=/path/to/credentials.json")
        logger.info("The application will fall back to local NLP processing.")
        return False

    # is_file() rather than exists(): a directory at this path is not a
    # usable credentials file.
    if not Path(creds_path).is_file():
        logger.error(f"Credentials file not found: {creds_path}")
        return False

    try:
        # Imported lazily so this script can run before (or without) the
        # Google Cloud client library being installed.
        from google.cloud import language_v1

        # Constructing the client is the validation step; the instance itself
        # is not needed afterwards.
        # NOTE(review): presumably construction loads the key file named by
        # the environment variable — confirm against the client library docs.
        language_v1.LanguageServiceClient()
    except ImportError as e:
        # A missing client library is an installation problem, not a
        # credentials problem, so it gets its own message.
        logger.error(f"Google Cloud Language client library is not available: {e}")
        return False
    except Exception as e:
        # Deliberately broad: this check is best-effort and must not abort
        # setup, whatever the client library raises.
        logger.error(f"Error validating Google Cloud credentials: {e}")
        return False

    logger.info("Google Cloud credentials validated successfully")
    return True
def main() -> None:
    """
    Run the YoutubeNLP setup sequence and log a status summary.

    Required steps (package requirements, NLTK resources) terminate the
    process with exit status 1 when they fail. Optional steps (spaCy model,
    Google Cloud credentials) never abort setup; their outcome is only
    reflected in the status summary.

    NOTE(review): setup_nltk_resources is imported at module top, i.e. before
    install_requirements() has run — confirm that utils.nltk_setup does not
    need a not-yet-installed package at import time.
    """
    logger.info("Starting YoutubeNLP setup...")

    # Step 1: Install package requirements
    if not install_requirements():
        logger.error("Failed to install requirements. Setup cannot continue.")
        sys.exit(1)

    # Step 2: Set up NLTK resources
    if not setup_nltk_resources():
        logger.error("Failed to set up critical NLTK resources. Setup cannot continue.")
        sys.exit(1)

    # Step 3: Install spaCy models (optional) — keep the result so the
    # summary below can report it instead of silently dropping a failure.
    has_spacy = install_spacy_models()

    # Step 4: Create necessary directories
    create_directories()

    # Step 5: Check credentials (optional)
    has_google = check_google_credentials()

    # Final status report
    logger.info("\nSetup Status:")
    logger.info("✓ Package requirements installed")
    logger.info("✓ NLTK resources configured")
    logger.info(f"{'✓' if has_spacy else '⚠'} spaCy model: {'Installed' if has_spacy else 'Not installed (entity detection limited)'}")
    logger.info(f"{'✓' if has_google else '⚠'} Google Cloud NLP: {'Enabled' if has_google else 'Disabled (using fallback)'}")
    logger.info("\nSetup complete! You can now run the application:")
    logger.info(" streamlit run app.py")


# Run the setup sequence only when executed as a script, not when imported.
if __name__ == "__main__":
    main()