# blog_gen_algo_v0.1.py
# BLOGEN v0.1 (Blog Generation Algorithm)
import datetime
import sys
import streamlit as st
from md_toc import build_toc
import xml.etree.ElementTree as ET
from tools.chatgpt import chat_with_open_ai
from tools.decision import require_data_for_prompt, require_better_prompt, find_tone_of_writing
from tools.file import create_file_with_keyword, append_content_to_file
from tools.logger import log_info, setup_logger
from tools.serpapi import get_related_queries, get_image_with_commercial_usage
from tools.subprocess import open_file_with_md_app
from tools.const import SERVICE_NAME
from tools.const import SERVICE_DESCRIPTION
from tools.const import SERVICE_URL
# Ordered prompt templates, one per generation step. Each template is filled
# in with str.format() by generate_blog_for_keywords; the placeholders used are
# {primary_keywords}, {tone_of_writing}, {sitemap_urls}, {service_name},
# {service_description} and {service_url}.
#
# Adjacent string literals are concatenated implicitly, so every fragment must
# end with a space (or the next one start with "\n") when the text continues;
# otherwise sentences get glued together ("tone toease", "yet.Make").
steps_prompts = [
    # Step 1
    "Step 1: Given the primary keywords - {primary_keywords}, generate a captivating 5-8 words blog title. "
    "After that, write a 40-50 words teaser in {tone_of_writing} tone, "
    "something that creates curiosity and willingness to read more in reader's mind. "
    "Make sure to write in pure markdown format, with the blog title in H1 heading, "
    "and teaser in paragraph format.",
    # Step 2
    "Step 2: On the basis of the user intent for asking {primary_keywords}, set up a base ground of knowledge. "
    "Write facts and theories on this topic, add well-known data points and sources here. "
    "Use maximum 250 words for the content. Don't reach any conclusion yet. "
    "\nMake sure to write in pure markdown format, with headings and subheadings (H2 to H3), "
    "paragraphs, lists and text formating (such as bold, italic, strikethrough, etc)."
    "\nLink 2-3 other of my blog posts (found in the sitemap posted below) within the content. "
    "Make sure to sound natural when linking to other blog posts, i.e., the text can only be slightly altered to accommodate a better context for the link. "
    # The "but rather" clause only makes sense if the anchor text is NOT the
    # linked post's title, so the sentence is phrased as a negation.
    "Make sure the anchor text is not the actual title of the other blog post, but rather something in the text that goes along the rationale. "
    "Sitemap: {sitemap_urls}",
    # Step 3
    "Step 3: If applicable, explain step by step how to do the required actions for the user intent in {primary_keywords}. "
    "Use maximum 400 words for the content. Don't reach any conclusion yet. "
    "Make sure to write in pure markdown format, with headings and subheadings (H2 to H3), "
    "paragraphs, lists and text formating (such as bold, italic, strikethrough, etc).",
    # Step 4
    "Step 4: Introduce {service_name}, described as {service_description}. "
    "Explain to the user how {service_name} can help them with their problem. "
    "Make sure to link {service_url} in the content. "
    "Demonstrate how to use {service_name} in easy steps. Don't go beyond what is mentioned in the service description. "
    "Use maximum 100 words for the content. Don't reach any conclusion yet. "
    "Make sure to write in pure markdown format, with headings and subheadings (H2 to H3), "
    "paragraphs, lists and text formating (such as bold, italic, strikethrough, etc).",
    # Step 5
    "Step 5: Generate a conclusion based on the content of this blog. Use {tone_of_writing} tone to "
    "ease the user intent to take the next step on {primary_keywords}. "
    "Use maximum 150 words for the content. "
    "Make sure to write in pure markdown format, with headings and subheadings (H1 to H4), "
    "paragraphs, lists and text formating (such as bold, italic, strikethrough, etc).",
]
def load_sitemap_and_extract_urls(sitemap_path):
    """Collect the text of every ``url/loc`` entry in a sitemap XML document.

    Args:
        sitemap_path: Source handed straight to ``ET.parse``.

    Returns:
        A list holding the ``.text`` of each ``<loc>`` that sits inside a
        ``<url>`` child of the document root, in document order. Both tags are
        matched in the sitemaps.org 0.9 namespace, so documents that do not
        use that namespace yield an empty list.
    """
    # Sitemap files declare this as their default namespace; ElementTree
    # needs it spelled out to match the tags.
    sitemap_ns = {'ns': 'http://www.sitemaps.org/schemas/sitemap/0.9'}
    document_root = ET.parse(sitemap_path).getroot()

    locations = []
    for loc_node in document_root.iterfind('ns:url/ns:loc', sitemap_ns):
        locations.append(loc_node.text)
    return locations
def _build_system_message(primary_keywords, secondary_keywords):
    """Compose the system prompt that frames the whole conversation."""
    # Every fragment ends with a space so the implicitly concatenated
    # sentences do not run together ("blog.Write", "fits.Use").
    return (
        "SYSTEM: Act as an experienced SEO specialist and experienced content writer. "
        f"Given a blog with topic {primary_keywords}, help in generating rich content "
        "for SEO optimized blog. "
        "Write custom heading for this response. "
        f"Naturally use primary Keywords: [{primary_keywords}], and "
        f"secondary keywords: [{secondary_keywords}] wherever it fits. "
        "Use John Gruber’s Markdown to format your responses. "
        "Use original content, avoid plagiarism, increase readability."
    )


def _prepend_toc(filepath):
    """Rewrite the file at *filepath* with its generated table of contents on top."""
    with open(filepath, 'r') as file:
        content = file.read()
    # The ToC is built from the file as it is on disk, before it is rewritten.
    toc = build_toc(filepath)
    with open(filepath, 'w') as file:
        file.write(toc + "\n\n" + content)


def generate_blog_for_keywords(primary_keywords="knee replacement surgery", service_name=SERVICE_NAME, service_description=SERVICE_DESCRIPTION, service_url=SERVICE_URL, sitemap_path='sitemap.xml'):
    """Generate a blog post for *primary_keywords* by walking through ``steps_prompts``.

    A file is created with ``create_file_with_keyword`` and opened with
    ``open_file_with_md_app``. Each prompt template is then sent to
    ``chat_with_open_ai`` on top of the running conversation, each response is
    appended to the file, and finally a table of contents is prepended.

    Args:
        primary_keywords: Topic keywords substituted into every prompt.
        service_name: Value for the ``{service_name}`` placeholder.
        service_description: Value for the ``{service_description}`` placeholder.
        service_url: Value for the ``{service_url}`` placeholder.
        sitemap_path: Sitemap XML whose URLs are offered to the model for
            internal linking. Defaults to ``'sitemap.xml'`` in the working
            directory.

    Returns:
        None. The result is the file written at the path returned by
        ``create_file_with_keyword``.
    """
    # ``CLI`` is only assigned inside the ``__main__`` guard, so it does not
    # exist when this module is imported; fall back to False (the value that
    # already maps to ``None`` below) instead of raising NameError mid-run.
    # NOTE(review): Streamlit is passed when CLI is truthy and None otherwise,
    # which looks inverted — confirm against append_content_to_file.
    ui = st if globals().get("CLI", False) else None

    messages = []
    filepath = create_file_with_keyword(primary_keywords)
    log_info(f'🗂️ File Created {filepath}')
    open_file_with_md_app(filepath)

    secondary_keywords = get_related_queries(primary_keywords)
    log_info(f'🎬 Primary Keywords: {primary_keywords}')
    log_info(f'📗 Secondary Keywords: {secondary_keywords}')

    # Create the system message with primary and secondary keywords
    system_message_1 = _build_system_message(primary_keywords, secondary_keywords)
    log_info(f'🤖 System:\n{system_message_1}\n\n')
    messages.append({"role": "system", "content": system_message_1})

    tone_of_writing = find_tone_of_writing(primary_keywords, messages)

    sitemap_urls = load_sitemap_and_extract_urls(sitemap_path)
    log_info(f'🗺️ Sitemap URLs: {sitemap_urls}')

    total_words = 0
    already_sourced = []
    # Image insertion is switched off; flip this flag to enable it.
    add_image = False

    for i, step_prompt in enumerate(steps_prompts, start=1):
        # Pre-defined prompt. str.format ignores keyword arguments a template
        # does not use, so every placeholder value is passed to every step.
        prompt = step_prompt.format(
            primary_keywords=primary_keywords,
            tone_of_writing=tone_of_writing,
            service_name=service_name,
            service_description=service_description,
            service_url=service_url,
            sitemap_urls=sitemap_urls,
        )
        log_info(f'⏭️ Step {i} # Predefined Prompt: {prompt}')
        messages.append({"role": "user", "content": prompt})

        # Check for better prompt (skipped for the first two steps)
        if i > 2:
            better_prompt = require_better_prompt(primary_keywords, prompt, messages)
            if better_prompt:
                # NOTE(review): only the local ``prompt`` is replaced; the user
                # message appended above keeps the predefined text, so the
                # improved prompt only reaches the image/news lookups below.
                prompt = better_prompt

        # Add image
        if add_image:
            image_content, already_sourced = get_image_with_commercial_usage(primary_keywords, prompt, already_sourced)
            if image_content:
                append_content_to_file(filepath, image_content, ui)

        # Add News
        news_data = require_data_for_prompt(primary_keywords, prompt)
        if news_data:
            messages.append({"role": "assistant", "content": f"Found news on the topic: {news_data}"})

        response = chat_with_open_ai(messages, temperature=0.9)
        messages.append({"role": "assistant", "content": response})
        append_content_to_file(filepath, response, ui)

        # split() without arguments breaks on any whitespace run, so newlines
        # and repeated spaces in the Markdown do not distort the count.
        word_count = len(response.split())
        log_info(f'🔺 ️Completed Step {i}. Words: {word_count}')
        total_words += word_count

    # Footer is currently disabled:
    # footer_message = f"🎁 Finished generation at {datetime.datetime.now()}. 📬 Total words: {total_words}"
    # append_content_to_file(filepath, footer_message, ui)

    # Insert the table of contents at the beginning of the generated file
    _prepend_toc(filepath)
def run_streamlit_app():
    """Render the Streamlit front end: a title, a text box and a submit button.

    Pressing "Submit" passes the entered text to generate_blog_for_keywords as
    the primary keywords, using that function's default service settings.
    Blank input is rejected with a warning instead of starting a generation.
    """
    st.title("📝BLOGEN v0.1 (Blog Generation Algorithm)")

    # Add a text input field
    input_text = st.text_input("Enter some text:")

    # Add a submit button
    if st.button("Submit"):
        # The terminal entry point refuses blank keywords too; without this
        # guard an empty submission would kick off the whole pipeline.
        if not input_text.strip():
            st.warning("Please enter the primary keywords before submitting.")
            return
        # Execute the function with the input text
        generate_blog_for_keywords(input_text)
def run_terminal_app(keywords):
    """Generate a blog post for *keywords* using the configured service constants.

    Args:
        keywords: Primary keywords forwarded to generate_blog_for_keywords.
    """
    generate_blog_for_keywords(
        primary_keywords=keywords,
        service_name=SERVICE_NAME,
        service_description=SERVICE_DESCRIPTION,
        service_url=SERVICE_URL,
    )
if __name__ == "__main__":
    # Module-level switch, also read by generate_blog_for_keywords:
    # True runs the terminal flow, False starts the Streamlit UI.
    CLI = True
    setup_logger()

    if not CLI:
        run_streamlit_app()
    else:
        # Everything after the script name is treated as the keyword phrase.
        _keywords = " ".join(sys.argv[1:])
        if not _keywords.strip():
            print("Error: keywords not specified.\nUSAGE: python blog_gen_algo_v0.1.py <keywords>")

        # Keep prompting until something other than whitespace is entered.
        while not _keywords.strip():
            _keywords = input("\nEnter the primary keywords:")

        log_info('Starting BLOGEN...')
        run_terminal_app(_keywords)