Skip to content

Commit

Permalink
Add a processing step
Browse files Browse the repository at this point in the history
I was running these locally, in series; this codifies them.
  • Loading branch information
waldoj committed Apr 20, 2024
1 parent 503f2b5 commit c76f2f1
Showing 1 changed file with 57 additions and 6 deletions.
63 changes: 57 additions & 6 deletions finds_you.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import requests
import re

def fetch_books_with_snippets(query, api_key):
url = "https://www.googleapis.com/books/v1/volumes"
Expand Down Expand Up @@ -30,15 +31,65 @@ def fetch_books_with_snippets(query, api_key):

return snippets

def process_snippets(snippets, replacements):
    """
    Clean up text snippets with regex replacements and keep the usable ones.

    Every (pattern, replacement) pair is applied to each snippet with
    ``re.sub``, in the order given, so later patterns operate on the output
    of earlier ones. A cleaned snippet is kept only if it starts with
    'finds you' and contains no ASCII control characters (0x00-0x1F, 0x7F).

    Parameters:
    - snippets (list of str): A list of text snippets to be processed.
    - replacements (list of tuple): A list of tuples where each tuple contains a
      regex pattern (str) and a replacement string (str).

    Returns:
    - list of str: The kept snippets with duplicates removed, in the order in
      which each one first appeared.
    """
    # Compiled once up front: the pattern does not depend on the snippet.
    control_chars = re.compile(r'[\x00-\x1F\x7F]')

    # Dict keys de-duplicate while preserving insertion order, so the result
    # is deterministic (a set would return the snippets in arbitrary order).
    unique_snippets = {}

    for snippet in snippets:
        cleaned = snippet
        for pattern, replacement in replacements:
            cleaned = re.sub(pattern, replacement, cleaned)

        if not cleaned.startswith('finds you'):
            continue
        if control_chars.search(cleaned):
            continue
        unique_snippets[cleaned] = None

    return list(unique_snippets)


def save_snippets_to_file(snippets, file_name):
    """
    Append snippets to a UTF-8 text file, one snippet per line.

    The file is opened in append mode, so existing content is kept and
    repeated calls accumulate. Snippets that do not contain the text
    "finds you" are skipped; every other snippet is written exactly once,
    followed by a newline.

    Parameters:
    - snippets (iterable of str): The snippets to write.
    - file_name (str): Path of the file to append to; created if missing.
    """
    with open(file_name, 'a', encoding='utf-8') as file:
        # One write per snippet: the duplicated write statement doubled
        # every line in the output file.
        file.writelines(
            snippet + '\n' for snippet in snippets if "finds you" in snippet
        )

# Usage example: fetch snippets for a query, save them, then clean them up
# with a series of regex replacements and save the cleaned versions.
api_key = 'API_KEY' # Replace with your actual Google API key
query = '"finds you and"' # Replace with your search query
snippets = fetch_books_with_snippets(query, api_key)
# NOTE(review): the raw snippets go to the same file that the processed
# snippets are appended to below -- confirm that both are wanted there.
save_snippets_to_file(snippets, 'finds_you.txt')
print("Snippets successfully saved to file.")

# Applied in this order; each rule sees the output of the previous one.
replacements = [
    # Remove the stray space in front of punctuation.
    (r' \. ', '. '),
    (r' , ', ', '),
    (r' ; ', '; '),
    (r' : ', ': '),
    # Drops any three leading characters followed by a space (the dots are
    # unescaped). NOTE(review): presumably meant to strip a literal leading
    # "... " -- confirm before escaping.
    (r'^... ', ''),
    # Discard everything before "finds you " (greedy, so up to the last one).
    (r'^(.*)finds you ', 'finds you '),
    # Cut at the first '.', '!' or '?' and end the text there with a period.
    (r'[\.!\?](.*)', '.'),
    # Strip HTML-style tags.
    (r'<.*?>', ''),
    (r'&#39;', '’'),
    (r' - ', '-'),
    # Remove "- " after a letter, keeping the letter. The replacement must be
    # a raw string: a plain '\1' is the control character U+0001, not a
    # backreference to the captured letter.
    (r'([A-Za-z])- ', r'\1'),
    (r'&nbsp;', ''),
    (r'&quot;', '"'),
]
processed_snippets = process_snippets(snippets, replacements)

save_snippets_to_file(processed_snippets, 'finds_you.txt')
print("Processed snippets successfully saved to finds_you.txt.")

0 comments on commit c76f2f1

Please sign in to comment.