-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbuild_chroma.py
64 lines (46 loc) · 13.8 KB
/
build_chroma.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import chromadb
import os
import string
# Create chroma db
# Build a Chroma PersistentClient with path "./db" (presumably the on-disk
# location of the store, relative to the working directory -- confirm).
client = chromadb.PersistentClient(path="./db")
# Get-or-create the collection named "test"; `collection` is the handle the
# transcript chunks are added to at the end of this script.
collection = client.get_or_create_collection("test")
# Episode titles. Transcript file names are derived from these strings further
# down, so every title (including any trailing space) must stay exactly as
# written. One title per line keeps each string literal on a single physical
# line.
og_titles = [
    'How to Prevent & Treat Colds & Flu',
    'David Goggins: How to Build Immense Inner Strength',
    'AMA #14: 2023 Philanthropy, Evening Routine, Light Therapy, Health Metrics & More',
    'Rick Rubin: Protocols to Access Creative Energy and Process',
    'Dr. Robert Lustig: How Sugar & Processed Foods Impact Your Health',
    'LIVE EVENT Q&A: Dr. Andrew Huberman Question & Answer in Chicago, IL',
    'Dr. Karen Parker: The Causes & Treatments for Autism',
    'Robert Greene: A Process for Finding & Achieving Your Unique Purpose',
    'AMA #13: Winter Months & Sickness, Wim Hof Breathing & Stressors',
    'Dr. Adam Grant: How to Unlock Your Potential, Motivation & Unique Abilities',
    'A Science-Supported Journaling Protocol to Improve Mental & Physical Health',
    'LIVE EVENT Q&A: Dr. Andrew Huberman Question & Answer in Toronto, ON',
    'Dr. Natalie Crawford: Female Hormone Health, Fertility & Vitality',
    'Dr. Michael Eisenberg: Improving Male Sexual Health, Function & Fertility ',
    'AMA #12: Thoughts on Longevity Supplements (Resveratrol, NR, NMN, Etc.) & How to Improve Memory',
    'Mental Health Toolkit: Tools to Bolster Your Mood & Mental Health',
    'Mark Zuckerberg & Dr. Priscilla Chan: Curing All Human Diseases & the Future of Health & Technology',
    'Dr. Lisa Feldman Barrett: How to Understand Emotions',
    'How to Increase Your Willpower & Tenacity',
    'Chris Voss: How to Succeed at Hard Conversations',
    'AMA #11: Improve Task Switching & Productivity and Reduce Brain Fog',
    'Guest Series | Dr. Paul Conti: Tools and Protocols for Mental Health',
    'U.S. Surgeon General Dr. Vivek Murthy: Efforts & Challenges in Promoting Public Health',
    'GUEST SERIES | Dr. Paul Conti: How to Build and Maintain Healthy Relationships',
    'How to Use Music to Boost Motivation, Mood & Improve Learning',
    'GUEST SERIES | Dr. Paul Conti: How to Improve Your Mental Health',
    'Journal Club with Dr. Peter Attia | Metformin for Longevity & The Power of Belief Effects',
    'GUEST SERIES | Dr. Paul Conti: How to Understand & Assess Your Mental Health',
    'Marc Andreessen: How Risk Taking, Innovation & Artificial Intelligence Transform Human Experience',
    'AMA #10: Benefits of Nature & “Grounding," Hearing Loss Research & Avoiding Altitude Sickness',
    'Goals Toolkit: How to Set & Achieve Your Goals',
    'Dr. David Linden: Life, Death & the Neuroscience of Your Unique Experience',
    'Dr. Rena Malik: Improving Sexual & Urological Health in Males and Females',
    'Ketamine: Benefits and Risks for Depression, PTSD & Neuroplasticity',
    'Tony Hawk: Harnessing Passion, Drive & Persistence for Lifelong Success',
    'AMA #9: Kratom Risks, Does Infrared Sauna Work & Journaling Benefits',
    'Dr. Maya Shankar: How to Shape Your Identity & Goals',
    'How to Enhance Performance & Learning by Applying a Growth Mindset',
    'Dr. Robert Malenka: How Your Brain’s Reward Circuits Drive Your Choices',
    'Science-Supported Tools to Accelerate Your Fitness Goals',
    'Dr. Jeffrey Goldberg: How to Improve Your Eye Health & Offset Vision Loss',
    'AMA #8: Balancing Caffeine, Decision Fatigue & Social Isolation',
    'Tim Ferriss: How to Learn Better & Create Your Best Future',
    'The Science of MDMA & Its Therapeutic Uses: Benefits & Risks',
    'Dr. Immordino-Yang: How Emotions & Social Factors Impact Learning',
    'AMA #7: Cold Exposure, Maximizing REM Sleep & My Next Scientific Studies',
    'Adderall, Stimulants & Modafinil for ADHD: Short- & Long-Term Effects',
    'Dr. Robin Carhart-Harris: The Science of Psychedelics for Mental Health',
    'Dr. Susanna Søberg: How to Use Cold & Heat Exposure to Improve Your Health',
    'How Psilocybin Can Rewire Our Brain, Its Therapeutic Benefits & Its Risks',
    'Dr. Noam Sobel: How Smells Influence Our Hormones, Health & Behavior',
    'AMA #6: Eye Health, Why We Yawn & Increasing Motivation',
    'Science-Based Mental Training & Visualization for Improved Learning',
    'Dr. Matthew MacDougall: Neuralink & Technologies to Enhance Human Brains',
    'The Science of Healthy Hair, Hair Loss and How to Regrow Hair',
    'Dr. Elissa Epel: Control Stress for Healthy Eating, Metabolism & Aging',
    'Leverage Dopamine to Overcome Procrastination & Optimize Effort',
    'AMA #5: Intrusive Thoughts, CGMs, Behavioral Change, Naps & NSDR',
    'Dr. Peter Attia: Improve Vitality, Emotional & Physical Health & Lifespan',
    'Dr. Satchin Panda: Intermittent Fasting to Improve Health, Cognition & Longevity',
    'How to Optimize Your Water Quality & Intake for Health',
    'Dr. Oded Rechavi: Genes & the Inheritance of Memories Across Generations',
    'AMA #4: Maintain Motivation, Improve REM Sleep, Set Goals, Manage Anxiety & More',
    'GUEST SERIES | Dr. Andy Galpin: Optimal Nutrition & Supplementation for Fitness',
    'How to Breathe Correctly for Optimal Health, Mood, Learning & Performance',
    'GUEST SERIES | Dr. Andy Galpin: Maximize Recovery to Achieve Fitness & Performance Goals',
    'Dr. Gina Poe: Use Sleep to Enhance Learning, Memory & Emotional State',
    'GUEST SERIES | Dr. Andy Galpin: Optimize Your Training Program for Fitness & Longevity',
    'How to Stop Headaches Using Science-Based Approaches',
    'GUEST SERIES | Dr. Andy Galpin: How to Build Physical Endurance & Lose Fat ',
    'Dr. Sara Gottfried: How to Optimize Female Hormone Health for Vitality & Longevity',
    'GUEST SERIES | Dr. Andy Galpin: Optimal Protocols to Build Strength & Grow Muscles',
    'How to Optimize Fertility in Males & Females',
    'GUEST SERIES | Dr. Andy Galpin: How to Assess & Improve All Aspects of Your Fitness',
    'Rick Rubin: How to Access Your Creativity',
    'AMA #3: Adaptogens, Fasting & Fertility, Bluetooth/EMF Risks, Cognitive Load Limits & More',
    'Developing a Rational Approach to Supplementation for Health & Performance',
    'Dr. Sam Harris: Using Meditation to Focus, View Consciousness & Expand Your Mind',
    'Jocko Willink: How to Become Resilient, Forge Your Identity & Lead Others',
    'The Science of Creativity & How to Enhance Creative Innovation',
    'LIVE EVENT Q&A: Dr. Andrew Huberman Question & Answer in New York, NY',
    'Dr. Kyle Gillett: Tools for Hormone Optimization in Males',
    'AMA #2: Improve Sleep, Reduce Sugar Cravings, Optimal Protein Intake, Stretching Frequency & More',
    'Using Caffeine to Optimize Mental & Physical Performance',
    'Dr. Lex Fridman: Navigating Conflict, Finding Purpose & Maintaining Drive',
    'Dr. Chris Palmer: Diet & Nutrition for Mental Health',
    'Science-Based Tools for Increasing Happiness ',
    'LIVE EVENT Q&A: Dr. Andrew Huberman Question & Answer in Los Angeles, CA',
    'Dr. Layne Norton: The Science of Eating for Health, Fat Loss & Lean Muscle',
    'AMA #1: Leveraging Ultradian Cycles, How to Protect Your Brain, Seed Oils Examined and More',
    'How Meditation Works & Science-Based Effective Meditations ',
    'Dr. Eddie Chang: The Science of Learning & Speaking Languages',
    'Fitness Toolkit: Protocol & Tools to Optimize Physical Health',
    'Dr. Nolan Williams: Psychedelics & Neurostimulation for Brain Rewiring',
    'The Effects of Cannabis (Marijuana) on the Brain & Body',
    'Dr. Casey Halpern: Biology & Treatments for Compulsive Behaviors & Binge Eating',
    'Nicotine’s Effects on the Brain & Body & How to Quit Smoking or Vaping ',
    'Dr. David Anderson: The Biology of Aggression, Mating, & Arousal',
    'Focus Toolkit: Tools to Improve Your Focus & Concentration',
    'LIVE EVENT Q&A: Dr. Andrew Huberman Question & Answer in Portland, OR',
    'Dr. Erich Jarvis: The Neuroscience of Speech, Language & Music',
    'What Alcohol Does to Your Body, Brain & Health',
    'LIVE EVENT Q&A: Dr. Andrew Huberman Question & Answer in Seattle, WA',
    'Dr. Peter Attia: Exercise, Nutrition, Hormones for Vitality & Longevity',
    'Sleep Toolkit: Tools for Optimizing Sleep & Sleep-Wake Timing',
    'Dr. Emily Balcetis: Tools for Setting & Achieving Goals',
    'The Science & Treatment of Bipolar Disorder',
    'Dr. Charles Zuker: The Biology of Taste Perception & Sugar Craving',
    'Optimize & Control Your Brain Chemistry to Improve Health & Performance',
    'Jeff Cavaliere: Optimize Your Exercise Program with Science-Based Tools',
    'The Science & Treatment of Obsessive-Compulsive Disorder (OCD) ',
    'Ido Portal: The Science & Practice of Movement',
    'Improve Flexibility with Research-Supported Stretching Protocols',
    'Dr. Paul Conti: Therapy, Treating Trauma & Other Life Challenges',
    'The Science & Process of Healing from Grief',
    'Dr. Wendy Suzuki: Boost Attention & Memory with Science-Based Tools',
    'Understand & Improve Memory Using Science-Based Tools',
    'Understanding & Controlling Aggression',
    'Dr. Rhonda Patrick: Micronutrients for Health & Longevity',
    'The Science & Health Benefits of Deliberate Heat Exposure',
    'Using Light (Sunlight, Blue Light & Red Light) to Optimize Health',
    'Dr. Kyle Gillett: How to Optimize Your Hormones for Health & Vitality',
    'Using Deliberate Cold Exposure for Health and Performance',
    'Dr. Andy Galpin: How to Build Strength, Muscle Size & Endurance',
    'Controlling Sugar Cravings & Metabolism with Science-Based Tools ',
    'Using Salt to Optimize Mental & Physical Performance',
    'Dr. Justin Sonnenburg: How to Build, Maintain & Repair Gut Health',
    'How to Enhance Your Gut Microbiome for Brain & Overall Health',
    'Dr. David Spiegel: Using Hypnosis to Enhance Mental & Physical Health & Performance',
    'The Science of Love, Desire and Attachment',
    'Using Play to Rewire & Improve Your Brain',
    'Optimizing Workspace for Productivity, Focus, & Creativity',
    'Dr. Alia Crum: Science of Mindsets for Health & Performance',
    'The Science of Setting & Achieving Goals',
    'Dr. Jack Feldman: Breathing for Mental & Physical Health & Performance',
    'The Science of Making & Breaking Habits',
    'Dr. David Sinclair: The Biology of Slowing & Reversing Aging',
    'Science of Social Bonding in Family, Friendship & Romantic Love',
    "Dr. David Berson: Understanding Your Brain's Logic & Function",
    'Erasing Fears & Traumas Based on the Modern Neuroscience of Fear',
    'Dr. David Buss: How Humans Select & Keep Romantic Partners in Short & Long Term',
    'The Science of Gratitude & How to Build a Gratitude Practice',
    'Time Perception & Entrainment by Dopamine, Serotonin & Hormones',
    'Dr. Duncan French: How to Exercise for Strength Gains & Hormone Optimization',
    'Using Your Nervous System to Enhance Your Immune System',
    'Dr. Samer Hattar: Timing Your Light, Food, & Exercise for Optimal Sleep, Energy & Mood',
    'Nutrients For Brain Health & Performance',
    'Effects of Fasting & Time Restricted Eating on Fat Loss & Health',
    'Dr. Craig Heller: Using Temperature To Optimize Performance, Brain & Body Health',
    'Controlling Your Dopamine For Motivation, Focus & Satisfaction',
    'Dr. Matthew Johnson: Psychedelics for Treating Mental Disorders',
    'ADHD & How Anyone Can Improve Their Focus',
    'Healthy Eating & Eating Disorders - Anorexia, Bulimia, Binging',
    'Dr. Robert Sapolsky: Science of Stress, Testosterone & Free Will',
    'Understanding & Conquering Depression',
    'Dr. Anna Lembke: Understanding & Treating Addiction',
    'How to Control Your Sense of Pain & Pleasure',
    'Dr. Matthew Walker: The Science & Practice of Perfecting Your Sleep',
    'How to Optimize Your Brain-Body Function & Health',
    'Dr. Lex Fridman: Machines, Creativity & Love',
    'Maximizing Productivity, Physical & Mental Health with Daily Tools',
    'The Science of Hearing, Balance & Accelerated Learning',
    'Dr. Karl Deisseroth: Understanding & Healing the Mind',
    'How Smell, Taste & Pheromone-Like Chemicals Control You',
    'The Science of Vision, Eye Health & Seeing Better',
    'How to Build Endurance in Your Brain & Body',
    'Science & Tools for Muscle Growth, Increasing Strength & Muscular Recovery',
    'How to Lose Fat with Science-Based Tools',
    'How to Learn Skills Faster',
    'Supercharge Exercise Performance & Recovery with Cooling',
    'Boost Your Energy & Immune System with Cortisol & Adrenaline ',
    'How to Control Your Metabolism by Thyroid & Growth Hormone ',
    'How Hormones Control Hunger, Eating & Satiety',
    'How to Optimize Testosterone & Estrogen',
    'The Science of Sexual Development',
    'The Science of Emotions & Relationships',
    'How To Increase Motivation & Drive',
    'How Foods and Nutrients Control Our Moods',
    'Master Stress: Tools for Managing Stress & Anxiety',
    'Control Pain & Heal Faster with Your Brain',
    'Optimize Your Brain with Science-based Tools',
    'How to Learn Faster by Using Failures, Movement & Balance',
    'How to Focus to Change Your Brain',
    'Understanding and Using Dreams to Learn and to Forget',
    'How to Defeat Jetlag, Shift Work & Sleeplessness',
    'Using Science to Optimize Sleep, Learning & Metabolism',
    'Master Your Sleep & Be More Alert When Awake',
    'How Your Brain Works & Changes',
    'Welcome to the Huberman Lab Podcast',
]
def remove_punctuation_and_spaces(text):
    """Turn each title into a transcript file name.

    Args:
        text: Iterable of title strings (a list of titles, despite the
            singular parameter name).

    Returns:
        A new list in the same order as the input. Each entry keeps only the
        characters of the title for which ``str.isalnum()`` is true (so
        spaces and punctuation are dropped, while non-ASCII letters and
        digits are kept) and has ``".txt"`` appended.
    """
    # `title` rather than `string` so the imported `string` module is not
    # shadowed inside the comprehension.
    return [
        ''.join(char for char in title if char.isalnum()) + ".txt"
        for title in text
    ]
# File name derived from every episode title, in the same order as og_titles.
cleaned_strings = remove_punctuation_and_spaces(og_titles)
# Dict mapping raw file names to episode titles. The two lists are
# index-aligned, so they are paired positionally; if two titles reduce to the
# same file name, the later title wins.
d = dict(zip(cleaned_strings, og_titles))
# Convert all transcripts into chunks and prepare for insertion to db
documents = []  # chunk texts, one entry per section
metadatas = []  # one metadata dict per chunk, parallel to `documents`
dir_path = "./texts"
# os.listdir returns entries in arbitrary order. Sort them so chunks are
# produced in a reproducible order, which keeps the position-based ids
# assigned at insertion time stable between runs.
dir_ = sorted(os.listdir(dir_path))
def split_into_sections(text, section_size=254):
    """Split *text* into consecutive groups of whitespace-separated words.

    Args:
        text: The string to split.
        section_size: Maximum number of words per group.

    Returns:
        A list of word lists. Every group holds ``section_size`` words except
        possibly the last, which holds the remainder. Text with no words
        yields an empty list.
    """
    tokens = text.split()
    chunks = []
    for start in range(0, len(tokens), section_size):
        chunks.append(tokens[start:start + section_size])
    return chunks
for file in dir_:
    # Only entries whose name was derived from a known episode title can be
    # labelled. Skip anything else (e.g. hidden or stray files) up front
    # instead of failing with a KeyError after the file has been read.
    if file not in d:
        print(f"Skipping {file!r}: no matching episode title")
        continue
    path = os.path.join(dir_path, file)
    # NOTE(review): the "unicode_escape" codec decodes bytes as Latin-1 and
    # interprets backslash escapes; confirm this is intended for the
    # transcript files rather than e.g. utf-8.
    with open(path, "r", encoding="unicode_escape") as f:
        transcript = f.read()
    # "paragraph" is the 1-based position of the chunk within its transcript.
    for paragraph, section in enumerate(split_into_sections(transcript), start=1):
        documents.append(' '.join(section))
        metadatas.append({"title": d[file], "paragraph": paragraph})
# Add all documents and metadata to the db.
# Insert in bounded slices rather than one call holding every chunk, since
# Chroma rejects an add() larger than its maximum batch size.
# NOTE(review): 5000 is chosen to sit below the commonly reported limit of
# 5461 -- confirm against the installed chromadb version.
# Ids are still "id0", "id1", ... by position in `documents`.
batch_size = 5000
for start in range(0, len(documents), batch_size):
    end = min(start + batch_size, len(documents))
    collection.add(
        documents=documents[start:end],
        metadatas=metadatas[start:end],
        ids=[f"id{i}" for i in range(start, end)],
    )