49 lines
1.4 KiB
Python
49 lines
1.4 KiB
Python
#!/usr/bin/env python3
"""
Quick, dirty Google Podcasts list extractor.

Uses Beautiful Soup 4 on a saved version of your podcasts subscription page.
For example, a text-only copy of "https://podcasts.google.com/subscriptions"
saved from your browser to disk.

The script collects the text of every <div> whose class matches an entry in
``class_search_list``, treats consecutive matches as (first, second) pairs,
and writes one "first<TAB> second" line per pair to ``output_filename``,
sorted case-insensitively by the first element.

By Tildebeast, 2023.
"""

import sys
from collections import OrderedDict  # not used below; import kept deliberately

from bs4 import BeautifulSoup

# Your input and output filenames. Edit names/paths as required.
input_filename = "Google Podcasts - Subscriptions.html"
output_filename = "google-podcasts-names.txt"

# Flag to print the contents as the script runs. Otherwise output only goes to a file.
SHOW_CONTENTS = True

# I think these CSS selectors are constant but can't guarantee it.
class_search_list = ["eWeGpe", "yFWEIe"]

# Read with an explicit encoding: without it, open() falls back to the
# platform's locale encoding, which can fail or garble non-ASCII names.
with open(input_filename, 'r', encoding="utf-8") as f:
    html_data = f.read()

soup = BeautifulSoup(html_data, 'html.parser')

# Passing a list matches a <div> carrying ANY of the listed classes.
data = soup.find_all("div", attrs={"class": class_search_list})

text_only = [div.get_text().strip() for div in data]

# The pairing below silently yields nothing / drops the trailing item in these
# cases, so tell the user instead of producing a quietly incomplete file.
if not text_only:
    print(
        f"Warning: no matching <div> elements found in {input_filename!r}; "
        "the class names in class_search_list may have changed.",
        file=sys.stderr,
    )
elif len(text_only) % 2:
    print(
        "Warning: odd number of matching elements; "
        "the last one has no partner and is skipped.",
        file=sys.stderr,
    )

# Even-indexed matches become keys and odd-indexed matches their values
# (presumably podcast title and publisher -- verify against the saved page).
# Repeated keys collapse to a single entry; the last value wins.
pairs = dict(zip(text_only[0::2], text_only[1::2]))

with open(output_filename, 'w', encoding="utf-8") as f:
    # Case-insensitive ordering by the first element of each pair.
    for k in sorted(pairs, key=str.lower):
        line = f"{k}\t {pairs[k]}"
        if SHOW_CONTENTS:
            print(line)
        f.write(f"{line}\n")

print(f"\nOutput written to file: {output_filename}\n")
print("\nFinished.\n")
|
|
|
|
|
|
|