Upload bs-google-podcasts-extractor.py
This commit is contained in:
parent
7c43ba296d
commit
fcb7328371
|
@ -0,0 +1,48 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Quick, dirty Google Podcasts list extractor.
|
||||
Uses Beautiful Soup 4 on a saved version of your podcasts subscription page.
|
||||
For example, save "https://podcasts.google.com/subscriptions" from your browser to disk as an HTML file (the script needs the HTML markup, not a plain-text copy).
|
||||
|
||||
By Tildebeast, 2023.
|
||||
"""
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from collections import OrderedDict
|
||||
|
||||
# Your input and output filenames. Edit names/paths as required.
input_filename = "Google Podcasts - Subscriptions.html"
output_filename = "google-podcasts-names.txt"

# Flag to print the contents as the script runs. Otherwise output only goes to a file.
SHOW_CONTENTS = True

# I think these CSS selectors are constant but can't guarantee it.
class_search_list = ["eWeGpe", "yFWEIe"]


def pair_entries(texts):
    """Group a flat list of strings into unique, sorted (first, second) pairs.

    Items at even positions become the first element of a pair and the item
    that follows each one becomes the second element. A trailing item with
    no partner is dropped. Exact duplicate pairs are collapsed, but pairs
    that share a first element with different second elements are all kept.

    Args:
        texts: Flat sequence of strings, expected to alternate first/second.

    Returns:
        A list of (first, second) tuples sorted case-insensitively by the
        first element; ties keep their order of first appearance.
    """
    # dict.fromkeys de-duplicates while preserving first-seen order.
    unique_pairs = dict.fromkeys(zip(texts[0::2], texts[1::2]))
    return sorted(unique_pairs, key=lambda pair: pair[0].lower())


def main():
    """Extract the text pairs from the saved page and write them to a file.

    Reads ``input_filename``, collects the stripped text of every ``div``
    whose class matches an entry of ``class_search_list``, pairs consecutive
    texts, and writes one tab-separated line per pair to ``output_filename``.
    Lines are also printed when ``SHOW_CONTENTS`` is true.
    """
    # Local import: only needed for the warnings below.
    import sys

    # Be explicit about the encoding so the result does not depend on the
    # platform's locale default.
    with open(input_filename, "r", encoding="utf-8") as f:
        html_data = f.read()

    soup = BeautifulSoup(html_data, "html.parser")

    # A list of class names matches a div carrying any one of them.
    data = soup.find_all("div", attrs={"class": class_search_list})
    text_only = [x.get_text().strip() for x in data]

    # NOTE(review): the pairing assumes the matched divs alternate between
    # the two classes (presumably podcast title, then publisher) - confirm
    # against a real saved page.
    if not text_only:
        print(
            "Warning: no matching elements found; the CSS class names may have changed.",
            file=sys.stderr,
        )
    elif len(text_only) % 2:
        print(
            "Warning: odd number of matching elements; the last one has no partner and is skipped.",
            file=sys.stderr,
        )

    with open(output_filename, "w", encoding="utf-8") as f:
        for first, second in pair_entries(text_only):
            line = f"{first}\t {second}"
            if SHOW_CONTENTS:
                print(line)
            f.write(line + "\n")

    print(f"\nOutput written to file: {output_filename}\n")
    print("\nFinished.\n")


if __name__ == "__main__":
    main()
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue