preliminary youtube clips support (correct title, incorrect metadata)

This commit is contained in:
jan6 2023-08-02 11:03:21 -05:00
parent e05206d9c6
commit 06bdbd8029
1 changed files with 25 additions and 4 deletions

View File

@ -4,6 +4,7 @@ from urllib.request import urlopen
class YouTube:
y, z = {}, {} # empty on every invocation
video_type=""
def mesg(self, msg, t=None):
self.util.mesg(msg, t)
@ -21,6 +22,8 @@ class YouTube:
or "https://music.youtube.com/watch?v=" in i
or "https://youtube.com/shorts/" in i
or "https://www.youtube.com/shorts/" in i
or "https://www.youtube.com/clip/" in i
or "https://youtube.com/clip/" in i
]
r = list(dict.fromkeys(r))
n = 0
@ -44,7 +47,19 @@ class YouTube:
"https://www.youtube.com/shorts/"
)
def is_clip(str):
    """Tell whether a URL is a YouTube clip link.

    Returns True only when the given string *begins* with one of the two
    clip prefixes (with or without the ``www.`` host part); a clip URL
    appearing later in the string does not count.
    """
    # str.startswith accepts a tuple and matches if any prefix matches.
    return str.startswith(
        ("https://youtube.com/clip/", "https://www.youtube.com/clip/")
    )
class parseprop(HTMLParser):
def __init__(self):
    """Initialise the HTML parser and pick the itemprop values to collect.

    The base set is name, duration, uploadDate and interactionCount;
    "description" is added when the class-level YouTube.video_type is
    "clip".
    """
    print("yt parse init")
    HTMLParser.__init__(self)
    wanted = ["name", "duration", "uploadDate", "interactionCount"]
    # NOTE(review): this reads the attribute on the YouTube class, while
    # yt() assigns self.video_type; the two only agree if yt() runs with
    # the class itself as `self` -- confirm against the caller.
    if YouTube.video_type == "clip":
        wanted.append("description")
        print("it is a clip!")
    self.itemprops_list = wanted
def handle_starttag(self, tag, attrs):
if (tag != "meta" and tag != "link") or [
i for i in attrs if "itemprop" in i
@ -54,7 +69,7 @@ class YouTube:
# print(self,tag,attrs)
for k, v in attrs:
if k == "itemprop":
if v not in ["name", "duration", "uploadDate", "interactionCount"]:
if v not in self.itemprops_list:
return
x = [v]
if tag == "link" and v == "name":
@ -87,15 +102,21 @@ class YouTube:
def yt(self, url):
# self.util.mesg("dbg hello")
url = url.rstrip("\x01")
if self.is_embed(url):
self.video_type=("clip" if self.is_clip(url) else
"shorts" if self.is_ytshorts(url) else
"music" if self.is_ytmusic(url) else
"embed" if self.is_embed(url) else
"video")
video_type=self.video_type
if video_type=="embed":
videoId = url.split("/")[4]
url = f"https://www.youtube.com/watch?v={videoId}"
if self.is_ytmusic(url):
elif video_type=="music":
for i in url.split("?")[1].split("&"):
if i[0:2] == "v=":
videoId = i[2:]
url = f"https://www.youtube.com/watch?v={videoId}"
if self.is_ytshorts(url):
elif video_type=="shorts":
videoId = url.split("?")[0].split("/")[-1]
url = f"https://www.youtube.com/watch?v={videoId}"
global y, z