172 lines
6.1 KiB
Python
172 lines
6.1 KiB
Python
from html.parser import HTMLParser
|
|
from urllib.request import urlopen
|
|
|
|
|
|
class YouTube:
    """Look up YouTube video metadata and format it for IRC and the terminal.

    Every helper works both on the class itself (``YouTube.yt(YouTube, url)``)
    and on an instance.  ``mesg`` reads ``self.util`` and ``yt`` reads
    ``self.premature_optimization``; neither attribute is defined by this
    class, so the caller is expected to assign them.
    """

    # Kept for backward compatibility; yt() keeps its working data local to
    # the call (on the parser instance) instead of in shared state.
    y, z = {}, {}
    video_type = ""

    _EMBED_PREFIXES = (
        "https://www.youtube.com/embed/",
        "https://www.youtube-nocookie.com/embed/",
    )
    _MUSIC_PREFIXES = ("https://music.youtube.com/watch?v=",)
    _SHORTS_PREFIXES = (
        "https://youtube.com/shorts/",
        "https://www.youtube.com/shorts/",
    )
    _CLIP_PREFIXES = (
        "https://www.youtube.com/clip/",
        "https://youtube.com/clip/",
    )
    # Substrings that mark a whitespace-separated token as a YouTube link.
    _URL_MARKERS = (
        (
            "https://youtu.be/",
            "https://www.youtube.com/watch?v=",
            "https://m.youtube.com/watch?v=",
            "https://youtube.com/watch?v=",
        )
        + _EMBED_PREFIXES
        + _MUSIC_PREFIXES
        + _SHORTS_PREFIXES
        + _CLIP_PREFIXES
    )
    # Keys the success message in yt() needs from the scraped page.
    _REQUIRED_PROPS = (
        "title",
        "duration",
        "channelName",
        "uploadDate",
        "interactionCount",
    )

    def mesg(self, msg, t=None):
        """Forward *msg* (and the optional *t*) to ``self.util.mesg``."""
        self.util.mesg(msg, t)

    def match_urls(self, str):
        """Return the unique YouTube URLs found in the text *str*.

        The text is split on whitespace.  A token is kept when it contains one
        of the known YouTube URL forms and itself starts with ``"http"``.
        Duplicates are dropped and first-seen order is preserved.

        The parameter keeps its original name (``str``) so keyword callers
        continue to work, even though it shadows the builtin.
        """
        candidates = [
            token
            for token in str.split()
            if any(marker in token for marker in YouTube._URL_MARKERS)
        ]
        # Build a new list instead of popping from the list being iterated,
        # which skipped elements and removed the wrong ones.
        return [url for url in dict.fromkeys(candidates) if url.startswith("http")]

    @staticmethod
    def is_embed(str):
        """Return True if *str* starts with a YouTube embed URL prefix."""
        return str.startswith(YouTube._EMBED_PREFIXES)

    @staticmethod
    def is_ytmusic(str):
        """Return True if *str* starts with the YouTube Music watch URL prefix."""
        return str.startswith(YouTube._MUSIC_PREFIXES)

    @staticmethod
    def is_ytshorts(str):
        """Return True if *str* starts with a YouTube Shorts URL prefix."""
        return str.startswith(YouTube._SHORTS_PREFIXES)

    @staticmethod
    def is_clip(str):
        """Return True if *str* starts with a YouTube clip URL prefix."""
        return str.startswith(YouTube._CLIP_PREFIXES)

    class parseprop(HTMLParser):
        """HTML parser that collects selected metadata tags into ``self.props``.

        It picks up ``<meta>``/``<link>`` tags carrying an ``itemprop`` listed
        in ``itemprops_list`` and the ``<meta name="title">`` tag.
        """

        def __init__(self, video_type=None):
            """Create the parser.

            video_type: kind of URL being parsed; ``"clip"`` additionally
                collects the ``description`` itemprop.  Defaults to
                ``YouTube.video_type``.
            """
            print("yt parse init")
            super().__init__()
            if video_type is None:
                video_type = YouTube.video_type
            self.itemprops_list = ["name", "duration", "uploadDate", "interactionCount"]
            if video_type == "clip":
                self.itemprops_list.append("description")
                print("it is a clip!")
            # Collected metadata, keyed by property name.
            self.props = {}

        def handle_starttag(self, tag, attrs):
            """Record the key/content pair of a relevant ``<meta>``/``<link>`` tag.

            A tag is ignored when it has attributes other than ``itemprop``,
            ``content`` and ``name="title"``, when its itemprop is not in
            ``itemprops_list``, or when it has no ``content``.
            """
            if tag not in ("meta", "link"):
                return
            has_itemprop = any("itemprop" in pair for pair in attrs)
            if not has_itemprop and ("name", "title") not in attrs:
                return
            # <meta itemprop="name"> is skipped; the title is taken from
            # <meta name="title"> and the channel from <link itemprop="name">.
            if tag == "meta" and ("itemprop", "name") in attrs:
                return

            key = None
            content = None
            for attr_name, attr_value in attrs:
                if attr_name == "itemprop":
                    if attr_value not in self.itemprops_list:
                        return
                    if tag == "link" and attr_value == "name":
                        key = "channelName"
                    else:
                        key = attr_value
                elif attr_name == "content":
                    if content is None:  # the first content attribute wins
                        content = attr_value
                elif attr_name == "name" and attr_value == "title":
                    key = attr_value
                else:
                    return

            # Tracking key and content separately makes attribute order
            # irrelevant and lets incomplete tags be skipped instead of raising.
            if key is None or content is None:
                return
            if key == "interactionCount":
                try:
                    content = int(content)
                except ValueError:
                    return
            self.props[key] = content

    @staticmethod
    def fmt_dur(dur):
        """Format an ISO 8601 style duration such as ``"PT3M20S"`` for display.

        The hour, minute and second components are each optional.  Minutes of
        60 or more are folded into hours.  An all-zero duration is reported as
        ``"LIVE"``.

        Raises:
            ValueError: if *dur* is not of the form ``PT[nH][nM][nS]``.
        """
        import re  # function-scope import keeps this block self-contained

        match = re.fullmatch(r"PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?", dur)
        if match is None:
            raise ValueError(f"unsupported duration: {dur!r}")
        hours, minutes, seconds = (int(part or 0) for part in match.groups())
        hours, minutes = divmod(hours * 60 + minutes, 60)
        if hours:
            return f"{hours}h {minutes}m {seconds}s"
        if minutes == 0 and seconds == 0:
            return "LIVE"
        if minutes == 0:
            return f"{seconds}s"
        if seconds == 0:
            return f"{minutes}m"
        return f"{minutes}m {seconds}s"

    def yt(self, url):
        """Fetch *url* and describe the video it points to.

        Embed, YouTube Music and Shorts URLs are first rewritten to the
        regular ``watch?v=`` form.  The resulting line is also printed with
        ANSI colours.

        Returns:
            ``(irc_string, is_error)``: the IRC-formatted message, and True
            when the page did not yield the metadata needed to describe it.
        """
        url = url.rstrip("\x01")
        if self.is_clip(url):
            video_type = "clip"
        elif self.is_ytshorts(url):
            video_type = "shorts"
        elif self.is_ytmusic(url):
            video_type = "music"
        elif self.is_embed(url):
            video_type = "embed"
        else:
            video_type = "video"
        self.video_type = video_type

        if video_type == "embed":
            # Drop any query string so it does not end up in the video id.
            video_id = url.split("/")[4].split("?")[0]
            url = f"https://www.youtube.com/watch?v={video_id}"
        elif video_type == "music":
            for param in url.split("?")[1].split("&"):
                if param[0:2] == "v=":
                    video_id = param[2:]
            url = f"https://www.youtube.com/watch?v={video_id}"
        elif video_type == "shorts":
            video_id = url.split("?")[0].split("/")[-1]
            url = f"https://www.youtube.com/watch?v={video_id}"

        parser = self.parseprop(video_type)
        # The context manager closes the connection even if reading fails.
        with urlopen(url) as response:
            # use premature optimization? it should be SLIGHTLY faster
            if getattr(self, "premature_optimization", False):
                # <body> appears on approximately line 21 or 22, so we read 24
                # lines to be safe (23-25 should be license comment).  Whole
                # lines are read because byte-sized reads could split a UTF-8
                # sequence.
                raw = b"".join(response.readline() for _ in range(24))
            else:
                # just read all of the html
                raw = response.read()
        parser.feed(raw.decode())

        info = parser.props
        if any(key not in info for key in YouTube._REQUIRED_PROPS):
            irc_string = "[\x0304Youtube\x03] \x0307ERROR:\x0308 got no data from server! \x0315(check your URL for typos!)\x03"
            ansi_string = "[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m got no data from server! \x1b[37;2m(check your URL for typos!)\x1b[0m"
            print(ansi_string)
            return irc_string, True

        info["duration"] = self.fmt_dur(info["duration"])
        irc_string = f"[\x0303Youtube\x03] \x02{info['title']}\x02 ({info['duration']}) uploaded by \x1d{info['channelName']}\x1d on {info['uploadDate']}, {info['interactionCount']:,} views"
        ansi_string = f"[\x1b[32mYoutube\x1b[0m] \x1b[1m{info['title']}\x1b[0m ({info['duration']}) uploaded by \x1b[03m{info['channelName']}\x1b[0m on {info['uploadDate']}, {info['interactionCount']:,} views"
        print(ansi_string)
        return irc_string, False
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: describe the single URL passed as argv[1].
    import sys

    if len(sys.argv) < 2:
        # Exit with a usage hint instead of an IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} <youtube-url>")

    YouTube.premature_optimization = False
    # yt() is called on the class itself, so the class object stands in for self.
    YouTube.yt(YouTube, sys.argv[1])
|