bot6/youtube.py

from html.parser import HTMLParser
from urllib.request import urlopen


class YouTube:
    """Look up YouTube links and format their metadata as a one-line summary.

    The ``__main__`` block of this file calls ``YouTube.yt(YouTube, url)``,
    i.e. the class itself is passed as ``self`` rather than an instance.
    """

    # NOTE(review): yt() and parseprop.handle_starttag() declare `global y, z`
    # / `global z`, so they work on module-level names rather than on these
    # class attributes; nothing visible in this file reads YouTube.y/YouTube.z.
    y, z = {}, {}  # empty on every invocation
    # Kind of link being processed ("clip", "shorts", "music", "embed" or
    # "video"); assigned by yt() and read by parseprop.__init__().
    video_type = ""

    def mesg(self, msg, t=None):
        """Forward ``msg`` and ``t`` unchanged to ``self.util.mesg``.

        ``self.util`` is not defined in this class; it has to be attached by
        the code that uses it.  Nothing is returned.
        """
        messenger = self.util
        messenger.mesg(msg, t)

    def match_urls(self, str):
        """Return the unique YouTube links found in a message.

        The message is split on whitespace.  A token is kept when it contains
        one of the known YouTube URL prefixes *and* itself starts with
        ``"http"``, so tokens with leading junk such as ``<https://youtu.be/x>``
        are dropped instead of being handed to ``urlopen``.

        Args:
            str: Message text to scan.  (The name shadows the builtin but is
                kept so keyword callers continue to work.)

        Returns:
            list: Matching tokens, de-duplicated, in order of first appearance.
        """
        prefixes = (
            "https://youtu.be/",
            "https://www.youtube.com/watch?v=",
            "https://m.youtube.com/watch?v=",
            "https://youtube.com/watch?v=",
            "https://www.youtube.com/embed/",
            "https://www.youtube-nocookie.com/embed/",
            "https://music.youtube.com/watch?v=",
            "https://youtube.com/shorts/",
            "https://www.youtube.com/shorts/",
            "https://www.youtube.com/clip/",
            "https://youtube.com/clip/",
        )
        # Filter in a single pass: removing items from a list while iterating
        # over it skips elements and can remove the wrong ones.
        matches = [
            token
            for token in str.split()
            if token.startswith("http")
            and any(prefix in token for prefix in prefixes)
        ]
        # dict.fromkeys() drops duplicates while preserving order.
        return list(dict.fromkeys(matches))

    def is_embed(str):
        return str.startswith("https://www.youtube.com/embed/") or str.startswith(
            "https://www.youtube-nocookie.com/embed/"
        )

    def is_ytmusic(str):
        return str.startswith("https://music.youtube.com/watch?v=")

    def is_ytshorts(str):
        return str.startswith("https://youtube.com/shorts/") or str.startswith(
            "https://www.youtube.com/shorts/"
        )

    def is_clip(str):
        return str.startswith("https://youtube.com/clip/") or str.startswith(
            "https://www.youtube.com/clip/"
        )

    class parseprop(HTMLParser):
        """Collect video metadata from the ``<meta>`` and ``<link>`` tags of a page.

        Values are written into the module-level dict ``z`` (``YouTube.yt``
        assigns it before feeding the parser):

        * ``<meta name="title" content=...>`` is stored as ``z["title"]``.
        * ``<link itemprop="name" content=...>`` is stored as
          ``z["channelName"]``.
        * ``<meta>``/``<link>`` tags whose ``itemprop`` is another entry of
          ``itemprops_list`` are stored under that ``itemprop`` name.
        """

        def __init__(self):
            print("yt parse init")
            super().__init__()
            # itemprop values that handle_starttag() accepts.
            self.itemprops_list = ["name", "duration", "uploadDate", "interactionCount"]
            if YouTube.video_type == "clip":
                self.itemprops_list += ["description"]
                print("it is a clip!")

        def handle_starttag(self, tag, attrs):
            """Store the metadata carried by one ``<meta>`` or ``<link>`` tag.

            The attributes may appear in any order.  A tag is ignored when it
            carries an attribute other than ``itemprop``, ``content`` or
            ``name="title"``, when its ``itemprop`` is not in
            ``itemprops_list``, or when it has no ``content`` value.

            Args:
                tag: Lower-cased tag name supplied by ``HTMLParser``.
                attrs: List of ``(name, value)`` attribute pairs.

            Raises:
                ValueError: If an ``interactionCount`` value is not an integer.
            """
            if tag not in ("meta", "link"):
                return
            key = None
            content = None
            for attr_name, attr_value in attrs:
                if attr_name == "itemprop":
                    if attr_value not in self.itemprops_list:
                        return
                    if attr_value == "name":
                        if tag == "meta":
                            # Only <link itemprop="name"> (the channel name)
                            # is used; <meta itemprop="name"> is skipped.
                            return
                        key = "channelName"
                    else:
                        key = attr_value
                elif attr_name == "content":
                    # Keep the first content value if the attribute repeats.
                    if content is None:
                        content = attr_value
                elif attr_name == "name" and attr_value == "title":
                    key = attr_value
                else:
                    return
            if key is None or content is None:
                # Nothing to record: no recognised key, or no content value.
                return
            if key == "interactionCount":
                content = int(content)
            # `z` is the module-level dict that YouTube.yt() resets per lookup.
            z[key] = content

    def fmt_dur(dur):
        h, m, s = 0, 0, 0
        m = dur[2:].split("M")
        s = int(m[1][:-1])
        m = int(m[0])
        if m >= 60:
            h = m // 60
            m = round((m / 60 - h) * 60)
            return f"{h}h {m}m {s}s"
        elif h == 0 and m == 0 and s == 0:
            return "LIVE"
        elif m == 0 and s != 0:
            return f"{s}s"
        elif s == 0:
            return f"{m}m"
        else:
            return f"{m}m {s}s"

    def yt(self, url):
        # self.util.mesg("dbg hello")
        url = url.rstrip("\x01")
        self.video_type = (
            "clip"
            if self.is_clip(url)
            else "shorts"
            if self.is_ytshorts(url)
            else "music"
            if self.is_ytmusic(url)
            else "embed"
            if self.is_embed(url)
            else "video"
        )
        video_type = self.video_type
        if video_type == "embed":
            videoId = url.split("/")[4]
            url = f"https://www.youtube.com/watch?v={videoId}"
        elif video_type == "music":
            for i in url.split("?")[1].split("&"):
                if i[0:2] == "v=":
                    videoId = i[2:]
            url = f"https://www.youtube.com/watch?v={videoId}"
        elif video_type == "shorts":
            videoId = url.split("?")[0].split("/")[-1]
            url = f"https://www.youtube.com/watch?v={videoId}"
        global y, z
        y, z = {}, {}
        p = self.parseprop()
        # use premature optimization? it should be SLIGHTLY faster
        if self.premature_optimization:
            url_h, data = urlopen(url), b""
            # <body> appears on approximately line 21 or 22, so we read 24 lines to be safe (23-25 should be license comment)
            # I tried to read byte amounts but it's hard to make sure no invalid utf8 bytes happen due to partial reads
            for i in range(24):
                data += url_h.readline()
            data = data.decode()  # bytes to utf-8
            url_h.close()
        else:
            # just read all of the html
            data = urlopen(url).read().decode()
        # print(f"\x1b[31m my data is: {data}\x1b[0m")
        p.feed(data)
        if y == z == {}:
            irc_string = "[\x0304Youtube\x03] \x0307ERROR:\x0308 got no data from server! \x0315(check your URL for typos!)\x03"
            ansi_string = "[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m got no data from server! \x1b[37;2m(check your URL for typos!)\x1b[0m"
            print(ansi_string)
            return irc_string, True
        z.update({"duration": self.fmt_dur(z["duration"])})
        y, z = z, {}
        irc_string = f"[\x0303Youtube\x03] \x02{y['title']}\x02 ({y['duration']}) uploaded by \x1d{y['channelName']}\x1d on {y['uploadDate']}, {y['interactionCount']:,} views"
        ansi_string = f"[\x1b[32mYoutube\x1b[0m] \x1b[1m{y['title']}\x1b[0m ({y['duration']}) uploaded by \x1b[03m{y['channelName']}\x1b[0m on {y['uploadDate']}, {y['interactionCount']:,} views"
        print(ansi_string)
        return irc_string, False


if __name__ == "__main__":
    # Command-line use: look up the URL given as the first argument and print
    # its summary (yt() prints the ANSI-coloured message itself).
    import sys

    if len(sys.argv) < 2:
        # Exit with a usage hint instead of an IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} <youtube-url>")
    YouTube.premature_optimization = False
    YouTube.yt(YouTube, sys.argv[1])
youtube is a thing now! 2021-10-05 13:18:25 +00:00			`from html.parser import HTMLParser`
			`from urllib.request import urlopen`


			`class YouTube:`
idk, added version command, youtube displays view count now... 2023-07-08 18:08:25 +00:00			`y, z = {}, {} # empty on every invokation`
fixed some edge-case where title might be wrongly set to "true" 2023-08-10 01:11:11 +00:00			`video_type = ""`
youtube is a thing now! 2021-10-05 13:18:25 +00:00
I think this fixes youtube alternate urls, bonus: now embed links work also! 2022-05-23 17:05:10 +00:00			`def mesg(self, msg, t=None):`
			`self.util.mesg(msg, t)`

enable calling youtube.py from cli 2022-08-23 14:11:47 +00:00			`def match_urls(self, str):`
I think this fixes youtube alternate urls, bonus: now embed links work also! 2022-05-23 17:05:10 +00:00			`r = [`
			`i`
			`for i in str.split()`
			`if "https://youtu.be/" in i`
			`or "https://www.youtube.com/watch?v=" in i`
			`or "https://m.youtube.com/watch?v=" in i`
			`or "https://youtube.com/watch?v=" in i`
			`or "https://www.youtube.com/embed/" in i`
			`or "https://www.youtube-nocookie.com/embed/" in i`
support YouTube Music 2022-09-14 15:35:23 +00:00			`or "https://music.youtube.com/watch?v=" in i`
support youtube shorts 2022-09-26 21:16:34 +00:00			`or "https://youtube.com/shorts/" in i`
			`or "https://www.youtube.com/shorts/" in i`
preliminary youtube clips support (correct title, incorrect metadata) 2023-08-02 16:03:21 +00:00			`or "https://www.youtube.com/clip/" in i`
			`or "https://youtube.com/clip/" in i`
I think this fixes youtube alternate urls, bonus: now embed links work also! 2022-05-23 17:05:10 +00:00			`]`
enable calling youtube.py from cli 2022-08-23 14:11:47 +00:00			`r = list(dict.fromkeys(r))`
prevent echo abuse (ctcp messages, invoking other bots), fix "<urlopen error unknown url type: " errors with youtube, only error once per batch of youtube links 2023-03-17 23:32:28 +00:00			`n = 0`
			`for i in r:`
			`if not i.startswith("http"):`
			`r.pop(n)`
			`n += 1`

I think this fixes youtube alternate urls, bonus: now embed links work also! 2022-05-23 17:05:10 +00:00			`return r`

			`def is_embed(str):`
			`return str.startswith("https://www.youtube.com/embed/") or str.startswith(`
			`"https://www.youtube-nocookie.com/embed/"`
			`)`

support YouTube Music 2022-09-14 15:35:23 +00:00			`def is_ytmusic(str):`
			`return str.startswith("https://music.youtube.com/watch?v=")`

support youtube shorts 2022-09-26 21:16:34 +00:00			`def is_ytshorts(str):`
style format 2022-09-26 22:11:51 +00:00			`return str.startswith("https://youtube.com/shorts/") or str.startswith(`
			`"https://www.youtube.com/shorts/"`
			`)`
support youtube shorts 2022-09-26 21:16:34 +00:00
preliminary youtube clips support (correct title, incorrect metadata) 2023-08-02 16:03:21 +00:00			`def is_clip(str):`
			`return str.startswith("https://youtube.com/clip/") or str.startswith(`
			`"https://www.youtube.com/clip/"`
			`)`

youtube is a thing now! 2021-10-05 13:18:25 +00:00			`class parseprop(HTMLParser):`
preliminary youtube clips support (correct title, incorrect metadata) 2023-08-02 16:03:21 +00:00			`def __init__(self):`
			`print("yt parse init")`
			`HTMLParser.__init__(self)`
fixed some edge-case where title might be wrongly set to "true" 2023-08-10 01:11:11 +00:00			`self.itemprops_list = ["name", "duration", "uploadDate", "interactionCount"]`
			`if YouTube.video_type == "clip":`
			`self.itemprops_list += ["description"]`
preliminary youtube clips support (correct title, incorrect metadata) 2023-08-02 16:03:21 +00:00			`print("it is a clip!")`
fixed some edge-case where title might be wrongly set to "true" 2023-08-10 01:11:11 +00:00
youtube is a thing now! 2021-10-05 13:18:25 +00:00			`def handle_starttag(self, tag, attrs):`
fixed some edge-case where title might be wrongly set to "true" 2023-08-10 01:11:11 +00:00			`if (tag != "meta" and tag != "link") or (`
			`(`
			`[i for i in attrs if "itemprop" in i] == []`
			`and ("name", "title") not in attrs`
			`)`
			`or (tag == "meta" and ("itemprop", "name") in attrs)`
			`):`
youtube is a thing now! 2021-10-05 13:18:25 +00:00			`return`
			`global z`
fix youtube titling failing sometimes due to youtube being a f*in annoying, also should be faster by eliminating unnecessary request 2023-05-08 14:15:23 +00:00			`# print(self,tag,attrs)`
youtube is a thing now! 2021-10-05 13:18:25 +00:00			`for k, v in attrs:`
			`if k == "itemprop":`
preliminary youtube clips support (correct title, incorrect metadata) 2023-08-02 16:03:21 +00:00			`if v not in self.itemprops_list:`
youtube is a thing now! 2021-10-05 13:18:25 +00:00			`return`
			`x = [v]`
fix youtube titling failing sometimes due to youtube being a f*in annoying, also should be faster by eliminating unnecessary request 2023-05-08 14:15:23 +00:00			`if tag == "link" and v == "name":`
			`x = ["channelName"]`
youtube is a thing now! 2021-10-05 13:18:25 +00:00			`elif k == "content":`
idk, added version command, youtube displays view count now... 2023-07-08 18:08:25 +00:00			`if attrs[0][1] == "interactionCount":`
			`v = int(v)`
youtube is a thing now! 2021-10-05 13:18:25 +00:00			`x += [v]`
fixed some edge-case where title might be wrongly set to "true" 2023-08-10 01:11:11 +00:00			`elif k == "name" and v == "title":`
			`x = [v]`
			`else:`
			`return`
youtube is a thing now! 2021-10-05 13:18:25 +00:00			`z.update({x[0]: x[1]})`
			`# print(x[0],"=",x[1])`

			`def fmt_dur(dur):`
			`h, m, s = 0, 0, 0`
			`m = dur[2:].split("M")`
			`s = int(m[1][:-1])`
			`m = int(m[0])`
			`if m >= 60:`
			`h = m // 60`
			`m = round((m / 60 - h) * 60)`
			`return f"{h}h {m}m {s}s"`
added nickserv auth 2022-02-07 16:28:19 +00:00			`elif h == 0 and m == 0 and s == 0:`
			`return "LIVE"`
			`elif m == 0 and s != 0:`
			`return f"{s}s"`
idk, added version command, youtube displays view count now... 2023-07-08 18:08:25 +00:00			`elif s == 0:`
			`return f"{m}m"`
youtube is a thing now! 2021-10-05 13:18:25 +00:00			`else:`
			`return f"{m}m {s}s"`

			`def yt(self, url):`
youtube module, error when incorrect url (or otherwise failed to parse data from server) 2022-08-23 17:46:32 +00:00			`# self.util.mesg("dbg hello")`
added nickserv auth 2022-02-07 16:28:19 +00:00			`url = url.rstrip("\x01")`
fixed some edge-case where title might be wrongly set to "true" 2023-08-10 01:11:11 +00:00			`self.video_type = (`
			`"clip"`
			`if self.is_clip(url)`
			`else "shorts"`
			`if self.is_ytshorts(url)`
			`else "music"`
			`if self.is_ytmusic(url)`
			`else "embed"`
			`if self.is_embed(url)`
			`else "video"`
			`)`
			`video_type = self.video_type`
			`if video_type == "embed":`
I think this fixes youtube alternate urls, bonus: now embed links work also! 2022-05-23 17:05:10 +00:00			`videoId = url.split("/")[4]`
			`url = f"https://www.youtube.com/watch?v={videoId}"`
fixed some edge-case where title might be wrongly set to "true" 2023-08-10 01:11:11 +00:00			`elif video_type == "music":`
support YouTube Music 2022-09-14 15:35:23 +00:00			`for i in url.split("?")[1].split("&"):`
style format 2022-09-26 22:11:51 +00:00			`if i[0:2] == "v=":`
			`videoId = i[2:]`
support YouTube Music 2022-09-14 15:35:23 +00:00			`url = f"https://www.youtube.com/watch?v={videoId}"`
fixed some edge-case where title might be wrongly set to "true" 2023-08-10 01:11:11 +00:00			`elif video_type == "shorts":`
style format 2022-09-26 22:11:51 +00:00			`videoId = url.split("?")[0].split("/")[-1]`
support youtube shorts 2022-09-26 21:16:34 +00:00			`url = f"https://www.youtube.com/watch?v={videoId}"`
youtube is a thing now! 2021-10-05 13:18:25 +00:00			`global y, z`
			`y, z = {}, {}`
			`p = self.parseprop()`
idk, added version command, youtube displays view count now... 2023-07-08 18:08:25 +00:00			`# use premature optimization? it should be SLIGHTLY faster`
			`if self.premature_optimization:`
			`url_h, data = urlopen(url), b""`
			`# <body> appears on approximately line 21 or 22, so we read 24 lines to be safe (23-25 should be license comment)`
			`# I tried to read byte amounts but it's hard to make sure no invalid utf8 bytes happen due to partial reads`
			`for i in range(24):`
			`data += url_h.readline()`
			`data = data.decode() # bytes to utf-8`
			`url_h.close()`
			`else:`
			`# just read all of the html`
			`data = urlopen(url).read().decode()`
			`# print(f"\x1b[31m my data is: {data}\x1b[0m")`
youtube is a thing now! 2021-10-05 13:18:25 +00:00			`p.feed(data)`
youtube module, error when incorrect url (or otherwise failed to parse data from server) 2022-08-23 17:46:32 +00:00			`if y == z == {}:`
removed black background from youtube error because xfnw/vulpine whined about it 2022-08-23 19:18:48 +00:00			`irc_string = "[\x0304Youtube\x03] \x0307ERROR:\x0308 got no data from server! \x0315(check your URL for typos!)\x03"`
			`ansi_string = "[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m got no data from server! \x1b[37;2m(check your URL for typos!)\x1b[0m"`
youtube module, error when incorrect url (or otherwise failed to parse data from server) 2022-08-23 17:46:32 +00:00			`print(ansi_string)`
prevent echo abuse (ctcp messages, invoking other bots), fix "<urlopen error unknown url type: " errors with youtube, only error once per batch of youtube links 2023-03-17 23:32:28 +00:00			`return irc_string, True`
youtube is a thing now! 2021-10-05 13:18:25 +00:00			`z.update({"duration": self.fmt_dur(z["duration"])})`
			`y, z = z, {}`
fixed some edge-case where title might be wrongly set to "true" 2023-08-10 01:11:11 +00:00			`irc_string = f"[\x0303Youtube\x03] \x02{y['title']}\x02 ({y['duration']}) uploaded by \x1d{y['channelName']}\x1d on {y['uploadDate']}, {y['interactionCount']:,} views"`
			`ansi_string = f"[\x1b[32mYoutube\x1b[0m] \x1b[1m{y['title']}\x1b[0m ({y['duration']}) uploaded by \x1b[03m{y['channelName']}\x1b[0m on {y['uploadDate']}, {y['interactionCount']:,} views"`
youtube is a thing now! 2021-10-05 13:18:25 +00:00			`print(ansi_string)`
prevent echo abuse (ctcp messages, invoking other bots), fix "<urlopen error unknown url type: " errors with youtube, only error once per batch of youtube links 2023-03-17 23:32:28 +00:00			`return irc_string, False`
enable calling youtube.py from cli 2022-08-23 14:11:47 +00:00

			`if __name__ == "__main__":`
			`import sys`

idk, added version command, youtube displays view count now... 2023-07-08 18:08:25 +00:00			`YouTube.premature_optimization = False`
enable calling youtube.py from cli 2022-08-23 14:11:47 +00:00			`YouTube.yt(YouTube, sys.argv[1])`