2021-10-05 13:18:25 +00:00
from html . parser import HTMLParser
from urllib . request import urlopen
class YouTube:
    """Find YouTube links in chat text and build a one-line video summary.

    The methods work both on an instance and on the class object itself
    (the script entry point of this file calls ``YouTube.yt(YouTube, url)``).

    Attributes expected to be assigned from outside before use:
        util: object providing ``mesg(msg, t)``; only ``mesg`` needs it.
        premature_optimization: when true, ``yt`` downloads only the first
            24 lines of the page; treated as ``False`` when never assigned.
    """

    # Not touched by any method of this class; kept so that external code
    # reading ``YouTube.y`` / ``YouTube.z`` keeps working.
    y, z = {}, {}  # empty on every invocation
    # Kind of link handled by the most recent ``yt`` call
    # ("clip", "shorts", "music", "embed" or "video").
    video_type = ""

    # Every URL prefix that ``match_urls`` recognises.
    _URL_PREFIXES = (
        "https://youtu.be/",
        "https://www.youtube.com/watch?v=",
        "https://m.youtube.com/watch?v=",
        "https://youtube.com/watch?v=",
        "https://www.youtube.com/embed/",
        "https://www.youtube-nocookie.com/embed/",
        "https://music.youtube.com/watch?v=",
        "https://youtube.com/shorts/",
        "https://www.youtube.com/shorts/",
        "https://www.youtube.com/clip/",
        "https://youtube.com/clip/",
    )

    # Keys the summary line formats; a page missing any of them is
    # reported as "no data" instead of raising KeyError.
    _REQUIRED_PROPS = (
        "title",
        "duration",
        "channelName",
        "uploadDate",
        "interactionCount",
    )

    def mesg(self, msg, t=None):
        """Forward ``msg`` (and optional ``t``) to ``self.util.mesg``."""
        self.util.mesg(msg, t)

    def match_urls(self, str):
        """Return the distinct YouTube URLs found in ``str``, in order.

        ``str`` is split on whitespace and a word is kept only when it
        *starts with* one of the known YouTube prefixes.  The previous
        implementation removed unwanted words from the list while looping
        over it, which skipped the element following each removal; it also
        accepted any ``http...`` word that merely contained a YouTube URL,
        which would then have been fetched by ``yt``.

        The parameter shadows the ``str`` builtin; the name is kept so
        keyword callers are not broken.
        """
        words = str.split()
        hits = (w for w in words if w.startswith(YouTube._URL_PREFIXES))
        # dict.fromkeys de-duplicates while preserving first-seen order.
        return list(dict.fromkeys(hits))

    # The predicates below were plain functions without ``self``; as static
    # methods they behave the same through the class and now also work
    # through an instance.
    @staticmethod
    def is_embed(str):
        """Return True when ``str`` is an embed-player URL."""
        return str.startswith(
            (
                "https://www.youtube.com/embed/",
                "https://www.youtube-nocookie.com/embed/",
            )
        )

    @staticmethod
    def is_ytmusic(str):
        """Return True when ``str`` is a YouTube Music watch URL."""
        return str.startswith("https://music.youtube.com/watch?v=")

    @staticmethod
    def is_ytshorts(str):
        """Return True when ``str`` is a Shorts URL."""
        return str.startswith(
            ("https://youtube.com/shorts/", "https://www.youtube.com/shorts/")
        )

    @staticmethod
    def is_clip(str):
        """Return True when ``str`` is a clip URL."""
        return str.startswith(
            ("https://youtube.com/clip/", "https://www.youtube.com/clip/")
        )

    class parseprop(HTMLParser):
        """Collect selected ``<meta>`` / ``<link>`` properties of a page.

        Results accumulate in ``self.props`` (previously a module-level
        global that only existed after ``yt`` had run):

        * ``<meta name="title" content=...>``   -> ``props["title"]``
        * ``<link itemprop="name" content=...>`` -> ``props["channelName"]``
        * ``<meta itemprop=P content=...>`` for each P in
          ``itemprops_list`` other than "name" -> ``props[P]``
          ("interactionCount" is converted to ``int``).
        """

        def __init__(self, video_type=None):
            """Create a parser.

            Args:
                video_type: kind of link being parsed; when ``None`` the
                    class-level ``YouTube.video_type`` is used, as before.
            """
            print("yt parse init")
            HTMLParser.__init__(self)
            if video_type is None:
                video_type = YouTube.video_type
            self.props = {}
            self.itemprops_list = [
                "name",
                "duration",
                "uploadDate",
                "interactionCount",
            ]
            if video_type == "clip":
                self.itemprops_list += ["description"]
                print("it is a clip!")

        def handle_starttag(self, tag, attrs):
            """Record the property carried by a ``<meta>``/``<link>`` tag."""
            if tag not in ("meta", "link"):
                return
            has_itemprop = any("itemprop" in pair for pair in attrs)
            if not has_itemprop and ("name", "title") not in attrs:
                return
            # <meta itemprop="name"> is skipped; the title is taken from
            # <meta name="title"> and "name" on <link> is the channel.
            if tag == "meta" and ("itemprop", "name") in attrs:
                return

            pair = []  # becomes [key, value] for a well-formed tag
            for k, v in attrs:
                if k == "itemprop":
                    if v not in self.itemprops_list:
                        return
                    pair = ["channelName" if tag == "link" and v == "name" else v]
                elif k == "content":
                    if pair and pair[0] == "interactionCount":
                        v = int(v)
                    pair += [v]
                elif k == "name" and v == "title":
                    pair = [v]
                else:
                    # Any other attribute means this is not a tag we want.
                    return
            # A tag whose key came after its content (or that had no
            # content at all) used to crash here; it is now ignored.
            if len(pair) < 2:
                return
            self.props[pair[0]] = pair[1]

    @staticmethod
    def fmt_dur(dur):
        """Format a duration such as ``"PT12M34S"`` for display.

        The first two characters are dropped (as before) and the rest is
        read as ``<n>H``, ``<n>M`` and ``<n>S`` fields, each optional.

        Returns:
            "LIVE" for a zero duration, otherwise e.g. "42s", "5m",
            "5m 3s" or "2h 5m 7s" (the hour form always shows all three).

        Raises:
            ValueError: if the text contains anything other than those
                fields, or no field at all.
        """
        fields = {"H": 0, "M": 0, "S": 0}
        digits = ""
        seen_any = False
        for ch in dur[2:]:
            if ch.isdigit():
                digits += ch
            elif ch in fields and digits:
                fields[ch] = int(digits)
                digits = ""
                seen_any = True
            else:
                raise ValueError(f"unsupported duration: {dur!r}")
        if digits or not seen_any:
            raise ValueError(f"unsupported duration: {dur!r}")

        # Integer arithmetic instead of the old float round-trip.
        total = fields["H"] * 3600 + fields["M"] * 60 + fields["S"]
        m, s = divmod(total, 60)
        h, m = divmod(m, 60)
        if h:
            return f"{h}h {m}m {s}s"
        if m == 0 and s == 0:
            return "LIVE"
        if m == 0:
            return f"{s}s"
        if s == 0:
            return f"{m}m"
        return f"{m}m {s}s"

    @staticmethod
    def _canonical_url(url, video_type):
        """Rewrite embed/music/shorts links to the plain watch-page URL."""
        if video_type == "embed":
            # Drop any "?query" first so it does not end up in the id
            # (the shorts branch already did this).
            video_id = url.split("?")[0].split("/")[4]
        elif video_type == "music":
            video_id = ""
            for param in url.split("?")[1].split("&"):
                if param[0:2] == "v=":
                    video_id = param[2:]
        elif video_type == "shorts":
            video_id = url.split("?")[0].split("/")[-1]
        else:
            return url
        return f"https://www.youtube.com/watch?v={video_id}"

    @staticmethod
    def _fetch_html(url, head_only):
        """Download ``url`` and return it decoded; the response is closed."""
        # NOTE(review): ``url`` is handed to urlopen unchanged; callers
        # should only pass URLs that came out of ``match_urls``.
        with urlopen(url) as response:
            if head_only:
                # <body> appears on approximately line 21 or 22, so we read
                # 24 lines to be safe (23-25 should be license comment).
                # Reading whole lines rather than a byte count avoids
                # cutting a multi-byte UTF-8 sequence in half.
                raw = b"".join(response.readline() for _ in range(24))
            else:
                # just read all of the html
                raw = response.read()
        return raw.decode()  # bytes to utf-8

    def yt(self, url):
        """Fetch a YouTube page and summarise the video.

        The ANSI-coloured summary (or error) is printed to stdout.

        Args:
            url: a YouTube URL; trailing ``\\x01`` characters are removed.

        Returns:
            ``(irc_string, is_error)`` where ``irc_string`` is the
            IRC-formatted line and ``is_error`` is True when the page did
            not provide the expected properties.

        Exceptions raised by ``urlopen`` are not caught here.
        """
        url = url.rstrip("\x01")

        video_type = "video"
        for kind, matches in (
            ("clip", self.is_clip),
            ("shorts", self.is_ytshorts),
            ("music", self.is_ytmusic),
            ("embed", self.is_embed),
        ):
            if matches(url):
                video_type = kind
                break
        self.video_type = video_type
        url = self._canonical_url(url, video_type)

        parser = self.parseprop(video_type)
        # use premature optimization? it should be SLIGHTLY faster
        head_only = getattr(self, "premature_optimization", False)
        parser.feed(self._fetch_html(url, head_only))

        info = dict(parser.props)
        if any(key not in info for key in self._REQUIRED_PROPS):
            irc_string = "[\x0304Youtube\x03] \x0307ERROR:\x0308 got no data from server! \x0315(check your URL for typos!)\x03"
            ansi_string = "[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m got no data from server! \x1b[37;2m(check your URL for typos!)\x1b[0m"
            print(ansi_string)
            return irc_string, True

        info["duration"] = self.fmt_dur(info["duration"])
        irc_string = f"[\x0303Youtube\x03] \x02{info['title']}\x02 ({info['duration']}) uploaded by \x1d{info['channelName']}\x1d on {info['uploadDate']}, {info['interactionCount']:,} views"
        ansi_string = f"[\x1b[32mYoutube\x1b[0m] \x1b[1m{info['title']}\x1b[0m ({info['duration']}) uploaded by \x1b[03m{info['channelName']}\x1b[0m on {info['uploadDate']}, {info['interactionCount']:,} views"
        print(ansi_string)
        return irc_string, False
2022-08-23 14:11:47 +00:00
if __name__ == "__main__":
    import sys

    # Command-line use: print the summary for the URL given as the first
    # argument.  Exit with a usage message instead of an IndexError
    # traceback when the argument is missing.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <youtube-url>")
    # Read the whole page rather than only its first lines.
    YouTube.premature_optimization = False
    # The class object itself is passed as ``self``.
    YouTube.yt(YouTube, sys.argv[1])