Initial implementation of aiohttp export
Makes the export script require Python 3.6. (async/await syntax was added in 3.5, so it may work there too.) I'll need to tidy it up a bit, but this is v1 of the functionality.

Adds -r/--concurrent-requests for throttling. Defaults to 200 (configured in .env as CONCURRENT_REQUESTS).

Caveats:

- Adds an aiohttp dependency.
- Stops using upload._session, effectively duplicating that functionality to get access to an aiohttp.ClientSession.
- Adds logging to record the files downloaded; previously the script was silent.

I've also noticed a bug in filename parsing: a bunch of files all named `apple.png` are created. This script parses the URL to retrieve the filename, which exposes the duplication. My version does more parsing of the HTML to detect the :emoji_name: as used by Slack clients; for now I'm not addressing this here.
parent a9f3ea6059
commit 2d4e355849
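As a sketch of the :emoji_name: approach mentioned above, the name could be read from the emoji table row itself rather than from the image URL. This is only an illustration: the custom_emoji_name header attribute is my assumption about Slack's markup, by analogy with the custom_emoji_image header the script already queries, and parse_emoji is a hypothetical helper, not part of this commit.

    import lxml.html

    def parse_emoji(page_text):
        # Pair each image URL with the ":name:" shown in the same table row,
        # instead of deriving the name from the URL (which collapses many
        # emoji into duplicate "apple.png"-style filenames).
        tree = lxml.html.fromstring(page_text)
        for row in tree.xpath('//tr[td[@headers="custom_emoji_image"]]'):
            urls = row.xpath('td[@headers="custom_emoji_image"]/span/@data-original')
            # Assumed markup: the sibling cell carries the :emoji_name: text.
            names = row.xpath('td[@headers="custom_emoji_name"]//text()')
            if urls and names:
                yield names[0].strip().strip(':'), urls[0]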
@@ -2,3 +2,4 @@ export SLACK_TEAM=
 export SLACK_COOKIE=
 export EMOJI_NAME_PREFIX=
 export EMOJI_NAME_SUFFIX=
+export CONCURRENT_REQUESTS=
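One wiring note on this variable: os.getenv returns a string, while the semaphore that ultimately consumes --concurrent-requests expects an integer. A minimal sketch of the coercion; the type=int keyword is my addition, not part of this commit:

    import argparse
    import os

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--concurrent-requests', '-r',
        # argparse applies type conversion to string defaults too,
        # so a value set in .env is coerced along with CLI values.
        type=int,
        default=os.getenv('CONCURRENT_REQUESTS', 200),
    )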
export.py (76 changed lines)
@@ -3,17 +3,22 @@
 # Export emoji in a Slack team as files
 # https://github.com/smashwilson/slack-emojinator
 
-from __future__ import print_function
-
 import requests
 import lxml.html
 
 import argparse
 import os
 import shutil
+import asyncio, aiohttp
+import logging
 
 from upload import _session
 
+logging.basicConfig(level=logging.INFO, format="%(asctime)-15s\t%(message)s")
+logger = logging.getLogger(__name__)
+
+URL = "https://{team_name}.slack.com/customize/emoji"
+
 
 def _argparse():
     parser = argparse.ArgumentParser(
@@ -33,32 +38,65 @@ def _argparse():
         default=os.getenv('SLACK_COOKIE'),
         help='Defaults to the $SLACK_COOKIE environment variable.'
     )
+    parser.add_argument(
+        '--concurrent-requests', '-r',
+        default=os.getenv('CONCURRENT_REQUESTS', 200),
+        help='Maximum concurrent requests. Defaults to the $CONCURRENT_REQUESTS environment variable or 200.'
+    )
     args = parser.parse_args()
     return args
 
 
-def main():
+def concurrent_http_get(num_chunks: int, session: aiohttp.ClientSession):
+    semaphore = asyncio.Semaphore(num_chunks)
+
+    async def http_get(url, name):
+        nonlocal semaphore
+        with (await semaphore):
+            response = await session.get(url)
+            body = await response.content.read()
+            await response.wait_for_close()
+        return body, name, url
+    return http_get
+
+
+def handle_response(response, name: str, url: str, directory: str):
+    logger.info(f"Got {name.ljust(15)} {url}")
+    ext = url.split(".")[-1]
+    with open(os.path.join(directory, f"{name}.{ext}"), 'wb') as out:
+        out.write(response)
+
+
+def _async_session(auth_cookie):
+    return aiohttp.ClientSession(headers={"Cookie": auth_cookie})
+
+
+async def main():
     args = _argparse()
     if not os.path.exists(args.directory):
         os.makedirs(args.directory)
 
-    session = _session(args)
-    resp = session.get(session.url)
-    tree = lxml.html.fromstring(resp.text)
-    urls = tree.xpath(r'//td[@headers="custom_emoji_image"]/span/@data-original')
-    names = [u.split('/')[-2] for u in urls]
+    async with _async_session(args.cookie) as session:
+        endpoint = URL.format(team_name=args.team_name)
+        logger.info(f"Getting {endpoint}")
+        resp = await session.get(endpoint)
+        async with resp:
+            if resp.status != 200:
+                logger.error(f"Failed to retrieve emoji list ({resp.status})")
+                return
+            text = await resp.text()
+        tree = lxml.html.fromstring(text)
+        urls = tree.xpath(r'//td[@headers="custom_emoji_image"]/span/@data-original')
+        names = [u.split('/')[-2] for u in urls]
 
-    for emoji_name, emoji_url in zip(names, urls):
-        if "alias" not in emoji_url: # this does not seem necessary ...
-            file_extension = emoji_url.split(".")[-1]
-            request = session.get(emoji_url, stream=True)
-            if request.status_code == 200:
-                filename = '%s/%s.%s' % (args.directory, emoji_name,
-                                         file_extension)
-                with open(filename, 'wb') as out_file:
-                    shutil.copyfileobj(request.raw, out_file)
-                del request
+        logger.info(f"Parsed {len(names)} emojis")
+        assert len(names) > 0
+
+        http_get = concurrent_http_get(args.concurrent_requests, session)
+        tasks = [http_get(emoji_url, emoji_name) for emoji_name, emoji_url in zip(names, urls) if "alias" not in emoji_url]
+        for future in asyncio.as_completed(tasks):
+            data, name, url = await future
+            handle_response(data, name, url, args.directory)
 
 
 if __name__ == '__main__':
-    main()
+    loop = asyncio.get_event_loop()
+    loop.run_until_complete(main())
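For reference, concurrent_http_get above is the standard semaphore-throttling pattern: one asyncio.Semaphore sized to the request budget, acquired around each GET so that at most that many downloads are in flight at once. A self-contained sketch of the same pattern with placeholder URLs; async with semaphore is the modern spelling of the commit's with (await semaphore):

    import asyncio
    import aiohttp

    async def fetch_all(urls, limit=200):
        # One shared semaphore caps the number of in-flight requests.
        semaphore = asyncio.Semaphore(limit)

        async def fetch(session, url):
            async with semaphore:  # blocks while `limit` requests are already running
                async with session.get(url) as resp:
                    return url, await resp.read()

        async with aiohttp.ClientSession() as session:
            tasks = [fetch(session, u) for u in urls]
            # Results arrive in completion order, as in the export loop above.
            return [await fut for fut in asyncio.as_completed(tasks)]

    if __name__ == '__main__':
        loop = asyncio.get_event_loop()
        loop.run_until_complete(fetch_all(["https://example.com"]))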
@@ -1,3 +1,4 @@
 beautifulsoup4>=4.4, <5.0
 requests>=2.5.3, <3.0
 lxml==3.7.3
+aiohttp==2.3.1