diff --git a/.env.example b/.env.example
index 64edf06..b0eea4a 100644
--- a/.env.example
+++ b/.env.example
@@ -2,3 +2,4 @@ export SLACK_TEAM=
 export SLACK_COOKIE=
 export EMOJI_NAME_PREFIX=
 export EMOJI_NAME_SUFFIX=
+export CONCURRENT_REQUESTS=
\ No newline at end of file
diff --git a/export.py b/export.py
index 2bd49e6..84ae994 100755
--- a/export.py
+++ b/export.py
@@ -3,17 +3,22 @@
 # Export emoji in a Slack team as files
 # https://github.com/smashwilson/slack-emojinator
 
-from __future__ import print_function
-
 import requests
 import lxml.html
 import argparse
 import os
 import shutil
+import asyncio, aiohttp
+import logging
 
 from upload import _session
 
+logging.basicConfig(level=logging.INFO, format="%(asctime)-15s\t%(message)s")
+logger = logging.getLogger(__name__)
+
+URL = "https://{team_name}.slack.com/customize/emoji"
+
 
 def _argparse():
     parser = argparse.ArgumentParser(
@@ -33,32 +38,65 @@ def _argparse():
         default=os.getenv('SLACK_COOKIE'),
         help='Defaults to the $SLACK_COOKIE environment variable.'
     )
+    parser.add_argument(
+        '--concurrent-requests', '-r',
+        default=os.getenv('CONCURRENT_REQUESTS', 200),
+        help='Maximum concurrent requests. Defaults to the $CONCURRENT_REQUESTS environment variable or 200.'
+    )
     args = parser.parse_args()
     return args
 
+def concurrent_http_get(num_chunks: int, session: aiohttp.ClientSession):
+    semaphore = asyncio.Semaphore(num_chunks)
 
-def main():
+    async def http_get(url, name):
+        nonlocal semaphore
+        with (await semaphore):
+            response = await session.get(url)
+            body = await response.content.read()
+            await response.wait_for_close()
+        return body, name, url
+    return http_get
+
+def handle_response(response, name: str, url: str, directory: str):
+    logger.info(f"Got {name.ljust(15)} {url}")
+    ext = url.split(".")[-1]
+    with open(os.path.join(directory, f"{name}.{ext}"), 'wb') as out:
+        out.write(response)
+
+def _async_session(auth_cookie):
+    return aiohttp.ClientSession(headers={"Cookie": auth_cookie})
+
+async def main():
     args = _argparse()
     if not os.path.exists(args.directory):
         os.makedirs(args.directory)
-    session = _session(args)
-    resp = session.get(session.url)
-    tree = lxml.html.fromstring(resp.text)
-    urls = tree.xpath(r'//td[@headers="custom_emoji_image"]/span/@data-original')
-    names = [u.split('/')[-2] for u in urls]
+    async with _async_session(args.cookie) as session:
+        endpoint = URL.format(team_name=args.team_name)
+        logger.info(f"Getting {endpoint}")
+        resp = await session.get(endpoint)
+        async with resp:
+            if resp.status != 200:
+                logger.error(f"Failed to retrieve emoji list ({resp.status})")
+                return
+            text = await resp.text()
+        tree = lxml.html.fromstring(text)
+        urls = tree.xpath(r'//td[@headers="custom_emoji_image"]/span/@data-original')
+        names = [u.split('/')[-2] for u in urls]
+
+        logger.info(f"Parsed {len(names)} emojis")
+        assert len(names) > 0
+
+        http_get = concurrent_http_get(args.concurrent_requests, session)
+        tasks = [http_get(emoji_url, emoji_name) for emoji_name, emoji_url in zip(names, urls) if "alias" not in emoji_url]
+        for future in asyncio.as_completed(tasks):
+            data, name, url = await future
+            handle_response(data, name, url, args.directory)
 
-    for emoji_name, emoji_url in zip(names, urls):
-        if "alias" not in emoji_url:  # this does not seem necessary ...
-            file_extension = emoji_url.split(".")[-1]
-            request = session.get(emoji_url, stream=True)
-            if request.status_code == 200:
-                filename = '%s/%s.%s' % (args.directory, emoji_name,
-                                         file_extension)
-                with open(filename, 'wb') as out_file:
-                    shutil.copyfileobj(request.raw, out_file)
-            del request
 
 if __name__ == '__main__':
-    main()
+    loop = asyncio.get_event_loop()
+    loop.run_until_complete(main())
+
diff --git a/requirements.txt b/requirements.txt
index 50143c2..afce542 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 beautifulsoup4>=4.4, <5.0
 requests>=2.5.3, <3.0
 lxml==3.7.3
+aiohttp==2.3.1
\ No newline at end of file
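For reference, a minimal standalone sketch of the download pattern this patch introduces: a shared asyncio.Semaphore caps the number of in-flight aiohttp requests, and asyncio.as_completed writes each file as soon as its download finishes. It uses the current `async with semaphore:` and `asyncio.run()` idioms rather than the aiohttp 2.3-era `with (await semaphore):` style in the diff, converts the concurrency limit to an int explicitly, and the example URL map and `download_all` helper are placeholders, not part of the repository.

```python
import asyncio
import os

import aiohttp


async def fetch(session: aiohttp.ClientSession, semaphore: asyncio.Semaphore,
                url: str, name: str):
    # The semaphore bounds how many requests are in flight at once.
    async with semaphore:
        async with session.get(url) as response:
            response.raise_for_status()
            return await response.read(), name, url


async def download_all(urls: dict, directory: str, concurrency: int = 200):
    os.makedirs(directory, exist_ok=True)
    semaphore = asyncio.Semaphore(int(concurrency))
    async with aiohttp.ClientSession() as session:
        tasks = [fetch(session, semaphore, url, name) for name, url in urls.items()]
        # Handle downloads as they complete instead of waiting for the whole batch.
        for future in asyncio.as_completed(tasks):
            data, name, url = await future
            ext = url.rsplit(".", 1)[-1]
            with open(os.path.join(directory, f"{name}.{ext}"), "wb") as out:
                out.write(data)


if __name__ == "__main__":
    # Placeholder emoji map; the real script scrapes names and URLs
    # from the Slack customize/emoji page.
    asyncio.run(download_all({"party": "https://example.com/party.gif"}, "emoji"))
```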