
# Listing metadata left over from the repository web viewer: 257 lines, 9.9 KiB.

# __all__ = "walk files folders".split()
__all__ = "search_iter files show search folders walk".split()
from typing import Iterator, Iterable, Any
from itertools import permutations, chain
import os, re
from sl4ng import pop, show, multisplit, join, mainame, eq
# 2021-08-12 18:28:02 +00:00 (stray timestamp left over from the repository web viewer)
# Lower-cased names/paths that the walkers in this module refuse to descend into.
LOCKOUTS = [
    entry.lower()
    for entry in (
        "Config.Msi",
        "System Volume Information",
        "$Recycle.Bin",
        "C:\\Users\\Administrator",
    )
]
def walk(root: str = '.', dirs: bool = False, absolute: bool = True) -> Iterator[str]:
    """
    Walk a directory's tree yielding paths to any files and/or folders along the way.

    This will always yield files.
    If you only want directories, look for the "folders" function.

    Caution: if you pass a relative pathname for root, don't change the
    current working directory between resumptions of walk. walk never
    changes the current directory, and assumes that the client doesn't
    either. - taken from os.walk documentation

    Params
        root: str|pathlike
            path to starting directory (converted with str())
        dirs
            (True -> include, False -> omit) paths to directories
        absolute
            yield (True -> absolute paths, False -> names only)

    Directories whose lower-cased name is listed in LOCKOUTS are neither
    yielded nor descended into.
    """
    root = os.path.realpath(str(root)) if absolute else str(root)
    for name in os.listdir(root):
        path = os.path.join(root, name)
        entry = path if absolute else name
        if not os.path.isdir(path):
            # Plain files are always reported.
            yield entry
        elif name.lower() not in LOCKOUTS:
            if dirs:
                # Report the directory itself before its contents.
                yield entry
            yield from walk(path, dirs=dirs, absolute=absolute)
def parse_extensions(extensions: str) -> re.Pattern:
    """
    Create a regex parser to check for file extensions.

    Note: Separate extensions by any mix of
        [',', '`', '*', ' '] (or other whitespace)

    Params
        extensions
            the extensions of interest, with or without a leading dot,
            e.g. 'jpg .jpeg pdf' or 'jpg, png'

    Returns a case-insensitive pattern that matches strings ending in
    '.<ext>' for any of the given extensions. When no extension is given
    the pattern is empty and therefore matches every string.
    """
    # Split on every supported separator at once so mixed separators work,
    # and drop leading dots so '.jpeg' and 'jpeg' mean the same thing.
    tokens = (token.lstrip('.') for token in re.split(r'[,`*\s]+', extensions))
    # Escape each extension so characters such as '+' are matched literally.
    escaped = [re.escape(token) for token in tokens if token]
    if not escaped:
        return re.compile('', re.I)
    return re.compile(r'\.(?:' + '|'.join(escaped) + r')$', re.I)
def files(root: str = '.', exts: str = '', negative: bool = False, absolute: bool = True) -> Iterator[str]:
    """
    Search for files along a directory's tree.
    Also (in/ex)-clude any whose extension satisfies the requirement.

    Params
        root: str|pathlike
            path to starting directory (converted with str())
        exts
            extensions of interest.
            you can pass more than one extension at a time by separating them with one of the following:
                [',', '`', '*', ' ']
        negative
            (True -> omit, False -> include) matching extensions
        absolute
            yield (True -> abspaths, False -> names only)

    Nothing is yielded when root itself (by name or by full path) is listed
    in LOCKOUTS; sub-directories listed in LOCKOUTS are skipped.
    """
    root = os.path.realpath(str(root)) if absolute else str(root)
    pattern = parse_extensions(exts)
    base = os.path.split(root)[1]
    if base.lower() in LOCKOUTS or root.lower() in LOCKOUTS:
        return
    for name in os.listdir(root):
        path = os.path.join(root, name)
        if os.path.isdir(path):
            # Directories are never reported here, only descended into.
            if name.lower() not in LOCKOUTS:
                yield from files(path, exts=exts, negative=negative, absolute=absolute)
        elif bool(pattern.search(path)) != bool(negative):
            # Keep matches normally, keep non-matches when negative is set.
            yield path if absolute else name
def folders(root: str = '.', absolute: bool = True) -> Iterator[str]:
    """
    Search for folders along a directory's tree.

    Each directory is yielded before the directories it contains.
    Directories whose lower-cased name is listed in LOCKOUTS are neither
    yielded nor descended into.

    Params
        root: str|pathlike
            path to starting directory (converted with str())
        absolute
            yield (True -> abspaths, False -> names only)
    """
    root = os.path.realpath(str(root)) if absolute else str(root)
    for name in os.listdir(root):
        path = os.path.join(root, name)
        if os.path.isdir(path) and name.lower() not in LOCKOUTS:
            yield path if absolute else name
            yield from folders(path, absolute=absolute)
def __term_perms(terms:str, case:int, tight:bool) -> re.Pattern:
compute the regex pattern for posible permutations of search terms
sep = "[\\ _\\-]*" if tight else "(.)*"
if isinstance(terms, str):
terms = terms.split()
terms = map(re.escape, terms)
rack = (sep.join(perm) for perm in permutations(terms))
return re.compile("|".join(rack), case)
def search_iter(iterable: Iterable[str], terms: Iterable[str], exts: str = '', case: bool = False, negative: bool = False, dirs: int = 0, strict: int = 1, regex: bool = False, names: bool = True) -> Iterator[str]:
    """
    Filter an iterable of paths, yielding those that match the given terms.

    Params
        iterable
            the paths (strings) to be filtered
        terms
            the terms sought after
            separate by spaces (an iterable of terms is also accepted)
        exts
            any file extensions you wish to check for
            separate by spaces
        case
            toggle case sensitivity (True -> case-sensitive)
        negative
            False -> yield items matching both the extensions and the terms
            True  -> yield items matching neither the extensions nor the terms
        dirs
            not used by this function; accepted so that it can be called
            with the same keyword arguments as "search"
        strict
            0 -> match any terms in any order
            1 -> match all terms in any order (interruptions allowed)
            2 -> match all terms in any order (no interruptions allowed)
            3 -> match all terms in given order (interruptions)
            4 -> match all terms in given order (no interruptions)
                combinations of the following are not counted as interruptions:
                    [' ', '_', '-']
            5 -> match string will be compiled as though it was preformatted regex
        regex
            True -> compile terms as a regex, whatever the value of strict
        names
            True -> only yield results whose names match
            False -> yield results who match at any level

    Raises KeyError (on first iteration) when regex is False and strict is
    not one of 0-5.
    """
    flags = 0 if case else re.I
    expat = parse_extensions(exts)

    if regex:
        tepat = re.compile(terms, flags)
    else:
        words = terms.split() if isinstance(terms, str) else list(terms)
        ordered_sep = r"[\ _\-]*" if strict == 4 else "(.)*"
        # Only the requested variant is compiled; building all of them up
        # front would raise for inputs that the chosen mode never needs
        # (e.g. compiling non-regex terms as a regex for mode 5).
        builders = {
            0: lambda: re.compile("|".join(map(re.escape, words)), flags),
            1: lambda: __term_perms(words, flags, 0),
            2: lambda: __term_perms(words, flags, 1),
            3: lambda: re.compile(ordered_sep.join(map(re.escape, words)), flags),
            4: lambda: re.compile(ordered_sep.join(map(re.escape, words)), flags),
            5: lambda: re.compile(terms, flags),
        }
        tepat = builders[strict]()

    # NOTE(review): the predicate was reconstructed from a garbled listing
    # ("lambda i: and," / "lambda i: not ( or,") -- confirm against the
    # upstream source.
    for item in iterable:
        target = os.path.split(item)[1] if names else str(item)
        ext_hit = expat.search(target)
        term_hit = tepat.search(target)
        if negative:
            keep = not (ext_hit or term_hit)
        else:
            keep = ext_hit and term_hit
        if keep:
            yield item
def search(root: str, terms: Iterable[str], exts: str = '', case: bool = False, negative: bool = False, dirs: int = 0, strict: int = 1, regex: bool = False, names: bool = True) -> Iterator[str]:
    """
    Find files matching the given terms within a directory's tree.
    Uses linear search.

    Params
        root
            the directory in which the walking search commences
        terms
            the terms sought after
            separate by spaces
        exts
            any file extensions you wish to check for
            separate by spaces
        case
            toggle case sensitivity
        negative
            Any files/folders with names or extensions matching the terms and exts will be omitted.
        dirs
            0 -> ignore all directories
            1 -> directories and files
            2 -> directories only
        strict
            0 -> match any terms in any order
            1 -> match all terms in any order (interruptions allowed)
            2 -> match all terms in any order (no interruptions allowed)
            3 -> match all terms in given order (interruptions)
            4 -> match all terms in given order (no interruptions)
                combinations of the following are not counted as interruptions:
                    [' ', '_', '-']
            5 -> match string will be compiled as though it was preformatted regex
        regex
            forwarded to search_iter (True -> terms are compiled as a regex)
        names
            True -> only yield results whose names match
            False -> yield results who match at any level

    Raises KeyError (on first iteration) when dirs is not 0, 1 or 2.
    """
    # Pick the walker and its keyword arguments together, keyed by dirs.
    walker, options = {
        0: (files, {"exts": exts, "negative": negative, "absolute": True}),
        1: (walk, {"dirs": True, "absolute": True}),
        2: (folders, {"absolute": True}),
    }[dirs]
    yield from search_iter(
        walker(root, **options),
        terms=terms, exts=exts, case=case,
        negative=negative, dirs=dirs,
        strict=strict, regex=regex, names=names,
    )
if __name__ == "__main__":
    # Ad-hoc manual check: search a hard-coded folder and display the hits.
    # Folders used for earlier runs (only the last assignment ever took effect):
    #   r'E:\Projects\Monties\2021\file management'
    #   'C:\\Users\\Kenneth\\Downloads\\byextension'
    #   r"E:\Projects\Monties\2021\media\file_management\filey"
    #   "../../.."
    folder = "c:/users/kenneth/pictures"

    # box = [*walk(folder, absolute=True)]
    # print(all(map(os.path.exists, box)))
    # print(__file__ in box)
    # show(box, 0, 1)

    # box = [*walk(folder, dirs=False, absolute=True)]
    # box2 = [*files(folder, exts='', absolute=True)]
    # print(all(i in box2 for i in box) and all(i in box for i in box2))

    # exts = 'jpg .jpeg pdf'
    # show([*files(folder, exts=exts, negative=False, absolute=True)])

    # box = [*walk(folder, dirs=False, absolute=True)]
    # box2 = [*files(folder, exts=exts, negative=False, absolute=True)]
    # box3 = [*files(folder, exts=exts, negative=True, absolute=True)]
    # print(eq(map(sorted, (box2+box3, box))))

    # box4 = [*folders(folder, True)]
    # show(box4, 0, 1)

    # show(search(folder, '__init__'))

    # Positional arguments: terms='_', exts='png'.
    show(search(folder, '_', 'png'))