Generators are for lazy iteration.
Using yield in a function makes it a generator function. As with an async function, calling a generator function does not execute its body. Whereas running an async function requires, say, asyncio.run(), a generator runs when it is iterated over, for example with next(). Normally a generator is consumed by a for loop.
A finally block in a generator also runs when the generator is closed or garbage-collected while suspended inside the corresponding try block, even if the generator was not fully consumed. This helps with cleanup.
To make a generator restartable, wrap it in a class and write the generator as the class's __iter__(self) method; every iteration over an instance then starts a fresh generator.
class CountDown:
    """Restartable countdown from ``start`` down to 1.

    Only the starting value is stored on the instance; the running state
    lives in the generator that each call to ``__iter__`` creates, so the
    same object can be iterated any number of times.
    """

    def __init__(self, start):
        self._start = start

    def __iter__(self):
        """Return a new generator yielding start, start - 1, ..., 1."""
        remaining = self._start
        while remaining > 0:
            yield remaining
            remaining -= 1
# Each for loop calls __iter__() on the object and receives its own generator.
c = CountDown(3)
for tick in c:
    print('T-minus', tick)

d = CountDown(5)
for tick in d:
    print('Down to', tick)
yield from simplifies delegating to another generator (or any other iterable).
def countup(stop):
    """Yield 1, 2, ... for as long as the value does not exceed ``stop``."""
    current = 1
    while current <= stop:
        yield current
        current += 1
def countdown(start):
    """Yield start, start - 1, ... for as long as the value is positive."""
    remaining = start
    while remaining > 0:
        yield remaining
        remaining -= 1
def up_and_down(n):
    """Yield everything from countup(n), then everything from countdown(n)."""
    # ``yield from gen`` stands in for the explicit re-yielding loop
    #     for x in gen:
    #         yield x
    for make_counter in (countup, countdown):
        yield from make_counter(n)
# Prints the values on one line, each followed by a space.
for value in up_and_down(5):
    print(value, end=' ')
# flatten a nested list
def flatten(items):
    """Yield the leaves of an arbitrarily nested list, left to right.

    Only ``list`` instances are descended into; every other element
    (including tuples and strings) is yielded unchanged.
    """
    for element in items:
        if not isinstance(element, list):
            yield element
            continue
        # Delegate to a recursive generator for the nested list.
        yield from flatten(element)
# Print the leaves of a nested list on a single line.
a = [1, 2, [3, [4, 5], 6], 7, 8]
for leaf in flatten(a):
    print(leaf, end=' ')
# -------------- iterative version (avoids recursion limit) ------------
def flatten(items):
    """Yield the leaves of an arbitrarily nested list without recursion.

    An explicit stack of iterators replaces the call stack: the top of the
    stack is the list currently being walked. Only ``list`` instances are
    descended into; every other element is yielded unchanged.
    """
    stack = [iter(items)]
    while stack:
        # Keep the try body to the single call that signals exhaustion, so
        # that a StopIteration thrown into this generator at the yield
        # below is not mistaken for "current list finished".
        try:
            item = next(stack[-1])
        except StopIteration:
            stack.pop()
            continue
        if isinstance(item, list):
            stack.append(iter(item))
        else:
            yield item
# Same input as before, this time walked by the stack-based flatten().
a = [1, 2, [3, [4, 5], 6], 7, 8]
for leaf in flatten(a):
    print(leaf, end=' ')
Using generators, deeply nested for loops can be simplified without hurting performance.
Without generators:
# print comments that contain 'spam' in python files from a directory
import pathlib
import re
# Everything happens in one nest: find the files, open each one, scan its
# lines, extract the comment and filter it.
for py_path in pathlib.Path('.').rglob('*.py'):
    if py_path.exists():
        with py_path.open('rt', encoding='latin-1') as source:
            for text in source:
                found = re.match('.*(#.*)$', text)
                remark = found.group(1) if found else None
                if remark is not None and 'spam' in remark:
                    print(remark)
With generators:
import pathlib
import re
def get_paths(topdir, pattern):
    """Lazily yield each existing path under ``topdir`` matching ``pattern``.

    The tree is searched recursively with ``Path.rglob``; entries for which
    ``exists()`` is false are skipped.
    """
    candidates = pathlib.Path(topdir).rglob(pattern)
    yield from (entry for entry in candidates if entry.exists())
def get_files(paths):
    """Yield an open text-mode file object for each path in ``paths``.

    Each path must provide ``open()`` in the way ``pathlib.Path`` does.
    Files are opened in text mode with latin-1 decoding. A yielded file
    stays open only until the consumer requests the next one, at which
    point the ``with`` block closes it, so at most one file is open at a
    time.
    """
    for path in paths:
        # ``with`` guarantees the file is closed when the generator resumes
        # or is closed early.
        with path.open('rt', encoding='latin-1') as file:
            yield file
def get_lines(files):
    """Chain the items of every iterable in ``files`` into one stream.

    For file objects opened in text mode the items are the lines of each
    file, in order.
    """
    for handle in files:
        # Delegate iteration to the file object itself.
        yield from handle
def get_comments(lines):
    """Yield the trailing ``#`` comment of every line that contains one.

    The leading ``.*`` is greedy, so the comment starts at the last ``#``
    on the line and runs to the end of the line, excluding the trailing
    newline. Lines without a ``#`` produce nothing. The match is purely
    textual: a ``#`` inside a string literal is treated as a comment too.
    """
    # Compile once instead of looking the pattern up for every line.
    comment_pattern = re.compile(r'.*(#.*)$')
    for line in lines:
        m = comment_pattern.match(line)
        if m:
            yield m.group(1)
def print_matching(lines, substring):
    """Print every item of ``lines`` that contains ``substring``.

    This is the sink of the pipeline: iterating ``lines`` is what pulls
    data through any generators it is built from.
    """
    for line in lines:
        if substring in line:
            # Print the matching line itself, not the search term.
            print(line)
# Wire the stages together. Each get_* call only creates a generator; no
# work is done until print_matching() starts iterating at the end.
py_paths = get_paths('.', '*.py')
open_files = get_files(py_paths)
source_lines = get_lines(open_files)
all_comments = get_comments(source_lines)
print_matching(all_comments, 'spam')
Enhanced Generator
An enhanced generator uses yield as an expression, on the right-hand side of an assignment, so that the caller can send values into it.
def receiver():
    """Print every value sent in until the generator is closed.

    The first ``send(None)`` (or ``next()``) advances to the first
    ``yield``. ``close()`` raises GeneratorExit at that ``yield``; it is
    reported and re-raised so the generator terminates. Any other
    exception thrown in propagates to the caller unhandled.
    """
    print("Ready to receive")
    try:
        while True:
            received = yield
            print("Got", received)
    except GeneratorExit:
        print("Generator is closed")
        raise
r = receiver()
r.send(None)  # prime: run up to the first yield
r.send(1)
r.send("Hello")
# throw() raises the exception at the suspended yield. receiver() only
# handles GeneratorExit, so the error comes straight back out of throw();
# catch it here so the script can continue. An exception instance is
# passed because the (type, value) call form is deprecated since 3.12.
try:
    r.throw(RuntimeError("I am dead."))
except RuntimeError as exc:
    print("Generator raised:", exc)
# The generator has already terminated, so close() has nothing left to do.
r.close()
Enhanced generators are used to implement @contextmanager.
Accessing local variables is faster than accessing class or instance attributes. To set up a long-running task, we can use an enhanced generator, which keeps its state in local variables, for improved performance:
def line_receiver():
    """Assemble newline-terminated lines from byte chunks sent in.

    After priming, each ``send(chunk)`` appends the chunk to an internal
    buffer and returns the oldest complete line (as ``bytes``, newline
    included), or ``None`` when the buffer holds no complete line. At most
    one line is returned per ``send``; further complete lines stay
    buffered for later calls.
    """
    buffer = bytearray()
    pending = 0       # number of complete lines currently in the buffer
    completed = None  # value handed back by the next yield
    while True:
        chunk = yield completed
        pending += chunk.count(b'\n')
        buffer.extend(chunk)
        if pending <= 0:
            completed = None
            continue
        cut = buffer.index(b'\n') + 1
        completed = bytes(buffer[:cut])
        # Drop the consumed line from the front of the buffer.
        del buffer[:cut]
        pending -= 1
r = line_receiver()
r.send(None)  # prime: run up to the first yield
# Feed the chunks one by one and print what each send() returns.
for chunk in (b'hello', b'world\nit', b'works!\n'):
    print(r.send(chunk))
Leave a comment