Python: Generators

A generator is for lazy iteration: it produces values one at a time, only when they are requested.

Using yield in a function makes it a generator function. As with an async function, calling it does not execute the body; it only returns a generator object. Whereas an async function needs something like asyncio.run() to execute, a generator runs when it is iterated, for example with next(). Normally a generator is consumed by a for loop.

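A finally block in a generator runs when the generator is closed, either explicitly with close() or implicitly when it is garbage-collected, even if the generator was not fully consumed (provided it had been started and is suspended inside the try). This helps with cleanup.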

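A generator object can be iterated only once. To make it restartable, wrap it in a class and write __iter__(self) as the generator function; every for loop over the object then calls __iter__() and gets a fresh generator.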

class CountDown:
  """Restartable countdown: every iteration begins again from ``start``."""
  def __init__(self, start):
    self._start = start
  def __iter__(self):
    """Return a new generator yielding start, start - 1, ... while positive."""
    # Each call builds an independent generator, so the same object can be
    # looped over any number of times.
    remaining = self._start
    while True:
      if not remaining > 0:
        return
      yield remaining
      remaining -= 1
# Looping over the object calls __iter__(), which returns a new generator each time.
c = CountDown(3)
for n in c:
  print('T-minus', n)
# A second, independent countdown object with its own start value.
d = CountDown(5)
for n in d:
  print('Down to', n)

yield from delegates to another generator (or any iterable) and yields everything it produces, which simplifies composing generators.

def countup(stop):
  """Lazily yield 1, 2, 3, ... for as long as the value does not exceed ``stop``."""
  current = 1
  while True:
    if not current <= stop:
      return
    yield current
    current += 1
def countdown(start):
  """Lazily yield start, start - 1, ... for as long as the value is positive."""
  remaining = start
  while True:
    if not remaining > 0:
      return
    yield remaining
    remaining -= 1
def up_and_down(n):
  """Yield the values of countup(n) followed by the values of countdown(n).

  ``yield from`` delegates to each sub-generator in turn. Without it, each
  stage would need its own explicit loop of the form
  ``for x in countup(n): yield x``.
  """
  for stage in (countup(n), countdown(n)):
    yield from stage
# Prints the ascending run from countup(5) and then the descending run from
# countdown(5), all on one line separated by spaces.
for x in up_and_down(5):
  print(x, end=' ')
# flatten a nested list
def flatten(items):
  """Yield the non-list elements of an arbitrarily nested list, left to right.

  Only ``list`` instances are descended into; any other value (including
  tuples and strings) is yielded as-is.
  """
  for element in items:
    if not isinstance(element, list):
      yield element
      continue
    # Delegate to a recursive generator for the nested list.
    yield from flatten(element)
# Input nested three levels deep; flatten() yields the leaves 1 through 8 in order.
a = [1, 2, [3, [4, 5], 6], 7, 8]
for x in flatten(a):
  print(x, end=' ')
# -------------- iterative version (avoids recursion limit) ------------
def flatten(items):
  """Yield the non-list elements of an arbitrarily nested list, left to right.

  Iterative version: an explicit stack of iterators replaces recursion, so
  nesting depth is not bounded by the interpreter's recursion limit. Only
  ``list`` instances are descended into; other values are yielded as-is.
  """
  stack = [iter(items)]
  while stack:
    # Guard only the next() call. If the yield were inside the try, a
    # StopIteration thrown in by the consumer would be mistaken for
    # "innermost iterator exhausted" and would silently pop a level.
    try:
      item = next(stack[-1])
    except StopIteration:
      # Innermost list is exhausted: resume the enclosing one.
      stack.pop()
      continue
    if isinstance(item, list):
      stack.append(iter(item))
    else:
      yield item
# Nested input for the stack-based flatten(); the leaves 1 through 8 are printed in order.
a = [1, 2, [3, [4, 5], 6], 7, 8]
for x in flatten(a):
  print(x, end=' ')

Using generators, deeply nested for loops can be flattened into a pipeline of small, single-purpose stages without hurting performance.

Without generator:

# print comments that contain 'spam' in python files from a directory
import pathlib
import re
# Visit every *.py path below the current directory, recursively.
for path in pathlib.Path('.').rglob('*.py'):
    # Skip paths for which exists() is false, e.g. a broken symlink.
    if path.exists():
        # latin-1 can decode any byte value, so reading never fails on encoding.
        with path.open('rt', encoding='latin-1') as file:
            for line in file:
                # The leading '.*' is greedy, so group 1 starts at the last '#' on the line.
                m = re.match('.*(#.*)$', line)
                if m:
                    comment = m.group(1)
                    if 'spam' in comment:
                        print(comment)

With generator:

import pathlib
import re
def get_paths(topdir, pattern):
    """Lazily yield each existing path under ``topdir`` that matches ``pattern``.

    The search is recursive (``Path.rglob``); paths for which ``exists()`` is
    false are skipped.
    """
    root = pathlib.Path(topdir)
    for candidate in root.rglob(pattern):
        if not candidate.exists():
            continue
        yield candidate
def get_files(paths):
    """Yield an open text file for each path in ``paths``.

    Each file is opened read-only as latin-1 text. The ``with`` block closes
    it as soon as the consumer asks for the next file (or closes this
    generator), so a yielded file must be read before advancing.
    """
    for path in paths:
        with path.open('rt', encoding='latin-1') as file:
            yield file
def get_lines(files):
    """Yield every item produced by iterating each element of ``files``, in order.

    For open text files this is a single stream of lines across all files.
    """
    for handle in files:
        # Delegate to the file's own iterator.
        yield from handle
def get_comments(lines):
    """Yield the trailing ``#`` comment of each line that has one.

    The leading ``.*`` in the pattern is greedy, so when a line contains
    several ``#`` characters the yielded text starts at the last one. Lines
    without ``#`` are skipped. A trailing newline is not part of the result.
    """
    # Compile once, outside the loop.
    comment_pattern = re.compile('.*(#.*)$')
    for line in lines:
        m = comment_pattern.match(line)
        if m:
            yield m.group(1)
def print_matching(lines, substring):
    """Print every item of ``lines`` that contains ``substring``.

    This is the sink of the pipeline: iterating ``lines`` here is what pulls
    data through any lazy stages feeding it.
    """
    for line in lines:
        if substring in line:
            # Print the matching item itself, not the search term.
            print(line)
# Wire the stages together. The first four calls only create generators, so no
# work happens until print_matching() starts iterating and pulls items through.
paths = get_paths('.', '*.py')
files = get_files(paths)
lines = get_lines(files)
comments = get_comments(lines)
print_matching(comments, 'spam')

Enhanced Generator

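An enhanced generator uses yield as an expression, on the right side of an assignment: the generator pauses at the yield and receives the value that the caller passes to send().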

def receiver():
  """Print every value sent in; announce when the generator is closed.

  Must be primed (advanced to the first yield) before values can be sent.
  close() raises GeneratorExit at the paused yield; it is reported and
  re-raised so the generator really terminates. Any other exception thrown
  in propagates to the caller.
  """
  print("Ready to receive")
  try:
    while True:
      value = yield
      print("Got", value)
  except GeneratorExit:
    print("Generator is closed")
    raise
r = receiver()
r.send(None)  # prime the generator: run it up to the first yield
r.send(1)
r.send("Hello")
try:
  # Raise the exception inside the generator at its paused yield. A single
  # exception instance is passed because the (type, value) call form is
  # deprecated since Python 3.12.
  r.throw(RuntimeError("I am dead."))
except RuntimeError:
  # receiver() handles only GeneratorExit, so the error comes straight back
  # to the caller and the generator is finished. Catching it here lets the
  # script continue.
  pass
r.close()  # the generator has already terminated, so this does nothing

Enhanced generators are used to implement @contextmanager.

Accessing local variables is faster than accessing class or instance attributes. To set up a long-running task, we can therefore use an enhanced generator, which keeps its state in local variables, for improved performance:

def line_receiver():
  """Assemble newline-terminated lines from byte fragments passed to send().

  Each send(fragment) returns the oldest complete buffered line (including
  its trailing newline) as bytes, or None if no complete line is buffered.
  At most one line is returned per send; further complete lines stay
  buffered for later sends.
  """
  buffer = bytearray()
  pending = 0        # number of complete lines currently held in buffer
  completed = None
  while True:
    chunk = yield completed
    pending += chunk.count(b'\n')
    buffer.extend(chunk)
    if pending <= 0:
      completed = None
      continue
    cut = buffer.index(b'\n') + 1
    completed = bytes(buffer[:cut])
    del buffer[:cut]
    pending -= 1
r = line_receiver()
r.send(None)  # prime the generator: run it up to the first yield
print(r.send(b'hello'))  # no newline buffered yet, so this prints None
print(r.send(b'world\nit'))  # first complete line: b'helloworld\n'
print(r.send(b'works!\n'))  # next complete line: b'itworks!\n'

Leave a comment