Source code for nlpstack.common.automaton

import dataclasses
from collections import defaultdict
from typing import Dict, FrozenSet, Generic, Hashable, Iterable, Optional, Set, Tuple, TypeVar

Symbol = TypeVar("Symbol", bound=Hashable)


[docs]@dataclasses.dataclass class DFAState(Generic[Symbol]): is_final: bool = False transition: Dict[Symbol, "DFAState"] = dataclasses.field(default_factory=dict) def __str__(self) -> str: representation = f"DFAState {hex(id(self))}" if self.is_final: representation += " (final)" return representation def __hash__(self) -> int: return id(self)
[docs] def acceptable_symbols(self) -> Set[Symbol]: """ Return acceptable symbols for this state. """ return set(self.transition)
[docs] def show(self) -> None: visited = set() queue = [self] while queue: state = queue.pop(0) if state in visited: continue visited.add(state) print(state) for symbol, next_state in state.transition.items(): print(f" {symbol} -> {next_state}") queue.append(next_state)
[docs]class DFA(Generic[Symbol]): """ A deterministic finite automaton (DFA) """
[docs] @classmethod def from_symbol(cls, symbol: Symbol) -> "DFA": """ Create a DFA that accepts a single symbol. """ start = DFAState[Symbol]() end = DFAState[Symbol](is_final=True) start.transition[symbol] = end return cls(start)
def __init__(self, state: DFAState) -> None: self.state = state def __repr__(self) -> str: return f"DFA(state={self.state})" def __str__(self) -> str: return f"DFA(state={self.state})"
[docs] def step(self, state: DFAState[Symbol], symbol: Symbol) -> Optional["DFAState"]: return state.transition.get(symbol)
[docs] def accepts(self, symbols: Iterable[Symbol]) -> bool: state: Optional[DFAState] = self.state for symbol in symbols: assert state is not None state = self.step(state, symbol) if state is None: return False return state.is_final if state else False
[docs] def show(self) -> None: visited = set() queue = [self.state] while queue: state = queue.pop(0) if state in visited: continue visited.add(state) print(state) for symbol, next_state in state.transition.items(): print(f" {symbol} -> {next_state}") queue.append(next_state)
[docs]@dataclasses.dataclass class NFAState(Generic[Symbol]): is_final: bool = False transition: Dict[Symbol, "NFAState"] = dataclasses.field(default_factory=dict) epsilon_transitions: Set["NFAState"] = dataclasses.field(default_factory=set) def __str__(self) -> str: representation = f"NFAState {hex(id(self))}" if self.is_final: representation += " (final)" return representation def __hash__(self) -> int: return id(self)
[docs] def allowed_transitions(self) -> Dict[Symbol, Set["NFAState"]]: """ Return allowed transitions for this state. """ transitions = defaultdict(set) stack: Set[Tuple[Optional[Symbol], NFAState]] = {(None, self)} while stack: prev_symbol, state = stack.pop() if prev_symbol is None: for symbol, next_state in state.transition.items(): transitions[symbol].add(next_state) stack.update((symbol, x) for x in next_state.epsilon_transitions) else: transitions[prev_symbol].add(state) stack.update((prev_symbol, next_state) for next_state in state.epsilon_transitions) return { symbol: {state for state in states if state.transition or state.is_final} for symbol, states in transitions.items() }
[docs]class NFA(Generic[Symbol]): """ A nondeterministic finite automaton (NFA) """
[docs] @classmethod def from_epsilon(cls) -> "NFA[Symbol]": """ Create an NFA that accepts an empty string. """ start = NFAState[Symbol]() end = NFAState[Symbol](is_final=True) start.epsilon_transitions.add(end) return cls(start, end)
[docs] @classmethod def from_symbol(cls, symbol: Symbol) -> "NFA": """ Create an NFA that accepts a single symbol. """ start = NFAState[Symbol]() end = NFAState[Symbol](is_final=True) start.transition[symbol] = end return cls(start, end)
def __init__(self, start: NFAState, end: NFAState) -> None: self.start = start self.end = end def __repr__(self) -> str: return f"NFA(start={self.start}, end={self.end})" def __str__(self) -> str: return f"NFA(start={self.start}, end={self.end})" def __eq__(self, other: object) -> bool: if isinstance(other, NFA): return self.start == other.start and self.end == other.end return False def __hash__(self) -> int: return hash((self.start, self.end)) def __add__(self, other: "NFA[Symbol]") -> "NFA[Symbol]": """ Create an NFA that accepts the concatenation of the languages of two NFAs. """ self.end.epsilon_transitions.add(other.start) self.end.is_final = False return NFA(self.start, other.end) def __or__(self, other: "NFA[Symbol]") -> "NFA[Symbol]": """ Create an NFA that accepts the union of the languages of two NFAs. """ start = NFAState[Symbol]() start.epsilon_transitions.add(self.start) start.epsilon_transitions.add(other.start) end = NFAState[Symbol](is_final=True) self.end.epsilon_transitions.add(end) other.end.epsilon_transitions.add(end) self.end.is_final = False other.end.is_final = False return NFA(start, end)
[docs] def closure(self) -> "NFA[Symbol]": """ Create an NFA that accepts the closure of the language of an NFA. """ start = NFAState[Symbol]() end = NFAState[Symbol](is_final=True) start.epsilon_transitions.add(end) start.epsilon_transitions.add(self.start) self.end.epsilon_transitions.add(self.start) self.end.epsilon_transitions.add(end) self.end.is_final = False return NFA(start, end)
[docs] def compile(self) -> DFA[Symbol]: """ Compile a NFA into a DFA. """ visited = set() queue = [frozenset({self.start})] transitions: Dict[FrozenSet[NFAState[Symbol]], Dict[Symbol, Set[NFAState[Symbol]]]] = defaultdict(dict) while queue: states = queue.pop(0) if states in visited: continue visited.add(states) allowed_transitions = defaultdict(set) for state in states: for symbol, next_states in state.allowed_transitions().items(): allowed_transitions[symbol].update(next_states) queue.append(frozenset(next_states)) transitions[states] = allowed_transitions # TODO: minimize DFA dfa_states: Dict[FrozenSet[NFAState[Symbol]], DFAState[Symbol]] = {} for nfa_states, nfa_transitions in transitions.items(): for symbol, next_nfa_states in nfa_transitions.items(): if frozenset(nfa_states) not in dfa_states: dfa_states[frozenset(nfa_states)] = DFAState[Symbol]( is_final=any(state.is_final for state in nfa_states) ) if frozenset(next_nfa_states) not in dfa_states: dfa_states[frozenset(next_nfa_states)] = DFAState[Symbol]( is_final=any(state.is_final for state in next_nfa_states) ) start = dfa_states[frozenset(nfa_states)] end = dfa_states[frozenset(next_nfa_states)] start.transition[symbol] = end return DFA(dfa_states[frozenset({self.start})])
[docs] def show(self) -> None: """ Print the NFA in a human-readable format. """ visited = set() queue = [self.start] while queue: state = queue.pop(0) if state in visited: continue visited.add(state) print(state) for symbol, next_state in state.transition.items(): print(f" {symbol} -> {next_state}") queue.append(next_state) for next_state in state.epsilon_transitions: print(f" (ε) -> {next_state}") queue.append(next_state)