refactor into transformers

This commit is contained in:
JOLIMAITRE Matthieu 2024-05-21 04:41:46 +02:00
parent e8a0aa8a64
commit 23529950e9
18 changed files with 330 additions and 150 deletions

View file

@ -1,4 +1,4 @@
from .parser import Parser, just, regex, end from .parser import Parser, just, regex, end
from .result import Result, ParseError from .result import Result, ParseError
from .forward import FwDeclaration from .forward import Declare

View file

@ -1,23 +1,39 @@
from typing import Generic, TypeVar from typing import Generic, Optional, TypeVar
from .transformer import Transformer
from .result import Result from .result import Result
from .parser import Parser from .parser import Parser
P = TypeVar("P") P = TypeVar("P")
class FwDeclaration(Generic[P]): class FutureTransform(Transformer[P]):
parser: None | Parser[P] actual: Optional[Transformer[P]]
def __init__(self) -> None:
self.actual = None
def define(self, actual: Transformer[P]):
if self.actual is not None:
raise Exception("Redefinition of parser.")
self.actual = actual
def parse(self, stream: str, at_index: int) -> Result[P]:
if self.actual is None:
raise Exception("Using forwarded definition of parser without defining first.")
return self.actual.parse(stream, at_index)
P = TypeVar("P")
class Declare(Generic[P]):
parser: Parser[P]
transform: FutureTransform[P]
def __init__(self) -> None: def __init__(self) -> None:
self.parser = None transform = FutureTransform[P]()
self.parser = Parser(transform)
self.transform = transform
def p(self): def p(self):
def inner(stream: str, index: int) -> Result[P]: return self.parser
if self.parser is None:
raise Exception("Using forwarded definition of parser without defining first.")
return self.parser.inner(stream, index)
return Parser(inner)
def define(self, parser: Parser[P]): def define(self, parser: Parser[P]):
if self.parser is not None: self.transform.define(parser.inner)
raise Exception("Redefinition of parser.")
self.parser = parser

View file

@ -1,165 +1,81 @@
from typing import Callable, Generic, TypeVar, Union from dataclasses import dataclass
from .result import Result, Success, Failure, ParseError from typing import Callable, Generic, TypeVar
import re
from .result import ParseError, Failure
from .transformer import Transformer
from .transformers import (
EndTransform,
JustTransform,
RegexTransform,
AndTransform,
OrTransform,
ListTransform,
SepListTransform,
OptionTransform,
MapTransform,
ValueTransform
)
def regex(pattern: str): def regex(pattern: str):
def inner(stream: str, index: int) -> Result[str]: return Parser(RegexTransform(pattern))
do_match = re.match(pattern, stream[index:])
if do_match is None:
return Result.failure(index, set(f"matching /{pattern}/"))
else:
match = do_match[0]
return Result.success(match, index + len(match))
return Parser(inner)
def just(text: str):
length = len(text)
def inner(stream: str, index: int) -> Result[str]:
end = index + length
if stream[index:end] == text:
return Result.success(text, end)
else:
return Result.failure(index, set([text]))
return Parser(inner)
def end() -> "Parser[None]": def just(word: str):
def inner(stream: str, index: int): return Parser(JustTransform(word))
if stream[index:] == "":
return Result.success(None, index)
else: def end():
return Result.failure(index, set()) return Parser(EndTransform())
return Parser(inner)
P = TypeVar("P") P = TypeVar("P")
O = TypeVar("O")
T = TypeVar("T") T = TypeVar("T")
O = TypeVar("O")
@dataclass
class Parser(Generic[P]): class Parser(Generic[P]):
inner: Callable[[str, int], Result[P]] inner: "Transformer[P]"
def __init__(self, inner: Callable[[str, int], Result[P]]): def parse(self, stream: str) -> P:
self.inner = inner parser = self.and_(end()).map(lambda value : value[0])
parsed = parser.inner.parse(stream, 0)
if isinstance(parsed, Failure):
raise ParseError(parsed, stream)
return parsed.value
def parse(self, stream: str): def and_(self, other: "Parser[T]"):
(result, _rest) = self.and_then(end()).parse_part(stream) return Parser(AndTransform(self.inner, other.inner))
(value, _end) = result
return value
def parse_part(self, stream: str): def or_(self, other: "Parser[T]"):
result = self.inner(stream, 0) return Parser(OrTransform(self.inner, other.inner))
if isinstance(result.actual, Success):
rest = stream[result.actual.next_index:]
return (result.actual.value, rest)
else:
raise ParseError(result.actual, stream)
def map(self, transform: Callable[[P], O]) -> "Parser[O]":
def inner(stream: str, index: int):
result = self.inner(stream, 0)
if isinstance(result.actual, Success):
mapped = transform(result.actual.value)
return Result.success(mapped, result.actual.next_index)
return result
return Parser(inner)
def and_then(self, other: "Parser[T]"):
def inner(stream: str, index: int) -> Result[tuple[P, T]]:
result = self.inner(stream, index)
if isinstance(result.actual, Failure):
return Result(result.actual)
value_left = result.actual.value
next_index = result.actual.next_index
result = other.inner(stream, next_index)
if isinstance(result.actual, Failure):
return Result(result.actual)
value_right = result.actual.value
next_index = result.actual.next_index
return Result.success((value_left, value_right), next_index)
return Parser(inner)
def or_else(self, other: "Parser[T]"):
def inner(stream: str, index: int) -> Result[Union[P, T]]:
result_left = self.inner(stream, index)
if isinstance(result_left.actual, Success):
return Result.success(result_left.actual.value, result_left.actual.next_index)
result_right = other.inner(stream, index)
if isinstance(result_right.actual, Success):
return Result.success(result_right.actual.value, result_right.actual.next_index)
return Result.failure(index, result_left.actual.expected.union(result_right.actual.expected))
return Parser(inner)
def repeat(self):
def inner(stream: str, index: int) -> Result[list[P]]:
values = list[P]()
while True:
result = self.inner(stream, index)
if isinstance(result.actual, Failure):
break
values.append(result.actual.value)
if result.actual.next_index == index:
raise Exception("Parsing empty patterns repeatedly.")
index = result.actual.next_index
return Result.success(values, index)
return Parser(inner)
def or_not(self): def or_not(self):
def inner(stream: str, index: int) -> Result[Union[P, None]]: return Parser(OptionTransform(self.inner))
result = self.inner(stream, index)
if isinstance(result.actual, Failure):
return Result.success(None, index)
return Result.success(result.actual.value, result.actual.next_index)
return Parser(inner)
def value(self, value: T) -> "Parser[T]": def repeat(self):
return self.map(lambda _: value) return Parser(ListTransform(self.inner))
def sep_by(self, other: "Parser[T]"): def map(self, transform: Callable[[P], O]):
parser = self.or_not().and_then(other.and_then(self).repeat()) return Parser(MapTransform(self.inner, transform))
def mapping(value: tuple[P | None, list[tuple[T, P]]]):
(first, rest) = value
if first is None: return list[P]()
return [first, *(value for (_sep, value) in rest)]
mapped = parser.map(mapping)
return mapped
# def skip_until(self, other: "Parser[T]"): def set(self, value: T) -> "Parser[T]":
# def inner(stream: str, index: int) -> Result[tuple[P, str, T]]: return Parser(ValueTransform(self.inner, value))
# pass
# return Parser(inner) def sep_by(self, sep: "Parser[T]"):
return Parser(SepListTransform(self.inner, sep.inner))
# | # |
def __or__(self, other: "Parser[T]"): def __or__(self, other: "Parser[T]"):
return self.or_else(other) return self.or_(other)
# & # &
def __and__(self, other: "Parser[T]"): def __and__(self, other: "Parser[T]"):
return self.and_then(other) return self.and_(other)
# >> # >>
def __rshift__(self, other: "Parser[T]"): def __rshift__(self, other: "Parser[T]"):
return self.and_then(other).map(lambda v: v[1]) return self.and_(other).map(lambda v: v[1])
# << # <<
def __lshift__(self, other: "Parser[T]"): def __lshift__(self, other: "Parser[T]"):
return self.and_then(other).map(lambda v: v[0]) return self.and_(other).map(lambda v: v[0])
P = TypeVar("P")


class Transformer(Generic[P]):
    """Base type for parsing steps that produce a value of type ``P``.

    Subclasses are expected to override :meth:`parse`; the version here
    only raises.
    """

    def parse(self, stream: str, at_index: int) -> Result[P]:
        """Parse ``stream`` starting at ``at_index``.

        Raises:
            Exception: always, because this base implementation is a
                placeholder.
        """
        raise Exception("Abstract method.")
L = TypeVar("L")
R = TypeVar("R")
class AndParser(Transformer[tuple[L, R]]):
def __init__(self, left: Transformer[L], right: Transformer[R]):
self.left = left
self.right = right
def parse(self, stream: str, at_index: int) -> Result[tuple[L, R]]:
result_left = self.left.parse(stream, at_index)
if result_left.actual

1
src/pyalibert/py.typed Normal file
View file

@ -0,0 +1 @@
# Marker file for PEP 561. The pyalibert package uses inline types.

View file

@ -1,8 +1,11 @@
from dataclasses import dataclass from dataclasses import dataclass
from typing import Union, Generic, TypeVar from typing import Union, Generic, TypeVar
from .utils import TypeInfo
P = TypeVar("P") P = TypeVar("P")
T = TypeVar("T")
@dataclass @dataclass
class Success(Generic[P]): class Success(Generic[P]):
value: P value: P
@ -13,7 +16,7 @@ class Success(Generic[P]):
class Failure: class Failure:
at_index: int at_index: int
expected: set[str] expected: set[str]
depth: int | None depth: int | None = None
P = TypeVar("P") P = TypeVar("P")
@ -33,5 +36,5 @@ class ParseError(BaseException):
Parsing failed at position {self.failure.at_index} of stream : Parsing failed at position {self.failure.at_index} of stream :
${fail_section} ${fail_section}
Expected one of: {failure.expected} Expected one of: {failure.expected}
""" """.strip()
super().__init__(message) super().__init__(message)

View file

@ -0,0 +1,11 @@
from abc import abstractmethod
from typing import Generic, TypeVar
from .result import Result
P = TypeVar("P")


class Transformer(Generic[P]):
    """Base interface for a parsing step that yields a value of type ``P``.

    Concrete transformers override :meth:`parse`.
    """

    # NOTE(review): this class does not derive from ``abc.ABC``, so
    # ``@abstractmethod`` does not prevent instantiation; the raise in the
    # body is the only guard against calling an un-overridden ``parse``.
    @abstractmethod
    def parse(self, stream: str, at_index: int) -> Result[P]:
        """Parse ``stream`` starting at position ``at_index``.

        Args:
            stream: The full input text.
            at_index: Index in ``stream`` where parsing should begin.

        Returns:
            The parsing result (implemented by subclasses).

        Raises:
            NotImplementedError: when a subclass did not override this
                method. ``NotImplementedError`` is a subclass of
                ``Exception``, so existing ``except Exception`` handlers
                keep working.
        """
        raise NotImplementedError("Abstract method.")

View file

@ -0,0 +1,13 @@
from .end import EndTransform
from .just import JustTransform
from .regex import RegexTransform
from .and_ import AndTransform
from .or_ import OrTransform
from .list import ListTransform
from .sep_list import SepListTransform
from .option import OptionTransform
from .map import MapTransform
from .value import ValueTransform

View file

@ -0,0 +1,23 @@
from dataclasses import dataclass
from typing import TypeVar
from ..result import Result, Success, Failure
from ..transformer import Transformer
L = TypeVar("L")
R = TypeVar("R")


@dataclass
class AndTransform(Transformer[tuple[L, R]]):
    """Run two transformers one after the other and pair their values.

    The right transformer starts where the left one stopped. The first
    failure encountered is returned unchanged.
    """

    # NOTE(review): no annotated fields are declared, so the
    # ``@dataclass``-generated ``__eq__``/``__repr__`` ignore ``left`` and
    # ``right``.

    def __init__(self, left: Transformer[L], right: Transformer[R]):
        self.left = left
        self.right = right

    def parse(self, stream: str, at_index: int) -> Result[tuple[L, R]]:
        """Parse ``left`` then ``right``; return ``(left, right)`` values."""
        first = self.left.parse(stream, at_index)
        if isinstance(first, Failure):
            return first

        # The second step resumes exactly where the first one ended.
        second = self.right.parse(stream, first.next_index)
        if isinstance(second, Failure):
            return second

        pair = (first.value, second.value)
        return Success(pair, second.next_index)

View file

@ -0,0 +1,15 @@
from dataclasses import dataclass
from ..result import Result, Success, Failure
from ..transformer import Transformer
@dataclass
class EndTransform(Transformer[None]):
    """Succeed only when the cursor is exactly at the end of the stream."""

    def __init__(self) -> None:
        pass

    def parse(self, stream: str, at_index: int) -> Result[None]:
        """Check that ``at_index`` equals ``len(stream)``.

        Returns:
            ``Success(None, at_index)`` at end of input, otherwise a
            ``Failure`` at ``at_index`` expecting ``"<end>"``.
        """
        if len(stream) == at_index:
            return Success(None, at_index)
        # A set literal keeps "<end>" as one expected token; set("<end>")
        # would iterate the string and yield its individual characters.
        return Failure(at_index, {"<end>"})

View file

@ -0,0 +1,19 @@
from dataclasses import dataclass
from ..result import Result, Success, Failure
from ..transformer import Transformer
@dataclass
class JustTransform(Transformer[str]):
    """Match one fixed word at the current position."""

    # NOTE(review): no annotated fields are declared, so the
    # ``@dataclass``-generated ``__eq__``/``__repr__`` ignore ``word``.

    def __init__(self, word: str):
        self.word = word
        # Cached so ``parse`` does not recompute the length on every call.
        self.word_len = len(word)

    def parse(self, stream: str, at_index: int) -> Result[str]:
        """Succeed with ``word`` if ``stream`` contains it at ``at_index``."""
        stop = at_index + self.word_len
        if stream[at_index:stop] != self.word:
            return Failure(at_index, {self.word})
        return Success(self.word, stop)

View file

@ -0,0 +1,24 @@
from dataclasses import dataclass
from typing import TypeVar
from ..result import Result, Success, Failure
from ..transformer import Transformer
T = TypeVar("T")


@dataclass
class ListTransform(Transformer[list[T]]):
    """Apply one transformer repeatedly and collect every value it yields.

    Parsing stops at the first failure of ``item``; that failure is
    swallowed, so the result is always a success (possibly an empty list).
    """

    # NOTE(review): no annotated fields are declared, so the
    # ``@dataclass``-generated ``__eq__``/``__repr__`` ignore ``item``.

    def __init__(self, item: Transformer[T]):
        self.item = item

    def parse(self, stream: str, at_index: int) -> Result[list[T]]:
        """Collect consecutive ``item`` matches starting at ``at_index``.

        Raises:
            Exception: if ``item`` succeeds without consuming input, which
                would otherwise repeat forever.
        """
        collected: list[T] = []
        cursor = at_index
        while True:
            outcome = self.item.parse(stream, cursor)
            if isinstance(outcome, Failure):
                return Success(collected, cursor)
            collected.append(outcome.value)
            if outcome.next_index == cursor:
                raise Exception("Parsing empty patterns repeatedly.")
            cursor = outcome.next_index

View file

@ -0,0 +1,21 @@
from dataclasses import dataclass
from typing import Callable, TypeVar
from ..result import Result, Success, Failure
from ..transformer import Transformer
I = TypeVar("I")
O = TypeVar("O")


@dataclass
class MapTransform(Transformer[O]):
    """Post-process the value of another transformer with a function.

    Failures of the wrapped transformer pass through untouched.
    """

    # NOTE(review): no annotated fields are declared, so the
    # ``@dataclass``-generated ``__eq__``/``__repr__`` ignore ``value`` and
    # ``transform``.

    def __init__(self, value: Transformer[I], transform: Callable[[I], O]):
        self.value = value
        self.transform = transform

    def parse(self, stream: str, at_index: int) -> Result[O]:
        """Parse with ``value`` and apply ``transform`` to a successful result."""
        inner = self.value.parse(stream, at_index)
        if isinstance(inner, Failure):
            return inner
        return Success(self.transform(inner.value), inner.next_index)

View file

@ -0,0 +1,18 @@
from dataclasses import dataclass
from typing import Optional, TypeVar
from ..result import Result, Success
from ..transformer import Transformer
T = TypeVar("T")


@dataclass
class OptionTransform(Transformer[Optional[T]]):
    """Make another transformer optional.

    When the wrapped transformer does not succeed, the result is a success
    carrying ``None`` that consumes no input.
    """

    # NOTE(review): no annotated fields are declared, so the
    # ``@dataclass``-generated ``__eq__``/``__repr__`` ignore ``value``.

    def __init__(self, value: Transformer[T]):
        self.value = value

    def parse(self, stream: str, at_index: int) -> Result[Optional[T]]:
        """Try ``value``; fall back to ``Success(None, at_index)``."""
        attempt = self.value.parse(stream, at_index)
        if not isinstance(attempt, Success):
            # Nothing matched: stay at the same position.
            return Success(None, at_index)
        return Success(attempt.value, attempt.next_index)

View file

@ -0,0 +1,23 @@
from dataclasses import dataclass
from typing import TypeVar, Union
from ..result import Result, Success, Failure
from ..transformer import Transformer
L = TypeVar("L")
R = TypeVar("R")


@dataclass
class OrTransform(Transformer[Union[L, R]]):
    """Try the left transformer, then the right one at the same position.

    The first success wins. When both fail, the failure reports the union
    of what each side expected.
    """

    # NOTE(review): no annotated fields are declared, so the
    # ``@dataclass``-generated ``__eq__``/``__repr__`` ignore ``left`` and
    # ``right``.

    def __init__(self, left: Transformer[L], right: Transformer[R]):
        self.left = left
        self.right = right

    def parse(self, stream: str, at_index: int) -> Result[Union[L, R]]:
        """Return the first successful alternative, or a merged failure."""
        first = self.left.parse(stream, at_index)
        if isinstance(first, Success):
            return Success(first.value, first.next_index)

        # The right alternative restarts from the original position.
        second = self.right.parse(stream, at_index)
        if isinstance(second, Success):
            return Success(second.value, second.next_index)

        expected = first.expected.union(second.expected)
        return Failure(at_index, expected)

View file

@ -0,0 +1,19 @@
from dataclasses import dataclass
import re
from ..result import Result, Success, Failure
from ..transformer import Transformer
@dataclass
class RegexTransform(Transformer[str]):
    """Match a regular expression at the current position."""

    def __init__(self, pattern: str):
        self.pattern = pattern

    def parse(self, stream: str, at_index: int) -> Result[str]:
        """Match ``pattern`` against the text starting at ``at_index``.

        Returns:
            ``Success`` with the matched text and the index just past it,
            or a ``Failure`` at ``at_index`` whose single expected entry
            describes the pattern.
        """
        # ``re.match`` anchors at the start of the sliced remainder.
        matched = re.match(self.pattern, stream[at_index:])
        if matched is None:
            # A set literal keeps the description as one entry; passing the
            # string to set() would split it into individual characters.
            return Failure(at_index, {f"<matching '{self.pattern}'>"})
        text = matched[0]
        return Success(text, at_index + len(text))

View file

@ -0,0 +1,35 @@
from dataclasses import dataclass
from typing import Optional, TypeVar
from ..result import Result
from ..transformer import Transformer
from .map import MapTransform
from .option import OptionTransform
from .and_ import AndTransform
from .list import ListTransform
T = TypeVar("T")
S = TypeVar("S")


@dataclass
class SepListTransform(Transformer[list[T]]):
    """Parse items separated by a separator into a flat list of items.

    Built from existing transformers as: an optional first item, followed
    by any number of ``(sep, item)`` pairs. Separator values are dropped.
    """

    # NOTE(review): no annotated fields are declared, so the
    # ``@dataclass``-generated ``__eq__``/``__repr__`` ignore the attributes
    # set in ``__init__``.

    def __init__(self, items: Transformer[T], sep: Transformer[S]):
        self.items = items
        self.sep = sep

        head = OptionTransform(items)
        tail = ListTransform(AndTransform(sep, items))
        # ``mapping`` flattens (head, [(sep, item), ...]) into [item, ...].
        self.actual = MapTransform(
            AndTransform(head, tail),
            SepListTransform.mapping,
        )

    def parse(self, stream: str, at_index: int) -> Result[list[T]]:
        """Delegate to the composed transformer built in ``__init__``."""
        return self.actual.parse(stream, at_index)

    @staticmethod
    def mapping(value: tuple[Optional[T], list[tuple[S, T]]]):
        """Flatten ``(first_or_None, [(sep, item), ...])`` into a list of items.

        A ``None`` first element is skipped.
        """
        flattened = [] if value[0] is None else [value[0]]
        flattened.extend(item for (_sep, item) in value[1])
        return flattened

View file

@ -0,0 +1,18 @@
from dataclasses import dataclass
from typing import TypeVar
from ..result import Result
from ..transformer import Transformer
from .map import MapTransform
I = TypeVar("I")
T = TypeVar("T")


@dataclass
class ValueTransform(Transformer[T]):
    """Replace whatever another transformer yields with a fixed value.

    Implemented as a ``MapTransform`` whose function ignores its argument.
    """

    # NOTE(review): no annotated fields are declared, so the
    # ``@dataclass``-generated ``__eq__``/``__repr__`` ignore ``actual``.

    def __init__(self, ignored: Transformer[I], value: T):
        def constant(_discarded):
            # The parsed value is intentionally thrown away.
            return value

        self.actual = MapTransform(ignored, constant)

    def parse(self, stream: str, at_index: int) -> Result[T]:
        """Delegate to the wrapped ``MapTransform``."""
        return self.actual.parse(stream, at_index)

5
src/pyalibert/utils.py Normal file
View file

@ -0,0 +1,5 @@
from typing import Generic, TypeVar
T = TypeVar("T")


class TypeInfo(Generic[T]):
    """Empty generic class parameterised by ``T``; defines no state or methods."""