Skip to content

Commit

Permalink
split logic for extraction
Browse files Browse the repository at this point in the history
  • Loading branch information
bishabosha committed Feb 10, 2024
1 parent 7936b3c commit a751468
Show file tree
Hide file tree
Showing 5 changed files with 241 additions and 105 deletions.
64 changes: 64 additions & 0 deletions _docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,67 @@ In one pattern, you can extract a typed value `xs: IndexedSeq[Int]` as follows:
"[23, 56, 71]" match
case r"[${r"$xs%d"}...(, )]" => xs.sum // 150
```

## Possible Formats

### String Pattern

e.g. `$foo`, which extracts `val foo: String`.

### Int Pattern

e.g. `$foo%d`, which extracts `val foo: Int`.

### Long Pattern

e.g. `$foo%L`, which extracts `val foo: Long`.

### Float Pattern

e.g. `$foo%f`, which extracts `val foo: Float`.

### Double Pattern

e.g. `$foo%g`, which extracts `val foo: Double`.

### Split Pattern

e.g. `$foo...(<regex>)`, which extracts `val foo: IndexedSeq[String]`.

This is equivalent to extracting with `$foo` and then performing `foo.split(raw"<regex>").toIndexedSeq`.

This means that inside the `<regex>` you may put any valid regex accepted by `scala.util.matching.Regex`.
As with a `raw` string, string escape sequences are not processed within the regex.

There is also a special case: if the first element of the sequence is expected to be empty, you can drop it with the `$foo..!(<regex>)` pattern.


Putting this all together, you could split Windows-style paths with the following pattern:

```scala sc:nocompile
raw"C:\foo\bar\baz.pdf" match
case r"C:$elems..!(\\)" => elems.mkString("/")
// yields "foo/bar/baz.pdf"
```

### Nested Patterns

The `r` interpolator can also match on a `Seq` of strings, arbitrarily nested.

For example

```scala sc:nocompile
val strings: Seq[String] = ???

val foo: Seq[Int] = strings match
case r"$foo%d" => foo
```

or even

```scala sc:nocompile
val stringss: Seq[Seq[String]] = ???

val foo: Seq[Seq[Int]] = stringss match
case r"$foo%d" => foo
```
1 change: 1 addition & 0 deletions project.scala
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Main
//> using scala "3.4.0-RC3"
//> using options -source:future -Yexplicit-nulls
//> using options -project enhanced-string-interpolator -siteroot ${.}

//> using publish.ci.computeVersion "git:tag"
Expand Down
5 changes: 4 additions & 1 deletion src/main/scala/stringmatching/regex/Interpolators.scala
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@ object Interpolators:
end PatternElement

/** Holder for the pattern elements described by a string interpolated with `r`. */
case class Pattern(elements: Seq[PatternElement])
enum Pattern:
  /** Only the leading glob, with no further pattern elements.
    * The parser emits this case when nothing follows the leading glob, and the
    * unapply macro types its extraction result as `EmptyTuple`.
    */
  case Literal(glob: String)
  /** The leading glob followed by exactly one pattern element.
    * The unapply macro types the extraction result as that element's own type
    * (not wrapped in a tuple).
    */
  case Single(glob: String, pattern: PatternElement)
  /** The leading glob followed by several pattern elements (the parser emits
    * this case when there are two or more).
    * The unapply macro types the extraction result as a tuple with one component
    * per element, and aborts compilation when there are more than 22 elements.
    */
  case Multiple(glob: String, patterns: Seq[PatternElement])

extension (inline sc: StringContext)
/** use in patterns like `case r"$foo...(, )" => println(foo)` */
Expand Down
66 changes: 43 additions & 23 deletions src/main/scala/stringmatching/regex/Macros.scala
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ object Macros:
'{ new RSStringContext[t]($patternExpr) }
end rsApplyExpr

/** Process a `RSStringContext` into a well-typed call to [[stringmatching.regex.Runtime.extract]]
/** Process a `RSStringContext` into a well-typed call to
* [[stringmatching.regex.Runtime.unsafeExtract]]
*/
def rsUnapplyExpr[R: Type, Base: Type](
rsSCExpr: Expr[RSStringContext[R]],
Expand All @@ -35,16 +36,24 @@ object Macros:
returnType match
case '[t] =>
'{
Runtime.extract[Base, t]($patternExpr.elements, levels = $levelsExpr)($scrutinee)
Runtime.unsafeExtract[Base, t]($patternExpr, levels = $levelsExpr)($scrutinee)
}
end match
end rsUnapplyExpr

private object Reify:

given PatternToExpr: ToExpr[Pattern] with
def apply(pattern: Pattern)(using Quotes): Expr[Pattern] =
'{ Pattern(${ Expr.ofSeq(pattern.elements.map(Expr(_))) }) }
import Pattern.*

def apply(pattern: Pattern)(using Quotes): Expr[Pattern] = pattern match
case Literal(glob) =>
'{ Literal(${ Expr(glob) }) }
case Single(glob, pattern) =>
'{ Single(${ Expr(glob) }, ${ Expr(pattern) }) }
case Multiple(glob, patterns) =>
'{ Multiple(${ Expr(glob) }, ${ Expr.ofSeq(patterns.map(Expr(_))) }) }
end PatternToExpr

given FormatPatternToExpr: ToExpr[FormatPattern] with
import FormatPattern.*
Expand Down Expand Up @@ -145,29 +154,40 @@ object Macros:
case _ => report.errorAndAbort(s"unsupported format: `%$format`")
case rest =>
PatternElement.Glob(globPattern(rest))
Pattern(PatternElement.Glob(globPattern(g)) +: rest0)

val g0 = globPattern(g)

if rest0.isEmpty then Pattern.Literal(g0)
else if rest0.sizeIs == 1 then Pattern.Single(g0, rest0.head)
else Pattern.Multiple(g0, rest0)
end parsed

private def refineResult(pattern: Pattern)(using Quotes): quotes.reflect.TypeRepr =
import quotes.reflect.*
val args = pattern.elements
.drop(1)
.map:
case PatternElement.Glob(_) => TypeRepr.of[String]
case PatternElement.Split(_, _) => TypeRepr.of[IndexedSeq[String]]
case PatternElement.SplitEmpty(_, _) => TypeRepr.of[IndexedSeq[String]]
case PatternElement.Format(format, _) =>
format match
case FormatPattern.AsInt => TypeRepr.of[Int]
case FormatPattern.AsLong => TypeRepr.of[Long]
case FormatPattern.AsDouble => TypeRepr.of[Double]
case FormatPattern.AsFloat => TypeRepr.of[Float]
if args.size == 0 then TypeRepr.of[EmptyTuple]
else if args.size == 1 then args.head
else if args.size <= 22 then AppliedType(defn.TupleClass(args.size).typeRef, args.toList)
else
report.errorAndAbort(s"too many captures: ${args.size} (implementation restriction: max 22)")
end if

def typeOfPattern(element: PatternElement) = element match
case PatternElement.Glob(_) => TypeRepr.of[String]
case PatternElement.Split(_, _) => TypeRepr.of[IndexedSeq[String]]
case PatternElement.SplitEmpty(_, _) => TypeRepr.of[IndexedSeq[String]]
case PatternElement.Format(format, _) =>
format match
case FormatPattern.AsInt => TypeRepr.of[Int]
case FormatPattern.AsLong => TypeRepr.of[Long]
case FormatPattern.AsDouble => TypeRepr.of[Double]
case FormatPattern.AsFloat => TypeRepr.of[Float]

pattern match
case Pattern.Literal(_) => TypeRepr.of[EmptyTuple]
case Pattern.Single(_, pattern) => typeOfPattern(pattern)
case Pattern.Multiple(_, elements) =>
val args = elements.map(typeOfPattern)
if args.size <= 22 then AppliedType(defn.TupleClass(args.size).typeRef, args.toList)
else
report.errorAndAbort(
s"too many captures: ${args.size} (implementation restriction: max 22)"
)
end if
end match
end refineResult

private def wrapping[Base: Type](using Quotes): Int =
Expand Down
Loading

0 comments on commit a751468

Please sign in to comment.