Skip to content

Commit

Permalink
Add benchmark for matching utf8 bytes
Browse files Browse the repository at this point in the history
Found no meaningful difference between matching binary (byte[]) input and String input.
  • Loading branch information
jc4x4 authored and sjamesr committed Mar 8, 2021
1 parent c4e8120 commit 34f3041
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
*/
package com.google.re2j.benchmark;

import static java.nio.charset.StandardCharsets.UTF_8;

import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
Expand All @@ -23,8 +25,17 @@ public class BenchmarkFullMatch {
@Param({"JDK", "RE2J"})
private Implementations impl;

// When true, the benchmarks hand the matcher a byte[] input; when false, a String input.
@Param({"true", "false"})
private boolean binary;

private Implementations.Pattern pattern;

// Benchmark inputs, each kept in both String and UTF-8 byte[] form. "password" is the input
// the matched() benchmark expects to match; "l0ngpassword" is the one notMatched() expects to
// be rejected. Keep these as non-final instance fields: JMH guidance is to read benchmark
// inputs from non-final state fields so the JIT cannot constant-fold them.
private String password = "password";
private byte[] password_bytes = password.getBytes(UTF_8);

private String l0ngpassword = "l0ngpassword";
// Derived from the String field (as password_bytes is) so the two forms cannot drift apart.
private byte[] l0ngpassword_bytes = l0ngpassword.getBytes(UTF_8);

@Setup
public void setup() {
pattern =
Expand All @@ -34,7 +45,8 @@ public void setup() {

@Benchmark
public void matched(Blackhole bh) {
Implementations.Matcher matcher = pattern.matcher("password");
Implementations.Matcher matcher =
binary ? pattern.matcher(password_bytes) : pattern.matcher(password);
boolean matches = matcher.matches();
if (!matches) {
throw new AssertionError();
Expand All @@ -44,7 +56,8 @@ public void matched(Blackhole bh) {

@Benchmark
public void notMatched(Blackhole bh) {
Implementations.Matcher matcher = pattern.matcher("l0ngpassword");
Implementations.Matcher matcher =
binary ? pattern.matcher(l0ngpassword_bytes) : pattern.matcher(l0ngpassword);
boolean matches = matcher.matches();
if (matches) {
throw new AssertionError();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,12 @@ public class BenchmarkSubMatch {
@Param({"JDK", "RE2J"})
private Implementations impl;

private String html =
new String(readFile("google-maps-contact-info.html"), StandardCharsets.UTF_8);
// When true, the benchmark searches the raw byte[] contents; when false, the decoded String.
@Param({"true", "false"})
private boolean binary;

// Raw file contents; searched directly when benchmarking byte[] input.
private byte[] bytes = readFile("google-maps-contact-info.html");
// The same contents decoded as UTF-8; searched when benchmarking String input.
private String html = new String(bytes, StandardCharsets.UTF_8);

private Implementations.Pattern pattern;

@Setup
Expand All @@ -38,7 +42,7 @@ public void setup() {

@Benchmark
public void findPhoneNumbers(Blackhole bh) {
Implementations.Matcher matcher = pattern.matcher(html);
Implementations.Matcher matcher = binary ? pattern.matcher(bytes) : pattern.matcher(html);
int count = 0;
while (matcher.find()) {
bh.consume(matcher.group());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ public static Pattern compile(Implementations impl, String pattern) {

/** Returns a matcher that applies this pattern to the given string. */
public abstract Matcher matcher(String str);

/**
 * Returns a matcher that applies this pattern to the given bytes.
 *
 * <p>Of the implementations in this file, the JDK one decodes the bytes into a String before
 * matching, while the RE2J one passes the byte array to its underlying pattern as-is.
 */
public abstract Matcher matcher(byte[] bytes);

public static class JdkPattern extends Pattern {

private final java.util.regex.Pattern pattern;
Expand All @@ -92,6 +94,11 @@ public JdkPattern(String pattern) {
public Matcher matcher(String str) {
return new Matcher.JdkMatcher(pattern.matcher(str));
}

/**
 * Returns a matcher over {@code bytes} decoded as UTF-8.
 *
 * <p>{@code java.util.regex.Pattern#matcher} accepts only a {@code CharSequence}, so the bytes
 * are converted to a String on every call; that conversion is therefore part of whatever a
 * caller measures around this method.
 *
 * @param bytes UTF-8 encoded input to match against
 * @return a matcher wrapping the JDK matcher for the decoded input
 */
@Override
public Matcher matcher(byte[] bytes) {
  // Decode explicitly as UTF-8: the benchmarks encode their inputs with UTF-8, and the
  // single-argument String constructor would use the platform default charset instead.
  return new Matcher.JdkMatcher(
      pattern.matcher(new String(bytes, java.nio.charset.StandardCharsets.UTF_8)));
}
}

public static class Re2Pattern extends Pattern {
Expand All @@ -106,6 +113,11 @@ public Re2Pattern(String pattern) {
public Matcher matcher(String str) {
return new Matcher.Re2Matcher(pattern.matcher(str));
}

/**
 * Returns a matcher over the raw bytes, passing the array straight to the wrapped pattern's
 * {@code matcher(byte[])} overload without first decoding it into a String.
 */
@Override
public Matcher matcher(byte[] bytes) {
return new Matcher.Re2Matcher(pattern.matcher(bytes));
}
}
}
}

0 comments on commit 34f3041

Please sign in to comment.