Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ Add the following lines in the dependencies section of your `pom.xml` file.
<dependency>
<groupId>com.fulmicoton</groupId>
<artifactId>multiregexp</artifactId>
<version>0.3</version>
<version>0.5.1</version>
</dependency>
```

Expand Down
32 changes: 22 additions & 10 deletions src/main/java/com/fulmicoton/multiregexp/MultiPattern.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,22 @@ public static MultiPattern of(String... patterns) {
return new MultiPattern(Arrays.asList(patterns));
}

public MultiPatternAutomaton makeAutomatonWithPrefix(String prefix) {
final List<Automaton> automata = new ArrayList<>();
for (final String ptn: this.patterns) {
final String prefixedPattern = prefix + ptn;
public MultiPatternAutomaton makeAutomatonWithPrefix(String prefix, String... exceptions) {
final List<Automaton> automata = new ArrayList<>(this.patterns.size());
for (final String ptn : this.patterns) {
boolean addPrefix = true;
for (String exception : exceptions) {
if (ptn.startsWith(exception)) {
addPrefix = false;
break;
}
}
final String prefixedPattern = (addPrefix ? prefix: "") + ptn;
final Automaton automaton = new RegExp(prefixedPattern).toAutomaton();
automaton.minimize();
automata.add(automaton);
}
return MultiPatternAutomaton.make(automata);
return MultiPatternAutomaton.multithreadedMake(automata);
}

/**
Expand All @@ -44,15 +51,20 @@ public MultiPatternAutomaton makeAutomatonWithPrefix(String prefix) {
* @return A searcher object
*/
public MultiPatternSearcher searcher() {
final MultiPatternAutomaton searcherAutomaton = makeAutomatonWithPrefix(".*");
final List<Automaton> indidivualAutomatons = new ArrayList<>();
for (final String pattern: this.patterns) {
return searcher(true);
}


public MultiPatternSearcher searcher(final boolean tableize) {
final MultiPatternAutomaton searcherAutomaton = makeAutomatonWithPrefix(".*", ".*", "^");
final List<Automaton> individualAutomatons = new ArrayList<>(this.patterns.size());
for (final String pattern : this.patterns) {
final Automaton automaton = new RegExp(pattern).toAutomaton();
automaton.minimize();
automaton.determinize();
indidivualAutomatons.add(automaton);
individualAutomatons.add(automaton);
}
return new MultiPatternSearcher(searcherAutomaton, indidivualAutomatons);
return new MultiPatternSearcher(searcherAutomaton, individualAutomatons, tableize);
}


Expand Down
116 changes: 115 additions & 1 deletion src/main/java/com/fulmicoton/multiregexp/MultiPatternAutomaton.java
Original file line number Diff line number Diff line change
@@ -1,17 +1,25 @@
package com.fulmicoton.multiregexp;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.CountDownLatch;

import dk.brics.automaton.Automaton;
import dk.brics.automaton.DkBricsAutomatonHelper;
import dk.brics.automaton.State;

public class MultiPatternAutomaton {
public class MultiPatternAutomaton
implements Serializable {

private static final long serialVersionUID = -8269666436361824366L;

public final int[][] accept;
final boolean[] atLeastOneAccept;
Expand Down Expand Up @@ -56,6 +64,112 @@ static MultiState initialState(List<Automaton> automata) {
return new MultiState(initialStates);
}

/**
 * Builds the combined multi-pattern automaton by exploring the product of the
 * given automata with one worker thread per available processor.
 *
 * <p>Each input automaton is determinized in place first. Workers then pull
 * unvisited {@link MultiState}s from a shared queue, compute one transition row
 * per state (one entry per point, {@code -1} when the destination is the null
 * state), and enqueue every destination state that has not been seen before.
 * Exploration is finished once the queue is empty and no worker is still
 * processing a state.
 *
 * <p>This method blocks until every worker has terminated. If the calling
 * thread is interrupted while waiting, it keeps waiting (so the returned tables
 * are always complete) and re-asserts its interrupt status before returning.
 *
 * @param automata the automata to combine; each one is determinized by this call
 * @return the automaton assembled from the accept values, the flattened
 *         transition table and the points computed for {@code automata}
 */
static MultiPatternAutomaton multithreadedMake(final List<Automaton> automata) {
    for (final Automaton automaton : automata) {
        automaton.determinize();
    }

    final char[] points = DkBricsAutomatonHelper.pointsUnion(automata);

    // States that are still to be visited.
    final Queue<MultiState> statesToVisits = new ConcurrentLinkedQueue<>();
    final MultiState initialState = initialState(automata);
    statesToVisits.add(initialState);

    // Transition row of every visited state, keyed by the state's id.
    final Map<Integer, int[]> transitionMap = new ConcurrentHashMap<>();

    // Id assigned to every state discovered so far; the initial state is 0.
    final Map<MultiState, Integer> multiStateIndex = new ConcurrentHashMap<>();
    multiStateIndex.put(initialState, 0);

    final int numberOfThreads = Runtime.getRuntime().availableProcessors();

    // Monitor used both to park idle workers and to guard the idle/active hand-over.
    final Object lockObject = new Object();
    final List<Thread> activeThreads = Collections.synchronizedList(new ArrayList<Thread>());
    final CountDownLatch doneSignal = new CountDownLatch(numberOfThreads);
    for (int thread = 0; thread < numberOfThreads; thread++) {
        new Thread(new Runnable() {
            @Override
            public void run() {
                activeThreads.add(Thread.currentThread());

                while (true) {
                    MultiState visitingState;
                    while ((visitingState = statesToVisits.poll()) != null) {
                        final int[] curTransitions = new int[points.length];
                        // The id is always present: a state is indexed before it is queued.
                        final int stateId = multiStateIndex.get(visitingState);
                        transitionMap.put(stateId, curTransitions);

                        for (int c = 0; c < points.length; c++) {
                            final MultiState destState = visitingState.step(points[c]);
                            if (destState.isNull()) {
                                curTransitions[c] = -1;
                            } else {
                                Integer destStateId;
                                // Lookup + id allocation must be atomic so two workers
                                // never hand out the same id or queue a state twice.
                                synchronized (multiStateIndex) {
                                    destStateId = multiStateIndex.get(destState);
                                    if (destStateId == null) {
                                        destStateId = multiStateIndex.size();
                                        multiStateIndex.put(destState, destStateId);
                                        statesToVisits.add(destState);
                                        synchronized (lockObject) {
                                            // Wake a parked worker to process destState.
                                            lockObject.notify();
                                        }
                                    }
                                }
                                curTransitions[c] = destStateId;
                            }
                        }
                    }

                    // Going idle. Deregistering, the emptiness check and the wait are
                    // one critical section on the monitor every notify uses; otherwise
                    // the last worker's notifyAll() could fire between this worker's
                    // check and its wait(), leaving it parked forever.
                    synchronized (lockObject) {
                        activeThreads.remove(Thread.currentThread());
                        if (activeThreads.isEmpty()) {
                            // Nobody is processing and the queue is drained: release
                            // every parked worker so it can terminate, then end this one.
                            lockObject.notifyAll();
                            break;
                        }
                        try {
                            lockObject.wait();
                        } catch (InterruptedException ignored) {
                            // Treated like any other wake-up: re-register and re-check
                            // the queue. The flag is deliberately not restored, since
                            // that would make every later wait() throw immediately.
                        }
                        activeThreads.add(Thread.currentThread());
                    }
                }
                doneSignal.countDown();
            }
        }).start();
    }

    // Wait for all workers to finish. The tables below are only consistent once
    // every worker is done, so an interrupt must not cut the wait short; it is
    // remembered and re-asserted for the caller instead.
    boolean interrupted = false;
    while (true) {
        try {
            doneSignal.await();
            break;
        } catch (InterruptedException e) {
            interrupted = true;
        }
    }
    if (interrupted) {
        Thread.currentThread().interrupt();
    }

    assert transitionMap.size() == multiStateIndex.size();

    // Flatten the per-state rows into one array indexed by stateId * points.length + c.
    final int[] transitions = new int[transitionMap.size() * points.length];
    for (final Map.Entry<Integer, int[]> entry : transitionMap.entrySet()) {
        System.arraycopy(entry.getValue(), 0, transitions, entry.getKey() * points.length, points.length);
    }

    final int[][] acceptValues = new int[multiStateIndex.size()][];
    for (final Map.Entry<MultiState, Integer> entry : multiStateIndex.entrySet()) {
        acceptValues[entry.getValue()] = entry.getKey().toAcceptValues();
    }

    return new MultiPatternAutomaton(acceptValues, transitions, points, automata.size());
}

static MultiPatternAutomaton make(final List<Automaton> automata) {
for (final Automaton automaton: automata) {
automaton.determinize();
Expand Down
Loading