<address id="ousso"></address>
<form id="ousso"><track id="ousso"><big id="ousso"></big></track></form>
  1. java語言

    如何使用Java實現AC自動機全文檢索實例

    時間:2025-04-30 03:34:33 java語言 我要投稿
    • 相關推薦

    如何使用Java實現AC自動機全文檢索實例

      導語:如何使用Java實現AC自動機全文檢索,下面是小編給大家推薦的代碼實現過程,大家可以參考閱讀,更多詳情請關注應屆畢業生考試網。

      第一步,構建Trie樹,定義Node類型:

      /**

      * Created by zhaoyy on 2017/2/7.

      */

      interface Node {

      char value();

      boolean exists();

      boolean isRoot();

      Node parent();

      Node childOf(char c);

      Node fail();

      void setFail(Node node);

      void setExists(boolean exists);

      void add(Node child);

      List<Node> children();

      }

      第二步,實現兩種Node,如果詞匯全是可打印的ASCII字符,就采用AsciiNode,否則(比如包含漢字),使用基于hash表的MapNode;這兩種Node均集成自AbstractNode:

      /**

      * Created by zhaoyy on 2017/2/8.

      */

      abstract class AbstractNode implements Node {

      private static final char EMPTY = '\0';

      private final char value;

      private final Node parent;

      private boolean exists;

      private Node fail;

      public AbstractNode(Node parent, char value) {

      this.parent = parent;

      this.value = value;

      this.exists = false;

      this.fail = null;

      }

      public AbstractNode() {

      this(null, EMPTY);

      }

      private static String tab(int n) {

      StringBuilder builder = new StringBuilder();

      for (int i = 0; i < n; i++) {

      builder.append('\t');

      }

      return builder.toString();

      }

      private static String toString(Node node, int depth) {

      StringBuilder builder = new StringBuilder();

      String tab = tab(depth);

      Node fail = node.fail();

      Node parent = node.parent();

      builder

      .append(tab)

      .append('<')

      .append(node.value())

      .append(" exists=\"")

      .append(node.exists())

      .append('"')

      .append(" parent=\"")

      .append(parent == null ? "null" : parent.value())

      .append('"')

      .append(" fail=\"")

      .append(fail == null ? "null" : fail.value())

      .append("\">\r\n");

      for (Node child : node.children())

      builder.append(toString(child, depth + 1));

      builder

      .append(tab)

      .append("</")

      .append(node.value())

      .append(">\r\n");

      return builder.toString();

      }

      @Override

      public char value() {

      return value;

      }

      @Override

      public boolean exists() {

      return exists;

      }

      @Override

      public boolean isRoot() {

      return value == EMPTY;

      }

      @Override

      public Node parent() {

      return parent;

      }

      @Override

      public Node fail() {

      return fail;

      }

      @Override

      public void setFail(Node node) {

      this.fail = node;

      }

      @Override

      public void setExists(boolean exists) {

      this.exists = exists;

      }

      @Override

      public String toString() {

      return toString(this, 0);

      }

      }

      /////////////////////////////////////////////////////////////////////////////////////////////

      /**

      * Created by zhaoyy on 2017/2/8.

      */

      final class AsciiNode extends AbstractNode implements Node {

      private static final char FROM = 32;

      private static final char TO = 126;

      private final Node[] children;

      public AsciiNode() {

      super();

      this.children = new Node[TO - FROM + 1];

      }

      public AsciiNode(Node parent, char value) {

      super(parent, value);

      this.children = new Node[TO - FROM + 1];

      }

      @Override

      public Node childOf(char c) {

      if (c >= FROM && c <= TO)

      return children[(int) c - FROM];

      else return null;

      }

      @Override

      public void add(Node child) {

      int index = (int) child.value();

      children[index - FROM] = child;

      }

      @Override

      public List<Node> children() {

      List<Node> nodes = new ArrayList<Node>();

      for (Node child : children)

      if (child != null)

      nodes.add(child);

      return nodes;

      }

      }

      //////////////////////////////////////////////////////////////////////////////////////////////

      /**

      * Created by zhaoyy on 2017/2/8.

      */

      final class MapNode extends AbstractNode implements Node {

      private final Map<Character, Node> children;

      public MapNode() {

      super();

      this.children = new HashMap<Character, Node>();

      }

      public MapNode(Node parent, char value) {

      super(parent, value);

      this.children = new HashMap<Character, Node>();

      }

      @Override

      public Node childOf(char c) {

      return children.get(c);

      }

      @Override

      public void add(Node child) {

      children.put(child.value(), child);

      }

      @Override

      public List<Node> children() {

      List<Node> nodes = new ArrayList<Node>();

      nodes.addAll(children.values());

      return nodes;

      }

      }

      第三步,

      首先定義一個Node構造器:

      /**

      * Created by zhaoyy on 2017/2/8.

      */

      public interface NodeMaker {

      Node make(Node parent, char value);

      Node makeRoot();

      }

      然后構建AC自動機,實現創建及查找方法:

      /**

      * Created by zhaoyy on 2017/2/7.

      */

      public final class WordTable {

      private final Node root;

      private WordTable(Collection<? extends CharSequence> words, NodeMaker maker) {

      Node root = buildTrie(words, maker);

      setFailNode(root);

      this.root = root;

      }

      public static WordTable compile(Collection<? extends CharSequence> words) {

      if (words == null || words.isEmpty())

      throw new IllegalArgumentException();

      final NodeMaker maker;

      if (isAscii(words))

      maker = new NodeMaker() {

      @Override

      public Node make(Node parent, char value) {

      return new AsciiNode(parent, value);

      }

      @Override

      public Node makeRoot() {

      return new AsciiNode();

      }

      };

      else maker = new NodeMaker() {

      @Override

      public Node make(Node parent, char value) {

      return new MapNode(parent, value);

      }

      @Override

      public Node makeRoot() {

      return new MapNode();

      }

      };

      return new WordTable(words, maker);

      }

      private static boolean isAscii(Collection<? extends CharSequence> words) {

      for (CharSequence word : words) {

      int len = word.length();

      for (int i = 0; i < len; i++) {

      int c = (int) word.charAt(i);

      if (c < 32 || c > 126)

      return false;

      }

      }

      return true;

      }

      private static Node buildTrie(Collection<? extends CharSequence> sequences, NodeMaker maker) {

      Node root = maker.makeRoot();

      for (CharSequence sequence : sequences) {

      int len = sequence.length();

      Node current = root;

      for (int i = 0; i < len; i++) {

      char c = sequence.charAt(i);

      Node node = current.childOf(c);

      if (node == null) {

      node = maker.make(current, c);

      current.add(node);

      }

      current = node;

      if (i == len - 1)

      node.setExists(true);

      }

      }

      return root;

      }

      private static void setFailNode(final Node root) {

      root.setFail(null);

      Queue<Node> queue = new LinkedList<Node>();

      queue.add(root);

      while (!queue.isEmpty()) {

      Node parent = queue.poll();

      Node temp;

      for (Node child : parent.children()) {

      if (parent.isRoot())

      child.setFail(root);

      else {

      temp = parent.fail();

      while (temp != null) {

      Node node = temp.childOf(child.value());

      if (node != null) {

      child.setFail(node);

      break;

      }

      temp = temp.fail();

      }

      if (temp == null)

      child.setFail(root);

      }

      queue.add(child);

      }

      }

      }

      public boolean findAnyIn(CharSequence cs) {

      int len = cs.length();

      Node node = root;

      for (int i = 0; i < len; i++) {

      Node next = node.childOf(cs.charAt(i));

      if (next == null) {

      next = node.fail();

      if (next == null) {

      node = root;

      continue;

      }

      }

      if (next.exists())

      return true;

      }

      return false;

      }

      public List<MatchInfo> search(CharSequence cs) {

      if (cs == null || cs.length() == 0)

      return Collections.emptyList();

      List<MatchInfo> result = new ArrayList<MatchInfo>();

      int len = cs.length();

      Node node = root;

      for (int i = 0; i < len; i++) {

      Node next = node.childOf(cs.charAt(i));

      if (next == null) {

      next = node.fail();

      if (next == null) {

      node = root;

      continue;

      }

      }

      if (next.exists()) {

      MatchInfo info = new MatchInfo(i, next);

      result.add(info);

      node = root;

      continue;

      }

      node = next;

      }

      return result;

      }

      @Override

      public String toString() {

      return root.toString();

      }

      }

      定義一個保存查找結果的實體:

      /**

      * Created by zhaoyy on 2017/2/7.

      */

      public final class MatchInfo {

      private final int index;

      private final String word;

      public MatchInfo(int index, String word) {

      this.index = index;

      this.word = word;

      }

      public MatchInfo(int index, Node node) {

      StringBuilder builder = new StringBuilder();

      while (node != null) {

      if (!node.isRoot())

      builder.append(node.value());

      node = node.parent();

      }

      String word = builder.reverse().toString();

      this.index = index + 1 - word.length();

      this.word = word;

      }

      public int getIndex() {

      return index;

      }

      public String getWord() {

      return word;

      }

      @Override

      public String toString() {

      return index + ":" + word;

      }

      }

      第四步,調用Demo:

      public static void main(String[] args) {

      List<String> list = Arrays.asList("say", "her", "he", "she", "shr", "alone");

      WordTable table = WordTable.compile(list);

      System.out.println(table);

      System.out.println(table.search("1shesaynothingabouthislivinghimalone"));

      }

      以下是輸出結果:

      < exists="false" parent="null" fail="null">

      <s exists="false" parent=" " fail=" ">

      <a exists="false" parent="s" fail="a">

      <y exists="true" parent="a" fail=" ">

      </y>

      </a>

      <h exists="false" parent="s" fail="h">

      <e exists="true" parent="h" fail="e">

      </e>

      <r exists="true" parent="h" fail=" ">

      </r>

      </h>

      </s>

      <h exists="false" parent=" " fail=" ">

      <e exists="true" parent="h" fail=" ">

      <r exists="true" parent="e" fail=" ">

      </r>

      </e>

      </h>

      <a exists="false" parent=" " fail=" ">

      &lt;l exists="false" parent="a" fail=" ">

      <o exists="false" parent="l" fail=" ">

      <n exists="false" parent="o" fail=" ">

      <e exists="true" parent="n" fail=" ">

      </e>

      </n>

      </o>

      &lt;/l>

      </a>

      </ >

      [1:she, 4:say, 31:alone]

    【如何使用Java實現AC自動機全文檢索實例】相關文章:

    Java實現多繼承的實例07-18

    Java中synchronized的使用實例05-31

    Java for循環語句使用實例08-26

    Java中Websocket使用實例解析08-11

    如何使用java10-14

    java使用動態代理來實現AOP05-29

    java如何實現漢諾塔08-08

    如何使用java多線程08-23

    如何正確使用Java數組11-04

    <address id="ousso"></address>
    <form id="ousso"><track id="ousso"><big id="ousso"></big></track></form>
    1. 日日做夜狠狠爱欧美黑人