Cod sursa(job #3148385)

Utilizator mircea_007Mircea Rebengiuc mircea_007 Data 31 august 2023 17:10:23
Problema Aho-Corasick Scor 100
Compilator cpp-64 Status done
Runda Arhiva educationala Marime 3.27 kb
#include <stdio.h>
#include <vector>
#include <queue>

#define magic_sauce inline __attribute__((always_inline))

const int MAXN = 1e6;
const int SIGMA = 26;
const int NIL = -1;

struct TrieNode {
  int next[SIGMA];

  int fallback;
  int par;
  int edge_ch;

  int freq;

  TrieNode( int p, int ch ) : par( p ), edge_ch( ch ){
    // nu sunt inca atat de nebun sa bag memset( fwd, 0xff )
    for( int i = 0 ; i < SIGMA ; i++ )
      next[i] = NIL;
    
    fallback = NIL;
    freq = 0;
  }
};

struct AhoCorasick {
  std::vector<TrieNode> t; // nume scurt
  int cursor;

  std::vector<int> word_link;

  AhoCorasick(){
    t.emplace_back( NIL, '?' );
  }

  void push_word( char *str ){
    int u = 0, ch;

    while( *str ){
      ch = *(str++) - 'a';

      if( t[u].next[ch] == NIL ){
        t[u].next[ch] = (int)t.size();
        t.emplace_back( u, ch );
      }

      u = t[u].next[ch];
    }

    word_link.push_back( u );
  }

  void init_traversal(){
    int u, v, ch;

    // get links and fallbacks
    std::queue<int> q( { 0 } );

    for( ch = 0 ; ch < SIGMA ; ch++ )
      if( t[0].next[ch] == NIL )
        t[0].next[ch] = 0;

    t[0].fallback = 0;

    while( !q.empty() ){
      u = q.front();
      q.pop();

      if( u ){
        for( ch = 0 ; ch < SIGMA ; ch++ )
          if( t[u].next[ch] != NIL )
            q.push( t[u].next[ch] );

        // first of all, we find the fallback
        // u -> parent -> parents fallback -> go forward with ch
        if( t[u].par )
          t[u].fallback = v = t[t[t[u].par].fallback].next[t[u].edge_ch];
        else
          t[u].fallback = v = 0;

        // now we go forward with every possible character
        for( ch = 0 ; ch < SIGMA ; ch++ )
          if( t[u].next[ch] == NIL )
            t[u].next[ch] = t[v].next[ch];
      }else
        for( ch = 0 ; ch < SIGMA ; ch++ )
          if( t[u].next[ch] )
            q.push( t[u].next[ch] );
    }

    // initial node is the root
    cursor = 0;
  }

  magic_sauce void push( char ch ){
    cursor = t[cursor].next[(int)(ch - 'a')];

    t[cursor].freq++;
  }

  void init_freq(){
    std::vector<int> lenghtwise( { 0 } );
    std::vector<bool> viz( t.size(), false ); // pentru ca vreau neaparat sa nu am 2 vectori in TrieNode

    int i = 0, u, ch;

    // mini-bfs
    viz[0] = true;
    while( i < (int)lenghtwise.size() ){
      u = lenghtwise[i];

      for( ch = 0 ; ch < SIGMA ; ch++ )
        if( !viz[t[u].next[ch]] ){
          lenghtwise.push_back( t[u].next[ch] );
          viz[t[u].next[ch]] = true;
        }

      i++;
    }

    for( i = (int)lenghtwise.size() ; i-- ; ){
      u = lenghtwise[i];

      t[t[u].fallback].freq += t[u].freq;
    }
  }

  int word_freq( int i ){
    return t[word_link[i]].freq;
  }

} dictionary;

char text[MAXN + 2];
char pat[MAXN + 2];

int main(){
  FILE *fin  = fopen( "ahocorasick.in", "r" );
  FILE *fout = fopen( "ahocorasick.out", "w" );

  int n, i;

  fscanf( fin, "%s %d", text, &n );
  for( i = 0 ; i < n ; i++ ){
    fscanf( fin, " %s", pat );
    dictionary.push_word( pat );
  }

  dictionary.init_traversal();

  i = 0;
  while( text[i] ){
    dictionary.push( text[i] );
    i++;
  }

  dictionary.init_freq();

  for( i = 0 ; i < n ; i++ )
    fprintf( fout, "%d\n", dictionary.word_freq( i ) );

  fclose( fin );
  fclose( fout );
  return 0;
}