Cod sursa(job #1800423)

Utilizator thewildnathNathan Wildenberg thewildnath Data 7 noiembrie 2016 19:32:03
Problema Aho-Corasick Scor 100
Compilator c Status done
Runda Arhiva educationala Marime 8.29 kb
// Aho-Corasick - Nathan Wildenberg
// the program implements the Aho-Corasick algorithm
// https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm
// It reads from 'ahocorasick.in' and prints in 'ahocorasick.out'
// The program reads a string, a number of words and then the words.
// It builds an Aho-Corasick for the words and then runs the algorithm
// on the initial string. It prints the number of apparitions of each word
// from the input in the string(in the order the words were read)

#include <stdio.h>
#include <stdbool.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
//#include "vector.c"
//#include "queue.c"

void *ptr_int(int val) {
  int *temp = malloc(sizeof(int));
  *temp = val;
  return temp;
}

// Get the int value of a void pointer(assume it is an int)
int val_int(void *ptr) {
  return *(int *)ptr;
}

typedef struct container {
  void *object;
  struct container *next;
} container;

// Container Contructor
// Return a pointer to a container('capsule') that encapsulates 'object'
container *new_container(void *object) {
  container *c = malloc(sizeof(container));
  c->object = object;
  c->next = NULL;
  return c;
}

typedef struct queue {
  struct container *begin;
  struct container *end;

  bool (*empty)(struct queue *);
  void *(*front)(struct queue *);
  void (*push)(struct queue *, void *);
  void (*pop)(struct queue *);
} queue;

// Check if the container is empty
bool q_empty(queue *q) {
  return q->begin == NULL;
}

// Get the first element from the container
void *front(queue *q) {
  return q->begin->object;
}

// Add an element to the end of the container
void push(queue *q, void *object) {
  container *c = new_container(object);

  if (q->begin == NULL) {
    q->begin = c;
    q->end = c;
  } else {
    q->end->next = c;
    q->end = c;
  }
}

// Remove an element from the beginning of the container
void pop(queue *q) {
  container *temp = q->begin;

  if (q->begin == q->end) {
      q->begin = NULL;
      q->end = NULL;
  } else
    q->begin = q->begin->next;

  free(temp->object);
  free(temp);
}

// Constructor
// Return a pointer to an empty queue container
queue *new_queue() {
  queue *q = malloc(sizeof(queue));
  q->begin = NULL;
  q->end = NULL;

  q->empty = &q_empty;
  q->front = &front;
  q->push = &push;
  q->pop = &pop;

  return q;
}

const double MAGNITUDE = 1.5;

typedef struct vector {
  size_t capacity;
  size_t size;

  bool (*empty)(struct vector *);
  void (*push_back)(struct vector *, void *);
  void (*pop_back)(struct vector *);

  void **elem;
} vector;


// Calculate the capacity that is enough to fit min_c
size_t get_new_capacity(size_t min_c) {
  size_t capacity = 0;

  while (capacity < min_c) {
    if (capacity == 0)
      capacity = 1;
    else {
      capacity = ceil((double)capacity * MAGNITUDE);
    }
  }

  return capacity;
}

// Increase the capacity of the container by one degree
void increase_capacity(vector *v) {
  v->capacity = get_new_capacity(v->capacity + 1);

  v->elem = realloc(v->elem, v->capacity * sizeof(void *));
}

// Check if the container is empty
bool v_empty(vector *v) {
  return v->size == 0;
}

// Add an element to the end of the container
// Increases the capacity if the container is full
void push_back(vector *v, void *object) {
  if (v->size == v->capacity)
    increase_capacity(v);
  v->elem[v->size] = object;
  ++v->size;
}

// Delete the last element in the container
// Doesn't modify the capacity of the container
void pop_back(vector *v) {
  --v->size;
  free(v->elem[v->size]);
}

// Constructor
// Return a pointer to an empty vector container
vector *new_vector() {
  vector *v = malloc(sizeof(vector));

  v->capacity = 0;
  v->size = 0;

  v->empty = &v_empty;
  v->push_back = &push_back;
  v->pop_back = &pop_back;

  v->elem = NULL;

  return v;
}

enum {
  SIGMA = 26,
  NMAX = 100,
  WORD_LENGTH = 10000,
  STR_LENGTH = 1000000
};

typedef struct state {
  int id;
  int ch;
  int sum;
  int degree;
  bool solved;
  struct state *father;
  struct state *fail;
  struct state *trans[SIGMA];
} state;

typedef struct ahocorasick {
  vector *states;
  state *root;
} ahocorasick;

int n;
char str[STR_LENGTH + 1];

state *ends[1 + NMAX];

ahocorasick *aho;

// State Constructor
state *new_node(ahocorasick *aho, int ch, state *father) {
  state *node = malloc(sizeof(state));

  node->id = aho->states->size;
  node->ch = ch;
  node->sum = 0;
  node->degree = 0;
  node->father = father;
  node->fail = aho->root;
  for (int i = 0; i < SIGMA; ++i)
    node->trans[i] = NULL;
  aho->states->push_back(aho->states, node);

  return node;
}

// AhoCorasick Constructor
ahocorasick *new_ahocorasick () {
  ahocorasick *aho = malloc(sizeof(ahocorasick));

  aho->states = new_vector();
  aho->root = new_node(aho, -1, NULL);
  // For easier and memory efficient implementation of the build_fails function
  aho->root->father = aho->root;
  aho->root->fail = aho->root;

  return aho;
}

// Add a word to the trie
void add_word(ahocorasick *aho, char *word, int index) {
  int length = strlen(word);
  state *node = aho->root;

  for (int i = 0; i < length; ++i) {
    int ch = (int)(word[i] - 'a');
    if (node->trans[ch] == NULL)
      node->trans[ch] = new_node(aho, ch, node);
    node = node->trans[ch];
  }

  ends[index] = node;
}

// Used for debugging, no longer needed
void print_state(ahocorasick *aho, state *curr) {
  if (curr != aho->root)
      print_state(aho, curr->father);
  printf("%c", ('a' + curr->ch));
}

// Build the 'fail' edge for all states in the trie
// Similar to the prefix function in the KMP algorithm
void build_fails(ahocorasick *aho) {
  queue *q = new_queue();
  q->push(q, ptr_int(0));

  while (!q->empty(q)) {
    int id = val_int(q->front(q));
    q->pop(q);
    state *curr = aho->states->elem[id];

    if (curr->father != aho->root) {
      state *temp = curr->father->fail;

      while (temp != aho->root && temp->trans[curr->ch] == NULL)
        temp = temp->fail;
      if (temp->trans[curr->ch] != NULL)
         temp = temp->trans[curr->ch];

      curr->fail = temp;
      ++curr->fail->degree;
      ++curr->father->degree;

    }

    for (int ch = 0; ch < SIGMA; ++ch)
      if (curr->trans[ch] != NULL) {
        q->push(q, ptr_int(curr->trans[ch]->id));
      }
  }

  free(q);
}

// Run the string through the Aho-Corasick
// Increments the 'sum' value of all states that match the string
void run_automaton(ahocorasick *aho, char *str) {
  int length = strlen(str);
  state *curr = aho->root;

  for (int i = 0; i < length; ++i) {
    int ch = (int)(str[i] - 'a');
    while (curr != aho->root && curr->trans[ch] == NULL)
      curr = curr->fail;
    if (curr->trans[ch] != NULL)
      curr = curr->trans[ch];
    ++curr->sum;
  }
}

// Back propagation (from leafs to root) of the 'sum' value for all states
void finalize(ahocorasick *aho) {
  queue *q = new_queue();

  for (int i = 0; i < aho->states->size; ++i)
    if (!((state *)aho->states->elem[i])->degree)
      q->push(q, ptr_int(i));

  while (!q->empty(q)) {
    int id = val_int(q->front(q));
    q->pop(q);
    state *curr = aho->states->elem[id];

    curr->fail->sum += curr->sum;
    --curr->fail->degree;
    if (curr->fail != aho->root && curr->fail->degree == 0)
      q->push(q, ptr_int(curr->fail->id));

    --curr->father->degree;
    if (curr->father != aho->root && curr->father->degree == 0)
      q->push(q, ptr_int(curr->father->id));
  }

  free(q);
}

// Read input from file and build the initial Aho-Corasick
void read_and_build() {
  freopen("ahocorasick.in", "r", stdin);

  aho = new_ahocorasick();

  scanf("%s", str);
  scanf("%d", &n);
  for (int i = 1; i <= n; ++i) {
    char word[WORD_LENGTH + 1];
    scanf("%s", word);

    add_word(aho, word, i);
  }

  build_fails(aho);
}

// Print the result for each word in the input to a file
void print() {
  freopen("ahocorasick.out", "w", stdout);
  for (int i = 1; i <= n; ++i)
    printf("%d\n", ends[i]->sum);
}

int main() {
  setbuf(stdout, NULL);
  read_and_build();
  run_automaton(aho, str);
  finalize(aho);
  print(aho);

  return 0;

}