home *** CD-ROM | disk | FTP | other *** search
/ Internet Publisher's Toolbox 2.0 / Internet Publisher's Toolbox.iso / internet / ntserver / wtsource / boolean_.c next >
Encoding:
C/C++ Source or Header  |  1994-11-30  |  16.1 KB  |  515 lines

  1. /* 
  2.  * boolean_op.c -- 
  3.  * SCCS Status     : %W%        %G%
  4.  * Author          : Huynh Quoc T. Tung
  5.  * Created On      : Sun Oct 17 16:40:46 1993
  6.  * Last Modified By: Huynh Quoc T. Tung
  7.  * Last Modified On: Tue Oct 19 08:36:47 1993
  8.  * Update Count    : 2
  9.  * Status          : Unknown, Use with caution!
  10.  */
  11.  
  12. #include <stdio.h>
  13. #include <string.h>
  14. #include "irfiles.h"
  15. #include "boolean_op.h"
  16.  
  17. struct node
  18.   long key; 
  19.   struct node *next;
  20. };
  21. struct node *head, *z, *t;
  22.  
  23. extern long number_of_qwords;
  24. extern double *document_score_array;
  25. extern search_result_struct* search_result_array;
  26.  
  27. search_result_struct *end_result = NULL;
  28.  
  29. boolean IsOperator(op)
  30.      char *op;
  31. {
  32.   if(!strcmp(op,"and") ||
  33.      !strcmp(op,"or") ||
  34.      !strcmp(op,"not"))
  35.     return(true);
  36.   else return(false);
  37. }
  38.  
  39. void stackinit()
  40. {
  41.   head = (struct node *) malloc(sizeof (* head));
  42.   z = (struct node *) malloc(sizeof(* z));
  43.   head->next = z; 
  44.   head->key = 0;
  45.   z->next = z;
  46. }
  47.  
  48. void push(v)
  49.      long v;
  50. {
  51.   t = (struct node *) malloc(sizeof(* t));
  52.   t->key = v;
  53.   t->next = head->next;
  54.   head->next = t;
  55. }
  56.  
  57. long pop()
  58. {
  59.   long x;
  60.   
  61.   t = head->next;
  62.   head->next = t->next;
  63.   x = t->key;
  64.   free(t);
  65.   return x;
  66. }
  67.  
  68. int stackempty()
  69. {
  70.   return head->next == z;
  71. }
  72.  
  73. void Union(result, set1, set2)
  74.      search_result_struct *result;
  75.      search_result_struct *set1;
  76.      search_result_struct *set2;
  77. {
  78.   while((set1->number_of_hits != 0) &&
  79.         (set2->number_of_hits != 0)) {
  80.     if(set1->doc_ids_array->doc_id < set2->doc_ids_array->doc_id) {
  81.       result->doc_ids_array->doc_id = set1->doc_ids_array->doc_id;
  82.       result->doc_ids_array->score = set1->doc_ids_array->score;
  83.       ++result->number_of_hits;
  84.       ++result->doc_ids_array ;
  85.       --set1->number_of_hits;
  86.       ++set1->doc_ids_array; 
  87.     }
  88.     else if(set1->doc_ids_array->doc_id > set2->doc_ids_array->doc_id) {
  89.       result->doc_ids_array->doc_id = set2->doc_ids_array->doc_id;
  90.       result->doc_ids_array->score = set2->doc_ids_array->score;
  91.       ++result->number_of_hits;
  92.       ++result->doc_ids_array ;
  93.       --set2->number_of_hits;
  94.       ++set2->doc_ids_array;
  95.     }
  96.     else { /* doc_id1 == doc_id2 { */
  97.       result->doc_ids_array->doc_id = set1->doc_ids_array->doc_id;
  98.       result->doc_ids_array->score = set1->doc_ids_array->score + set2->doc_ids_array->score;
  99.       ++result->number_of_hits;
  100.       ++result->doc_ids_array ;
  101.       --set1->number_of_hits; --set2->number_of_hits;
  102.       ++set1->doc_ids_array; ++set2->doc_ids_array;
  103.     }
  104.   }
  105.   if((set1->number_of_hits == 0) &&
  106.      (set2->number_of_hits != 0)) {
  107.     memcpy((char *)result->doc_ids_array, 
  108.            set2->doc_ids_array, 
  109.            set2->number_of_hits * sizeof(doc_descr_struct));
  110.     set2->doc_ids_array += set2->number_of_hits;
  111.     result->doc_ids_array += set2->number_of_hits;
  112.     result->number_of_hits += set2->number_of_hits;
  113.     set2->number_of_hits = 0;
  114.   }
  115.   else if((set1->number_of_hits != 0) &&
  116.           (set2->number_of_hits == 0)) {
  117.     memcpy((char *)result->doc_ids_array, 
  118.            set1->doc_ids_array, 
  119.            set1->number_of_hits * sizeof(doc_descr_struct));
  120.     set1->doc_ids_array += set1->number_of_hits;
  121.     result->doc_ids_array += set1->number_of_hits;
  122.     result->number_of_hits += set1->number_of_hits;
  123.     set1->number_of_hits = 0;
  124.   }
  125. }
  126.  
  127. void Or_Operator(operand1, operand2)
  128.      long operand1;
  129.      long operand2;
  130. {
  131.   search_result_struct *op1, *op2;
  132.   long doc_score_size = sizeof(search_result_struct);
  133.   long doc_ids_array_size = sizeof(doc_descr_struct);
  134.   long op1_number_of_hits, op2_number_of_hits;
  135.   
  136.   op1 = &search_result_array[operand1];
  137.   op2 = &search_result_array[operand2];
  138.   op1_number_of_hits = op1->number_of_hits;
  139.   op2_number_of_hits = op2->number_of_hits;
  140.   
  141.   if(op1->number_of_hits == 0) {
  142.     if(op1->doc_ids_array != NULL)
  143.       s_free(op1->doc_ids_array);
  144.     if(op2->number_of_hits > 0) {
  145.       end_result->doc_ids_array = 
  146.         (doc_descr_struct *)
  147.           s_realloc(end_result->doc_ids_array, 
  148.                     op2->number_of_hits * doc_ids_array_size);
  149.       memcpy((char *)end_result->doc_ids_array, 
  150.              (char *)op2->doc_ids_array, 
  151.              doc_ids_array_size * op2->number_of_hits);
  152.     }
  153.     end_result->number_of_hits = op2->number_of_hits;
  154.     push(op2->word_id);
  155.   }
  156.   else if(op2->number_of_hits == 0) {
  157.     if(op2->doc_ids_array != NULL)
  158.       s_free(op2->doc_ids_array);
  159.     if(op1->number_of_hits > 0) {
  160.       end_result->doc_ids_array = 
  161.         (doc_descr_struct *)
  162.           s_realloc(end_result->doc_ids_array, 
  163.                     op1->number_of_hits * doc_ids_array_size);
  164.       memcpy((char *)end_result->doc_ids_array, 
  165.              (char *)op1->doc_ids_array, 
  166.              doc_ids_array_size * op1->number_of_hits);
  167.     }
  168.     end_result->number_of_hits = op1->number_of_hits;
  169.     push(op1->word_id);
  170.   } 
  171.   else if((op1->number_of_hits != 0) &&
  172.           (op2->number_of_hits != 0)) { 
  173.     end_result->doc_ids_array = 
  174.       (doc_descr_struct *)
  175.         s_realloc(end_result->doc_ids_array,
  176.                   (op1->number_of_hits + op2->number_of_hits) * 
  177.                   doc_ids_array_size);
  178.     end_result->number_of_hits = 0;
  179.     
  180.     Union(end_result, op1, op2);
  181.     
  182.     op1->doc_ids_array -= op1_number_of_hits - op1->number_of_hits;
  183.     op2->doc_ids_array -= op2_number_of_hits - op2->number_of_hits;
  184.     end_result->doc_ids_array -= end_result->number_of_hits;
  185.     op2->number_of_hits = end_result->number_of_hits;
  186.     s_free(op1->doc_ids_array);
  187.     if(end_result->number_of_hits > 0) {
  188.       op2->doc_ids_array = 
  189.         (doc_descr_struct *)
  190.           s_realloc(op2->doc_ids_array, 
  191.                     end_result->number_of_hits * doc_ids_array_size);
  192.       memcpy((char *)op2->doc_ids_array, 
  193.              (char *)end_result->doc_ids_array, 
  194.              end_result->number_of_hits * doc_ids_array_size);
  195.     }
  196.     else {
  197.       op2->number_of_hits = end_result->number_of_hits;
  198.       s_free(op2->doc_ids_array);
  199.     }
  200.     push(op2->word_id);
  201.   }
  202. }
  203.  
  204. void Intersection(result, set1, set2)
  205.      search_result_struct * result;
  206.      search_result_struct * set1;
  207.      search_result_struct * set2;
  208. {
  209.   while((set1->number_of_hits != 0) &&
  210.         (set2->number_of_hits != 0)) {
  211.     if(set1->doc_ids_array->doc_id == set2->doc_ids_array->doc_id) {
  212.       result->doc_ids_array->doc_id = set1->doc_ids_array->doc_id;
  213.       result->doc_ids_array->score = set1->doc_ids_array->score + set2->doc_ids_array->score;
  214.       ++result->number_of_hits;
  215.       ++result->doc_ids_array ;
  216.       --set1->number_of_hits; --set2->number_of_hits;
  217.       ++set1->doc_ids_array ; ++set2->doc_ids_array ;
  218.     }
  219.     else if(set1->doc_ids_array->doc_id < set2->doc_ids_array->doc_id) {
  220.       --set1->number_of_hits;
  221.       ++set1->doc_ids_array ;
  222.     }
  223.     else /* doc_id1 > doc_id2 */
  224.       {
  225.         --set2->number_of_hits;
  226.         ++set2->doc_ids_array ;
  227.       }
  228.   }
  229. }
  230.  
  231. void And_Operator(operand1, operand2)
  232.      long operand1;
  233.      long operand2;
  234. {
  235.   search_result_struct *op1, *op2;
  236.   long doc_score_size = sizeof(search_result_struct);
  237.   long doc_ids_array_size = sizeof(doc_descr_struct);
  238.   long op1_number_of_hits, op2_number_of_hits;
  239.   
  240.   op1 = &search_result_array[operand1];
  241.   op2 = &search_result_array[operand2];
  242.   op1_number_of_hits = op1->number_of_hits;
  243.   op2_number_of_hits = op2->number_of_hits;
  244.   
  245.   if(op1->number_of_hits == 0) {
  246.     end_result->number_of_hits = 0;
  247.     if(op1->doc_ids_array != NULL)
  248.       s_free(op1->doc_ids_array);
  249.     if(op2->doc_ids_array != NULL)
  250.       s_free(op2->doc_ids_array);
  251.     push(op1->word_id);
  252.   }
  253.   else if(op2->number_of_hits == 0) {
  254.     end_result->number_of_hits = 0;
  255.     if(op1->doc_ids_array != NULL)
  256.       s_free(op1->doc_ids_array);
  257.     if(op2->doc_ids_array != NULL)
  258.       s_free(op2->doc_ids_array);
  259.     push(op2->word_id);
  260.   }
  261.   else if((op1->number_of_hits != 0) &&
  262.           (op2->number_of_hits != 0)) {
  263.     if(op1->number_of_hits > op2->number_of_hits)
  264.       end_result->doc_ids_array = 
  265.         (doc_descr_struct *)
  266.           s_realloc(end_result->doc_ids_array,
  267.                     op2->number_of_hits * doc_ids_array_size);
  268.     else
  269.       end_result->doc_ids_array = 
  270.         (doc_descr_struct *)
  271.           s_realloc(end_result->doc_ids_array,
  272.                     op1->number_of_hits * doc_ids_array_size);
  273.     end_result->number_of_hits = 0;
  274.  
  275.     Intersection(end_result, op1, op2);
  276.  
  277.     op1->doc_ids_array -= op1_number_of_hits - op1->number_of_hits;
  278.     op2->doc_ids_array -= op2_number_of_hits - op2->number_of_hits;
  279.     end_result->doc_ids_array -= end_result->number_of_hits;
  280.     op2->number_of_hits = end_result->number_of_hits;
  281.     s_free(op1->doc_ids_array);
  282.     if(end_result->number_of_hits > 0) {
  283.       op2->doc_ids_array = 
  284.         (doc_descr_struct *)
  285.           s_realloc(op2->doc_ids_array, 
  286.                     end_result->number_of_hits * doc_ids_array_size);
  287.       memcpy((char *)op2->doc_ids_array, 
  288.              (char *)end_result->doc_ids_array, 
  289.              end_result->number_of_hits * doc_ids_array_size);
  290.     }
  291.     else {
  292.       op2->number_of_hits = end_result->number_of_hits;
  293.       s_free(op2->doc_ids_array);
  294.     }
  295.     push(op2->word_id);
  296.   }
  297. }
  298.  
  299. void Difference(result, set1, set2)
  300.      search_result_struct *result;
  301.      search_result_struct *set1;
  302.      search_result_struct *set2;
  303. {
  304.   while((set1->number_of_hits != 0) &&
  305.         (set2->number_of_hits != 0)) {
  306.     if(set1->doc_ids_array->doc_id == set2->doc_ids_array->doc_id) {
  307.       --set1->number_of_hits; --set2->number_of_hits;
  308.       ++set1->doc_ids_array; ++set2->doc_ids_array;
  309.     }
  310.     else if(set1->doc_ids_array->doc_id < set2->doc_ids_array->doc_id) {
  311.       result->doc_ids_array->doc_id = set1->doc_ids_array->doc_id;
  312.       result->doc_ids_array->score = set1->doc_ids_array->score;
  313.       ++result->number_of_hits;
  314.       ++result->doc_ids_array;
  315.       --set1->number_of_hits;
  316.       ++set1->doc_ids_array;
  317.     }
  318.     else /* doc_id1 > doc_id2 */ {
  319.       --set2->number_of_hits;
  320.       ++set2->doc_ids_array;
  321.     }
  322.   }
  323.   if((set1->number_of_hits != 0) &&
  324.      (set2->number_of_hits == 0)) {
  325.     memcpy((char *)result->doc_ids_array, 
  326.            set1->doc_ids_array, 
  327.            set1->number_of_hits * sizeof(doc_descr_struct));
  328.     set1->doc_ids_array += set1->number_of_hits;
  329.     result->doc_ids_array += set1->number_of_hits;
  330.     result->number_of_hits += set1->number_of_hits;
  331.     set1->number_of_hits = 0;
  332.   }
  333. }
  334.  
  335. void Not_Operator( operand1, operand2)
  336.      long operand1;
  337.      long operand2;
  338. {
  339.   search_result_struct *op1, *op2;
  340.   long doc_score_size = sizeof(search_result_struct);
  341.   long doc_ids_array_size = sizeof(doc_descr_struct);
  342.   long op1_number_of_hits, op2_number_of_hits;
  343.   
  344.   op1 = &search_result_array[operand1];
  345.   op2 = &search_result_array[operand2];
  346.   op1_number_of_hits = op1->number_of_hits;
  347.   op2_number_of_hits = op2->number_of_hits;
  348.   
  349.   if(op1->number_of_hits == 0) {
  350.     end_result->number_of_hits = 0;
  351.     if(op1->doc_ids_array != NULL)
  352.       s_free(op1->doc_ids_array);
  353.     if(op2->doc_ids_array != NULL)
  354.       s_free(op2->doc_ids_array);
  355.     push(op1->word_id);
  356.   }
  357.   else if(op2->number_of_hits == 0) {
  358.     if(op2->doc_ids_array != NULL)
  359.       s_free(op2->doc_ids_array);
  360.     if(op1->number_of_hits > 0) {
  361.       end_result->doc_ids_array = 
  362.         (doc_descr_struct *)
  363.           s_realloc(end_result->doc_ids_array, 
  364.                     op1->number_of_hits * doc_ids_array_size);
  365.       memcpy((char *)end_result->doc_ids_array, 
  366.              (char *)op1->doc_ids_array, 
  367.              doc_ids_array_size * op1->number_of_hits + 1);
  368.     }
  369.     end_result->number_of_hits = op1->number_of_hits;
  370.     push(op1->word_id);
  371.   }
  372.   else if((op1->number_of_hits != 0) &&
  373.           (op2->number_of_hits != 0)) {
  374.     end_result->doc_ids_array = 
  375.       (doc_descr_struct *)
  376.         s_realloc(end_result->doc_ids_array,
  377.                  (op1->number_of_hits + op2->number_of_hits) * 
  378.                   doc_ids_array_size);
  379.     end_result->number_of_hits = 0;
  380.     
  381.     Difference(end_result, op1, op2);
  382.     
  383.     op1->doc_ids_array -= op1_number_of_hits - op1->number_of_hits;
  384.     op2->doc_ids_array -= op2_number_of_hits - op2->number_of_hits;
  385.     end_result->doc_ids_array -= end_result->number_of_hits;
  386.     op1->number_of_hits = end_result->number_of_hits;
  387.     s_free(op2->doc_ids_array);
  388.     if(end_result->number_of_hits > 0) {
  389.       op1->doc_ids_array = 
  390.         (doc_descr_struct *)
  391.           s_realloc(op1->doc_ids_array, 
  392.                     end_result->number_of_hits * doc_ids_array_size);
  393.       memcpy((char *)op1->doc_ids_array, 
  394.              (char *)end_result->doc_ids_array, 
  395.              end_result->number_of_hits * doc_ids_array_size);
  396.     }
  397.     else {
  398.       op1->number_of_hits = end_result->number_of_hits;
  399.       s_free(op2->doc_ids_array);
  400.     }
  401.     push(op1->word_id);
  402.   }
  403. }
  404.  
  405. boolean stackinitialized = false;
  406.  
  407. boolean boolean_operations(operator)
  408.      char *operator;
  409. {
  410.   long word_id1, word_id2;
  411. #ifndef WIN32
  412.   int i;
  413. #endif
  414.   
  415.   word_id1 = word_id2 = -1L;
  416.   
  417.   if (!stackinitialized) {
  418.     stackinit();
  419.     stackinitialized = true;
  420.   }
  421.   if (!strcmp(operator, "or")) {
  422.     if(stackempty())
  423.       word_id1 = -1L;
  424.     else word_id1 = pop();
  425.     if(stackempty())
  426.       word_id2 = -1L;
  427.     else word_id2 = pop();
  428.     if((word_id1 == -1) || (word_id2 == -1))
  429.       return(false);
  430.       /*waislog(WLOG_HIGH, WLOG_ERROR,
  431.               "boolean search failed, too few operands.\n");*/
  432.     else Or_Operator(word_id1, word_id2);
  433.   }
  434.   else if (!strcmp(operator, "and")) {
  435.     if(stackempty())
  436.       word_id1 = -1L;
  437.     else word_id1 = pop();
  438.     if(stackempty())
  439.       word_id2 = -1L;
  440.     else word_id2 = pop();
  441.     if((word_id1 == -1) || (word_id2 == -1))
  442.       return(false);
  443.       /*waislog(WLOG_HIGH, WLOG_ERROR,
  444.               "boolean search failed, too few operands.\n");*/
  445.     else And_Operator(word_id1, word_id2);
  446.   }
  447.   else if (!strcmp(operator, "not")) {
  448.     if(stackempty())
  449.       word_id1 = -1L;
  450.     else word_id1 = pop();
  451.     if(stackempty())
  452.       word_id2 = -1L;
  453.     else word_id2 = pop();
  454.     if((word_id1 == -1) || (word_id2 == -1))
  455.       return(false);
  456.       /*waislog(WLOG_HIGH, WLOG_ERROR,
  457.               "boolean search failed, too few operands.\n");*/
  458.     else Not_Operator(word_id2, word_id1);
  459.   }
  460. }
  461.  
  462. void save_word_id(word_id)
  463.      long word_id;
  464. {
  465.   if (!stackinitialized) {
  466.     stackinit();
  467.     stackinitialized = true;
  468.   }
  469.   if(end_result == NULL)
  470.     end_result = 
  471.       (search_result_struct *)s_malloc(sizeof(search_result_struct));
  472.   if(end_result->doc_ids_array == NULL)
  473.     end_result->doc_ids_array = 
  474.       (doc_descr_struct *)
  475.         s_malloc((search_result_array[word_id].number_of_hits + 1) *
  476.                  sizeof(doc_descr_struct));
  477.   else end_result->doc_ids_array = 
  478.       (doc_descr_struct *)
  479.         s_realloc(end_result->doc_ids_array, 
  480.                   (search_result_array[word_id].number_of_hits + 1) *
  481.                   sizeof(doc_descr_struct));
  482.   end_result->number_of_hits = search_result_array[word_id].number_of_hits;
  483.   if(search_result_array[word_id].doc_ids_array != NULL) 
  484.     memcpy((char*)end_result->doc_ids_array,
  485.            (char*)search_result_array[word_id].doc_ids_array,
  486.            search_result_array[word_id].number_of_hits * 
  487.            sizeof(doc_descr_struct));
  488.   push(word_id);
  489. }
  490.  
  491. long retriev_result(entries)
  492.      long entries;
  493. {
  494.   int doc_id, count;
  495.   long number_of_hits = 0;
  496.   
  497.   if((end_result != NULL) && (document_score_array != NULL))
  498.     for(count=0; count < end_result->number_of_hits; count++) {
  499.       doc_id = end_result->doc_ids_array[count].doc_id;
  500.       document_score_array[doc_id] = end_result->doc_ids_array[count].score;
  501.       ++number_of_hits;
  502.     }
  503.   number_of_qwords = 0;
  504.   stackinitialized = false;
  505.   if(end_result != NULL) {
  506.     if(end_result->doc_ids_array != NULL)
  507.       s_free(end_result->doc_ids_array);
  508.     s_free(end_result);
  509.   }
  510.   if(search_result_array != NULL)
  511.     s_free(search_result_array);
  512.   return(number_of_hits);
  513.