filter.c
Go to the documentation of this file.
1 /* ========================================================================== */
2 /*! \file
3  * \brief Article filtering
4  *
5  * Copyright (c) 2012-2020 by the developers. See the LICENSE file for details.
6  *
7  * If nothing else is specified, functions return zero to indicate success
8  * and a negative value to indicate an error.
9  */
10 
11 
12 /* ========================================================================== */
13 /* Include headers */
14 
15 #include "posix.h" /* Include this first because of feature test macros */
16 
17 #include <string.h>
18 
19 #include "conf.h"
20 #include "core.h"
21 #include "encoding.h"
22 #include "filter.h"
23 #include "fileutils.h"
24 #include "main.h"
25 #include "xdg.h"
26 
27 
28 /* ========================================================================== */
29 /*! \defgroup FILTER FILTER: Regular expressions and scoring
30  *
31  * Location of scorefile: \c $XDG_CONFIG_HOME/$CFG_NAME/scorefile
32  *
33  * Any line starting with \c # is treated as a comment (not parsed and ignored).
34  * All other lines are parsed as rules with 4 colon-separated fields:
35  * - Field 1: Group wildmat (the rule takes effect for matching groups only)
36  * - Field 2: Type (indicating the target element and matching method)
37  * - Field 3: Score (signed integer value)
38  * - Field 4: String
39  * No whitespace is allowed between the fields and separators.
40  *
41  * \note
42  * Because \c : (colon) is used as field separator, it is not allowed to use
43  * it in wildmats.
44  *
45  * Rules with unknown type are ignored.
46  *
47  * \attention
48  * It is required that 'SSIZE_MAX' is at least 'INT_MAX' (must be checked by
49  * build system).
50  */
51 /*! @{ */
52 
53 
54 /* ========================================================================== */
55 /* Constants */
56 
/*! \brief Message prefix for FILTER module
 *
 * Must stay a plain string literal (it is also passed as a \c printf()
 * argument and used to initialize strings).
 */
#define MAIN_ERR_PREFIX  "FILTER: "

/*! \brief Permissions for score file (read and write for the owner only)
 *
 * The whole replacement list is parenthesized so that the macro always expands
 * to a single primary expression, regardless of the surrounding operators.
 */
#define FILTER_PERM  ((posix_mode_t) (POSIX_S_IRUSR | POSIX_S_IWUSR))

/*! \name Score limits
 *
 * Type must be \c int .
 * Minimum and maximum values are \c INT_MIN and \c INT_MAX .
 */
/*! @{ */
#define FILTER_SCORE_MAX  INT_MAX
#define FILTER_SCORE_MIN  INT_MIN
/*! @} */
72 
73 
74 /* ========================================================================== */
75 /* Data types */
76 
/* Rule types of score file entries
 *
 * The numbering starts with value 0 (unknown type) and is contiguous: every
 * enumerator without an explicit value is its predecessor plus one.
 * The values are used as index for the table of type names and are iterated
 * with an increment up to SCORE_END_OF_LIST.
 * Most used types should be defined first for better performance.
 */
enum filter_rule_type
{
   SCORE_TYPE_UNKNOWN = 0,   /* 0: Unknown or unsupported rule type */
   /* ----------------------------------------------------------------------- */
   SCORE_TYPE_FROM,          /* 1: Literal string vs. 'From' */
   SCORE_TYPE_FROM_ERE,      /* 2: Extended regular expression vs. 'From' */
   SCORE_TYPE_SUBJECT,       /* 3: Literal string vs. 'Subject' */
   SCORE_TYPE_SUBJECT_ERE,   /* 4: Extended regular expression vs. 'Subject' */
   SCORE_TYPE_MSGID_ERE,     /* 5: Extended regular expr. vs. 'Message-ID' */
   SCORE_TYPE_GROUP,         /* 6: Literal string vs. element of 'Newsgroups' */
   /* ----------------------------------------------------------------------- */
   SCORE_END_OF_LIST         /* 7: Terminator (not a valid rule type) */
};
95 
#if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB
/* Element of the linked list of patterns created from a group wildmat
 *
 * One element is created per wildmat pattern. The list is linked so that the
 * rightmost pattern of the wildmat comes first.
 */
struct filter_wm
{
   int  negate;              /* Nonzero: A match of this pattern is a reject */
   enum filter_cs  cs;       /* Codeset that was used to compile 'ere' */
   posix_regex_t*  ere;      /* Compiled ERE for this pattern */
   struct filter_wm*  next;  /* Next pattern (NULL at end of list) */
};
#endif  /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB */
106 
107 /* Scoring rule */
108 struct filter
109 {
110  const char* group_wildmat;
111  enum filter_rule_type type;
112  int value;
113  const char* string;
114  int found;
115  struct filter* next;
116  enum filter_cs cs;
117 #if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB
118  struct filter_wm* wm; /* Object for 'group_wildmat' */
119  posix_regex_t* ere; /* Extended regular expression for 'string' */
120 #endif /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB */
121 };
122 
123 
124 /* ========================================================================== */
125 /* Variables */
126 
/* Rule type names for the second field (type) of score file entries
 *
 * The table is read-only: Both the strings and the array of pointers are
 * constant.
 *
 * \attention
 * The data type 'enum filter_rule_type' must be suitable as index!
 * The order of the entries must match the order of the enumerators.
 */
static const char* const  filter_type_name[] =
{
   "unknown",
   /* ----------------------------------------------------------------------- */
   "from",
   "from_ere",
   "subject",
   "subject_ere",
   "msgid_ere",
   "group",
   /* ----------------------------------------------------------------------- */
   "eol"
};
145 
146 static enum filter_cs filter_locale = FILTER_CS_ASCII;
147 #if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB
148 static enum filter_cs testgroup_cs;
149 static posix_regex_t* testgroup_ere = NULL;
150 #endif /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB */
151 static size_t score_len_max = 1000;
152 static struct filter* scores = NULL;
153 static const char scorefile_name[] = "scorefile";
154 
155 
156 #if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB
157 
158 
159 /* ========================================================================== */
160 /* Print error message if system failed to compile a regular expression
161  *
162  * \param[in] code Error code
163  * \param[in] ere Pointer to compiled ERE
164  *
165  * The value \e code must be nonzero and the last error code returned from
166  * \c regcomp() for \e ere according to the POSIX standard:
167  * <br>
168  * http://pubs.opengroup.org/onlinepubs/9699919799/functions/regcomp.html
169  *
170  * \attention
171  * The definition "last error code returned" is incomplete (may mean the last
172  * call from a thread or the last call from the whole process)
173  * => Always use the filter module from the same, single thread.
174  *
175  * \note
176  * The error message is always formatted using the POSIX locale.
177  */
178 
179 static void filter_print_ere_error(int code, posix_regex_t* ere)
180 {
181  const char* mod_name = MAIN_ERR_PREFIX;
182  size_t mod_len = strlen(mod_name);
183  size_t len;
184  char* buf = NULL;
185 
186  if(!code)
187  {
188  PRINT_ERROR("Can't process invalid error code");
189  }
190  else
191  {
192 # if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI
193  /* Don't use NLS for error messages on stderr */
194  posix_setlocale(POSIX_LC_MESSAGES, "POSIX");
195 # endif /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI */
196  len = posix_regerror(code, ere, buf, 0);
197  if(!len || POSIX_SIZE_MAX - mod_len < len)
198  {
199  PRINT_ERROR("Error message has invalid size");
200  }
201  else
202  {
203  buf = (char*) posix_malloc(mod_len + len);
204  if(NULL == buf)
205  {
206  PRINT_ERROR("Cannot allocate memory for error message");
207  }
208  else
209  {
210  memcpy(buf, mod_name, mod_len);
211  posix_regerror(code, ere, &buf[mod_len], len);
212  print_error(buf);
213  posix_free((void*) buf);
214  }
215  }
216 # if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI
217  posix_setlocale(POSIX_LC_MESSAGES, "");
218 # endif /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI */
219  }
220 }
221 
222 
223 /* ========================================================================== */
224 /* Compile ERE
225  *
226  * \param[out] cs Pointer to codeset of character classification locale
227  * \param[out] ere Pointer to compiled ERE
228  * \param[in] string Raw ERE pattern
229  *
230  * \note
231  * On success, the caller is responsible for freeing the ressources allocated
232  * for the object pointed to by \e ere .
233  */
234 
235 static int filter_compile_ere(enum filter_cs* cs, posix_regex_t** ere,
236  const char* string)
237 {
238  int res = 0;
239  int rv;
240  enum enc_mime_cs charset = ENC_CS_UNKNOWN;
241  const char* pat;
242  const char* p = NULL;
243 
244  *cs = filter_locale;
245  if(FILTER_CS_ISO8859_1 == filter_locale)
246  {
247  /* Convert string to ISO 8859-1 */
248  p = enc_convert_to_8bit(&charset, string, NULL);
249  if(NULL == p) { res = -1; }
250  else if(ENC_CS_ISO8859_1 != charset) { res = -1; }
251  }
252  else if(FILTER_CS_UTF_8 != filter_locale)
253  {
254  /* Treat unsupported codeset as ASCII */
255  *cs = FILTER_CS_ASCII;
256  res = enc_ascii_check(string);
257  }
258  if(res)
259  {
260  /* String cannot be process without UTF-8 locale */
261  PRINT_ERROR("ERE cannot be used with current locale");
262  }
263  else
264  {
265  /* Allocate memory */
266  *ere = (posix_regex_t*) posix_malloc(sizeof(posix_regex_t));
267  if(NULL == *ere)
268  {
269  PRINT_ERROR("Cannot allocate memory for regular expression");
270  res = -1;
271  }
272  else
273  {
274  /* Compile regular expression if required */
275  pat = string;
276  if(FILTER_CS_ISO8859_1 == filter_locale) { pat = p; }
277  rv = posix_regcomp(*ere, pat, POSIX_REG_EXTENDED | POSIX_REG_NOSUB);
278  if(rv)
279  {
280  PRINT_ERROR("Compiling regular expression failed");
281  filter_print_ere_error(rv, *ere);
282  posix_free((void*) *ere);
283  res = -1;
284  }
285  else if(main_debug)
286  {
287  printf("%s: %sCompiling regular expression\n",
288  CFG_NAME, MAIN_ERR_PREFIX);
289  }
290  }
291  }
292  /* Release memory for ISO 8859-1 string */
293  if(NULL != p && string != p) { enc_free((void*) p); }
294 
295  return(res);
296 }
297 
298 
299 #endif /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB */
300 
301 
302 /* ========================================================================== */
303 /* Score rule destructor
304  *
305  * \param[in,out] rule Object created by \ref filter_score_rule_contructor()
306  */
307 
308 static void filter_score_rule_destructor(struct filter** rule)
309 {
310 #if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB
311  struct filter_wm* p;
312  struct filter_wm* q;
313 #endif /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB */
314 
315  if(NULL != rule && NULL != *rule)
316  {
317  posix_free((void*) (*rule)->group_wildmat);
318  posix_free((void*) (*rule)->string);
319 #if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB
320  /* Destroy wildmat linked pattern list */
321  p = (*rule)->wm;
322  while(NULL != p)
323  {
324  q = p->next;
325  posix_regfree(p->ere);
326  posix_free((void*) p->ere);
327  posix_free((void*) p);
328  p = q;
329  }
330  /* Destroy regular expression object for string ERE */
331  if(NULL != (*rule)->ere)
332  {
333  posix_regfree((*rule)->ere);
334  posix_free((void*) (*rule)->ere);
335  }
336 #endif /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB */
337  posix_free((void*) *rule);
338  *rule = NULL;
339  }
340 }
341 
342 
343 /* ========================================================================== */
344 /* Score rule constructor
345  *
346  * \param[out] new_rule Pointer to new rule
347  * \param[in] group_wildmat Limit rule scope to groups matching this wildmat
348  * \param[in] type Type of new rule
349  * \param[in] score Score if rule matches
350  * \param[in] string Literal string or regular expression
351  * \param[in] dcre Don't compile regular expressions if nonzero
352  *
353  * The parameter \e dcre should only be nonzero for exporting the rule data
354  * back to the scorefile (for comparing the raw regular expression strings).
355  *
356  * \attention
357  * The parameters \e group_wildmat and \e string must point to a memory block
358  * allocated with the function \ref posix_malloc() and will become part of the
359  * created object.
360  * On error the caller stay responsible to free the memory for \e wildmat and
361  * \e string .
362  *
363  * \note
364  * On success, the caller is responsible for freeing the ressources allocated
365  * for the object \e rule (use \ref filter_score_rule_destructor() function).
366  *
367  * On error \c NULL is written to \e new_rule .
368  */
369 
370 static int filter_score_rule_constructor(struct filter** new_rule,
371  const char* group_wildmat,
372  enum filter_rule_type type,
373  int score, const char* string,
374  int dcre)
375 {
376  int res = 0;
377 #if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB
378  int rv;
379  struct enc_wm_pattern* wma;
380  struct filter_wm* pat;
381  int i;
382  struct filter_wm* last = NULL;
383 #endif /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB */
384 
385  /* Create new rule */
386  *new_rule = (struct filter*) posix_malloc(sizeof(struct filter));
387  if(NULL == *new_rule) { res = -1; }
388  else
389  {
390  (*new_rule)->group_wildmat = group_wildmat;
391  (*new_rule)->type = type;
392  (*new_rule)->value = score;
393  (*new_rule)->string = string;
394  (*new_rule)->found = 0;
395  (*new_rule)->next = NULL;
396  (*new_rule)->cs = FILTER_CS_UTF_8;
397  }
398 #if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB
399  if(!res)
400  {
401  (*new_rule)->wm = NULL;
402  (*new_rule)->ere = NULL;
403 
404  /* Compile regular expressions for wildmat patterns */
405  if(!dcre)
406  {
407  if(strcmp("*", (*new_rule)->group_wildmat))
408  {
409  rv = enc_create_wildmat(&wma, group_wildmat);
410  if(0 < rv)
411  {
412  /* Process array backwards to get rightmost pattern first */
413  for(i = rv; i; --i)
414  {
415  pat = (struct filter_wm*)
416  posix_malloc(sizeof(struct filter_wm));
417  if(NULL == pat) { res = -1; break; }
418  else
419  {
420  pat->negate = wma[i - 1].negate;
421  pat->cs = FILTER_CS_UTF_8;
422  res = filter_compile_ere(&pat->cs, &pat->ere,
423  wma[i - 1].ere);
424  pat->next = NULL;
425  /* Link list */
426  if(NULL == last) { (*new_rule)->wm = pat; }
427  else { last->next = pat; }
428  last = pat;
429  }
430  if(res) { break; }
431  }
432  enc_destroy_wildmat(&wma, rv);
433  }
434  }
435  }
436 
437  /* Compile regular expression for string */
438  if(!res && !dcre)
439  {
440  switch((*new_rule)->type)
441  {
442  case SCORE_TYPE_FROM_ERE:
443  case SCORE_TYPE_SUBJECT_ERE:
444  case SCORE_TYPE_MSGID_ERE:
445  {
446  res = filter_compile_ere(&(*new_rule)->cs, &(*new_rule)->ere,
447  string);
448  break;
449  }
450  default:
451  {
452  /* Rule do not use a regular expression */
453  break;
454  }
455  }
456  }
457 
458  /* Check for error */
459  if(res)
460  {
461  PRINT_ERROR("Creating score rule failed");
462  /* Mask strings to prevent double free */
463  (*new_rule)->group_wildmat = NULL;
464  (*new_rule)->string = NULL;
465  filter_score_rule_destructor(new_rule);
466  }
467  }
468 #endif /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB */
469 
470  return(res);
471 }
472 
473 
474 /* ========================================================================== */
475 /* Calculate and clamp new score
476  *
477  * \param[in] val Old value
478  * \param[in] val Difference to old value that should be aplied
479  *
480  * \return
481  * - Updated score
482  */
483 
484 static int filter_score_add(int val, int diff)
485 {
486  /* Check for increase */
487  if(0 < diff)
488  {
489  if(0 < val)
490  {
491  /* Clamp to upper limit */
492  if(FILTER_SCORE_MAX - val < diff) { val = FILTER_SCORE_MAX; }
493  else { val += diff; }
494  }
495  else { val += diff; }
496  }
497 
498  /* Check for decrease */
499  if(0 > diff)
500  {
501  if(0 > val)
502  {
503  /* Clamp to lower limit */
504  if(FILTER_SCORE_MIN - val > diff) { val = FILTER_SCORE_MIN; }
505  else { val += diff; }
506  }
507  else { val += diff; }
508  }
509 
510  return(val);
511 }
512 
513 
514 /* ========================================================================== */
515 /* Add score rule
516  *
517  * \param[in] new_rule Rule to add
518  */
519 
520 static void filter_add_score_rule(struct filter* new_rule)
521 {
522  struct filter* last_rule = scores;
523 
524  if(NULL == last_rule) { scores = new_rule; }
525  else
526  {
527  /* Append rule to end of list */
528  while(NULL != last_rule->next) { last_rule = last_rule->next; }
529  last_rule->next = new_rule;
530  }
531 }
532 
533 
534 /* ========================================================================== */
535 /* Delete scoring rules */
536 
537 static void filter_delete_score_rules(void)
538 {
539  struct filter* rule = scores;
540  struct filter* next_rule;
541 
542  while(NULL != rule)
543  {
544  next_rule = rule->next;
545  filter_score_rule_destructor(&rule);
546  rule = next_rule;
547  }
548  scores = NULL;
549 }
550 
551 
552 /* ========================================================================== */
553 /* Decode score rule from score file
554  *
555  * \param[out] rule Pointer to decoded rule object
556  * \param[in] line Line from score file
557  * \param[in] len Line buffer size
558  * \param[in] dcre Dont't compile potential regular expressions if nonzero
559  *
560  * On error, \c NULL is written to \e rule .
561  *
562  * \note
563  * On success, the caller is responsible for freeing the ressources allocated
564  * for the object \e rule (use \ref filter_score_rule_destructor() function).
565  */
566 
567 static int filter_decode_rule(struct filter** rule,
568  const char* line, size_t len, int dcre)
569 {
570  int res = -1;
571  const char* p;
572  char* q;
573  enum filter_rule_type type;
574  size_t start = 0;
575  int score = 0;
576  char* group_wildmat = NULL;
577  size_t wm_len;
578  char* string = NULL;
579  int rv;
580  int error = 0;
581 
582  /* Init rule pointer to defined value */
583  *rule = NULL;
584 
585  /* Since version 1.0 all rules have a wildmat as first field */
586  p = strchr(line, (int) ':');
587  if(NULL == p)
588  {
589  PRINT_ERROR("Malformed rule in score file");
590  }
591  else
592  {
593  error = 1;
594  wm_len = (size_t) (p - line);
595  q = (char*) posix_realloc((void*) group_wildmat, wm_len + (size_t) 1);
596  if(NULL == q)
597  {
598  PRINT_ERROR("Cannot allocate memory for wildmat");
599  }
600  else
601  {
602  group_wildmat = q;
603  strncpy(group_wildmat, line, wm_len);
604  group_wildmat[wm_len] = 0;
605  line += wm_len + (size_t) 1;
606  error = 0;
607  }
608  }
609 
610  /* Get type */
611  type = SCORE_TYPE_UNKNOWN;
612  if(!error)
613  {
614  while(SCORE_END_OF_LIST != ++type)
615  {
616  p = filter_type_name[type];
617  start = strlen(p);
618  if(start < len && !strncmp(line, p, start))
619  {
620  /* Verify that not only the first part has matched */
621  if(':' == line[start]) { break; }
622  }
623  }
624  if(SCORE_END_OF_LIST == type) { type = SCORE_TYPE_UNKNOWN; }
625  else if(main_debug)
626  {
627  printf("%s: %sScore rule type: %s\n", CFG_NAME, MAIN_ERR_PREFIX, p);
628  }
629  }
630 
631  /* Extract string from data field */
632  if(!error)
633  {
634  string = (char*) posix_malloc((size_t) len);
635  if(NULL == string)
636  {
637  PRINT_ERROR("Cannot allocate memory for score rule parser");
638  }
639  else
640  {
641  /* Decode data */
642  switch(type)
643  {
644  case SCORE_TYPE_FROM:
645  case SCORE_TYPE_FROM_ERE:
646  case SCORE_TYPE_SUBJECT:
647  case SCORE_TYPE_SUBJECT_ERE:
648  case SCORE_TYPE_MSGID_ERE:
649  case SCORE_TYPE_GROUP:
650  {
651  rv = sscanf(&line[start], ":%d:%[^\n]", &score, string);
652  if(2 != rv)
653  {
654  PRINT_ERROR("Invalid rule in score file");
655  }
656  else
657  {
658  /* Success */
659  res = 0;
660  }
661  break;
662  }
663  default:
664  {
665  PRINT_ERROR("Unknown rule type in score file");
666  break;
667  }
668  }
669  }
670  }
671 
672  /* Create score rule object */
673  if(!res)
674  {
675 #if 0
676  /* For debugging */
677  printf("=============================\n");
678  printf(" Groups: %s\n", group_wildmat);
679  printf(" Type : %s\n", filter_type_name[type]);
680  printf(" Score : %d\n", score);
681  printf(" String: %s\n", string);
682 #endif
683  res = filter_score_rule_constructor(rule, group_wildmat,
684  type, score, string, dcre);
685  }
686 
687  /* Release memory on error */
688  if(res)
689  {
690  posix_free((void*) group_wildmat);
691  posix_free((void*) string);
692  }
693 
694  /*
695  * For code review:
696  * The allocated memory blocks becomes part of the new score rule object!
697  */
698 
699  return(res);
700 }
701 
702 
703 /* ========================================================================== */
704 /* Encode score rule for score file
705  *
706  * \param[in,out] line Pointer to line for score file
707  * \param[in,out] len Pointer to line buffer size
708  * \param[in] rule Rule to encode
709  *
710  * \attention
711  * The pointer \e line must be \c NULL or point to a dynamically allocated
712  * buffer.
713  */
714 
715 static int filter_encode_rule(char** line, size_t* len, struct filter* rule)
716 {
717  const char* frt = NULL; /* Filter Rule Type */
718  int res = -1;
719  int rv;
720  char* p = NULL;
721  size_t l = 0;
722 
723  /* Create new score rule */
724  switch(rule->type)
725  {
726  case SCORE_TYPE_FROM:
727  case SCORE_TYPE_FROM_ERE:
728  case SCORE_TYPE_SUBJECT:
729  case SCORE_TYPE_SUBJECT_ERE:
730  case SCORE_TYPE_MSGID_ERE:
731  case SCORE_TYPE_GROUP:
732  {
733  frt = filter_type_name[rule->type];
734  l += strlen(rule->group_wildmat);
735  l += (size_t) 1; /* Field separator */
736  l += strlen(frt); /* Type ID */
737  l += (size_t) 1; /* Field separator */
738  l += score_len_max; /* Score value */
739  l += (size_t) 1; /* Field separator */
740  l += strlen(rule->string);
741  l += (size_t) 1; /* LF line termination */
742  l += (size_t) 1; /* NUL termination */
743  p = (char*) posix_malloc(l);
744  if(NULL == p)
745  {
746  PRINT_ERROR("Cannot allocate memory for score rule");
747  }
748  else
749  {
750  /* Since version 1.0 all rules use a wildmat */
751  rv = posix_snprintf(p, l, "%s:%s:%d:%s\n", rule->group_wildmat,
752  frt, rule->value, rule->string);
753  if(0 > rv || (size_t) rv >= l)
754  {
755  PRINT_ERROR("Encoding score rule failed");
756  posix_free((void*) p);
757  }
758  else
759  {
760  /* Success => Replace line buffer */
761  posix_free((void*) *line);
762  *line = p;
763  *len = l;
764  res = 0;
765  }
766  }
767  break;
768  }
769  default:
770  {
771  PRINT_ERROR("Encoding unknown rule type failed");
772  break;
773  }
774  }
775 
776  return(res);
777 }
778 
779 
780 /* ========================================================================== */
781 /* Check whether score rule match
782  *
783  * \param[in] line Line from score file
784  * \param[in] len Line buffer size
785  * \param[in] rule Rule to match
786  */
787 
788 static int filter_check_rule(char* line, size_t len, struct filter* rule)
789 {
790  int res = -1;
791  struct filter* current_rule;
792 
793  /* Decode line */
794  if('#' != line[0])
795  {
796  /* Set 'dcre' flag, we only want to compare the strings of the rule */
797  if(!filter_decode_rule(&current_rule, line, len, 1))
798  {
799  /* Check whether score has changed */
800  if(current_rule->type == rule->type)
801  {
802  if(!strcmp(current_rule->string, rule->string))
803  {
804  /* Match detected */
805  res = 0;
806  }
807  }
808  /* Destroy current rule object */
809  filter_score_rule_destructor(&current_rule);
810  }
811  }
812 
813  return(res);
814 }
815 
816 
817 /* ========================================================================== */
818 /* Export score rules
819  *
820  * \param[in] fs Stream corresponding to old configuration
821  * \param[in] fs_tmp Stream corresponding to new configuration
822  *
823  * The current data in memory is merged with the data from \e fs and written
824  * to \e fs_tmp .
825  */
826 
827 static int filter_export_score_rules(FILE* fs, FILE* fs_tmp)
828 {
829  int res = -1;
830  char* line = NULL;
831  size_t len = 0;
832  posix_ssize_t readlen;
833  int rv;
834  struct filter* rule;
835 
836  if(main_debug)
837  {
838  printf("%s: %sStore scoring rules\n", CFG_NAME, MAIN_ERR_PREFIX);
839  }
840 
841  while(1)
842  {
843  /* Read line */
844  readlen = posix_getline(&line, &len, fs);
845  if(-1 == readlen)
846  {
847  if(POSIX_ENOMEM == posix_errno)
848  {
849  PRINT_ERROR("Cannot assign memory for score file parser");
850  }
851  else
852  {
853  /* Check for error */
854  if(ferror(fs))
855  {
856  PRINT_ERROR("Parse error in score file");
857  }
858  /* Check for EOF */
859  else if(feof(fs))
860  {
861  res = 0;
862  }
863  }
864  }
865  if(0 >= readlen) { break; }
866  else
867  {
868  /* Update data */
869  rule = scores;
870  while(NULL != rule)
871  {
872  if(!rule->found && !filter_check_rule(line, len, rule))
873  {
874  /* Match => Update */
875  rule->found = 1;
876  filter_encode_rule(&line, &len, rule);
877  break;
878  }
879  rule = rule->next;
880  }
881 
882  /* Write line to new config file */
883  rv = fprintf(fs_tmp, "%s", line);
884  if(0 > rv) { break; }
885  }
886  }
887 
888  /* Add missing entries to end of config file */
889  if(!res)
890  {
891  rule = scores;
892  while(NULL != rule)
893  {
894  if(!rule->found)
895  {
896  rv = filter_encode_rule(&line, &len, rule);
897  if(rv) { res = -1; break; }
898  else
899  {
900  /* Write new rule */
901  rv = fprintf(fs_tmp, "%s", line);
902  if(0 > rv)
903  {
904  res = -1;
905  break;
906  }
907  }
908  }
909  rule = rule->next;
910  }
911  }
912 
913  /* Release memory for line buffer */
914  posix_free((void*) line);
915 
916  return(res);
917 }
918 
919 
920 /* ========================================================================== */
921 /* Get scorefile pathname
922  *
923  * This function must be thread safe.
924  * The caller is responsible to free the memory for the buffer on success.
925  */
926 
927 static int filter_get_pathname(const char** pathname, const char* filename)
928 {
929  int res = -1;
930  int rv;
931 
932  *pathname = xdg_get_confdir(CFG_NAME);
933  if(NULL != *pathname)
934  {
935  rv = fu_create_path(*pathname, (posix_mode_t) POSIX_S_IRWXU);
936  if(0 == rv)
937  {
938  /* Store scorefile pathname */
939  rv = xdg_append_to_path(pathname, filename);
940  if(0 == rv)
941  {
942  res = 0;
943  }
944  }
945  }
946 
947  /* Free memory on error */
948  if(0 != res)
949  {
950  PRINT_ERROR("Cannot create score file pathname");
951  posix_free((void*) *pathname);
952  *pathname = NULL;
953  }
954 
955  return(res);
956 }
957 
958 
959 /* ========================================================================== */
960 /* Check whether a group in the list matches a wildmat
961  *
962  * Returns success (zero) if one of the groups in \e grouplist matches
963  * \e wildmat .
964  */
965 
966 static int filter_group_check(struct filter* rule, const char** grouplist)
967 {
968  int res = -1;
969  size_t i;
970  const char* group;
971 #if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB
972  struct filter_wm* plp = rule->wm;
973  int rv;
974  const char* string;
975  const char* p = NULL;
976  enum enc_mime_cs charset = ENC_CS_UNKNOWN;
977 #endif /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB */
978 
979  i = 0;
980  /* Assignment in truth expression is intended! */
981  while(NULL != (group = grouplist[i++]))
982  {
983  /* "Match all" must always work */
984  if(!strcmp("*", rule->group_wildmat)) { res = 0; break; }
985  /* Check for literal match (usable for all locales) */
986  if(!strcmp(rule->group_wildmat, group)) { res = 0; break; }
987 #if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB
988  /* Check with regular expressions created from group wildmat */
989  while(NULL != plp)
990  {
991  rv = 0;
992  string = group;
993  if(FILTER_CS_ASCII == plp->cs)
994  {
995  rv = enc_ascii_check(string);
996  }
997  else if(FILTER_CS_ISO8859_1 == plp->cs)
998  {
999  /* Try to convert data to ISO 8859-1 */
1000  p = enc_convert_to_8bit(&charset, string, NULL);
1001  if(NULL == p) { rv = -1; }
1002  else if(ENC_CS_ISO8859_1 != charset) { rv = -1; }
1003  else { string = p; }
1004  }
1005  if(!rv && !posix_regexec(plp->ere, string, 0, NULL, 0)) { res = 0; }
1006  /* Release memory for ISO 8859-1 string */
1007  if(NULL != p && group != p) { enc_free((void*) p); }
1008  /* Check for ERE match */
1009  if(!res)
1010  {
1011  /* printf("ERE of wildmat pattern matched\n"); */
1012  if(plp->negate) { res = -1; }
1013  break;
1014  }
1015  /* Next pattern in list */
1016  plp = plp->next;
1017  }
1018 #endif /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB */
1019  }
1020 
1021  return(res);
1022 }
1023 
1024 
1025 /* ========================================================================== */
1026 /*! \brief Initialize filter module
1027  *
1028  * \param[in] utf8 Flag indicating that the locale use UTF-8 encoding
1029  *
1030  * \attention
1031  * Remember that the locale must use either UTF-8 or ISO 8859-1 codeset or be
1032  * the POSIX locale.
1033  *
1034  * Step1 (only if \c CONF_SCORERC is configured):
1035  * - Rename current \c scorefile to \c scorefile.old
1036  * - Copy pathname configured with \c CONF_SCORERC to \c scorefile
1037  *
1038  * Step 2:
1039  * - Open and lock scorefile
1040  * - Load rules from scorefile to memory
1041  *
1042  * \return
1043  * - 0 on success
1044  * - Negative value on error
1045  */
1046 
1047 int filter_init(int utf8)
1048 {
1049  int res = -1;
1050  const char* scorerc = config[CONF_SCORERC].val.s;
1051  const char* scorepathname = NULL;
1052  char* oldscorepathname = NULL;
1053  int rv;
1054  struct_posix_stat state;
1055  int fd = -1;
1056  FILE* fs = NULL;
1057  char* data = NULL;
1058  size_t len;
1059  char* line = NULL;
1060  posix_ssize_t readlen;
1061  struct filter* rule;
1062 #if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI
1063  const char* loc_ctype;
1064 #endif /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI */
1065 
1066 #if !(CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB)
1067  PRINT_ERROR("Regular expression support disabled by configuration");
1068 #endif /* !(CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB) */
1069 
1070  /* Set locale and check codeset */
1071  filter_locale = FILTER_CS_ASCII;
1072 #if !(CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI)
1073 # if CFG_USE_CLB
1074  PRINT_ERROR("Cannot set locale due to configuration");
1075 # endif /* CFG_USE_CLB */
1076  printf("%s: %sCooked character classification codeset: "
1077  "US-ASCII\n", CFG_NAME, MAIN_ERR_PREFIX);
1078 #else /* !(CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI) */
1079  loc_ctype = posix_setlocale(POSIX_LC_CTYPE, "");
1080  if(NULL == loc_ctype)
1081  {
1082  PRINT_ERROR("Setting locale for category 'LC_CTYPE' failed");
1083  return(-1);
1084  }
1085  else
1086  {
1087  printf("%s: %sCharacter classification locale: %s\n",
1088  CFG_NAME, MAIN_ERR_PREFIX, loc_ctype);
1089  if(utf8)
1090  {
1091  printf("%s: %sCooked character classification codeset: "
1092  "UTF-8\n", CFG_NAME, MAIN_ERR_PREFIX);
1093  filter_locale = FILTER_CS_UTF_8;
1094  }
1095  else
1096  {
1097 # if CFG_USE_XSI
1098  loc_ctype = posix_nl_langinfo(CODESET);
1099 # endif /* CFG_USE_XSI */
1100  /* Check whether fallback to ISO 8859-1 is possible */
1101  if( NULL != strstr(loc_ctype, "8859-1")
1102  || NULL != strstr(loc_ctype, "8859_1")
1103  || NULL != strstr(loc_ctype, "88591") )
1104  {
1105  /* Verify that it is not something like "8859-15" */
1106  if('1' == loc_ctype[strlen(loc_ctype) - (size_t) 1])
1107  {
1108  printf("%s: %sCooked character classification codeset: "
1109  "ISO-8859-1\n", CFG_NAME, MAIN_ERR_PREFIX);
1110  filter_locale = FILTER_CS_ISO8859_1;
1111  }
1112  else
1113  {
1114  PRINT_ERROR("Codeset of locale not supported");
1115  PRINT_ERROR("Supported codesets: US-ASCII, ISO-8859-1, UTF-8");
1116  PRINT_ERROR("(Use \"locale -a\" to find a locale)");
1117  return(-1);
1118  }
1119  }
1120  else
1121  {
1122  if( !strcmp(loc_ctype, "POSIX")
1123  || !strcmp(loc_ctype, "C")
1124  || NULL != strstr(loc_ctype, "ASCII")
1125  || NULL != strstr(loc_ctype, "X3.4") )
1126  {
1127  printf("%s: %sCooked character classification codeset: "
1128  "US-ASCII\n", CFG_NAME, MAIN_ERR_PREFIX);
1129  }
1130  else
1131  {
1132  PRINT_ERROR("Codeset of locale not supported");
1133  PRINT_ERROR("Supported codesets: US-ASCII, ISO-8859-1, UTF-8");
1134  PRINT_ERROR("(Use \"locale -a\" to find a locale)");
1135  return(-1);
1136  }
1137  }
1138  }
1139  }
1140 #endif /* !(CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI) */
1141 
1142  /*
1143  * Calculate maximum length of score value strings
1144  *
1145  * Note: Use of snprintf() must no longer be SUS Version 2 compatible.
1146  * C99/POSIX.1-2001/SUSv3 semantics are now provided by the POSIX module.
1147  */
1148  rv = posix_snprintf(NULL, 0, "%d", FILTER_SCORE_MAX);
1149  if(0 <= rv)
1150  {
1151  score_len_max = (size_t) rv;
1152  rv = posix_snprintf(NULL, 0, "%d", FILTER_SCORE_MIN);
1153  if(0 <= rv)
1154  {
1155  if((size_t) rv > score_len_max) { score_len_max = (size_t) rv; }
1156  res = 0;
1157  }
1158  }
1159  if(res)
1160  {
1161  PRINT_ERROR("Calculation of maximum score string length failed");
1162  return(res);
1163  }
1164 
1165  /* Step 1 */
1166  if(strlen(scorerc))
1167  {
1168  if(main_debug)
1169  {
1170  printf("%s: %sImport external scorerc: %s\n",
1171  CFG_NAME, MAIN_ERR_PREFIX, scorerc);
1172  }
1173  rv = posix_stat(scorerc, &state);
1174  if(rv) { PRINT_ERROR("Cannot stat scorerc file"); }
1175  else if(POSIX_S_ISREG(state.st_mode))
1176  {
1177  rv = filter_get_pathname(&scorepathname, scorefile_name);
1178  if(!rv)
1179  {
1180  /* Read scorerc file */
1181  rv = fu_open_file(scorerc, &fd, POSIX_O_RDWR, (posix_mode_t) 0);
1182  if(!rv)
1183  {
1184  rv = fu_lock_file(fd);
1185  if(!rv)
1186  {
1187  rv = fu_read_whole_file(fd, &data, &len);
1188  }
1189  fu_close_file(&fd, NULL);
1190  if(!rv)
1191  {
1192  oldscorepathname =
1193  posix_malloc(strlen(scorepathname) + (size_t) 5);
1194  if(NULL == oldscorepathname)
1195  {
1196  PRINT_ERROR("Cannot allocate memory for pathname");
1197  }
1198  else
1199  {
1200  strcpy(oldscorepathname, scorepathname);
1201  strcat(oldscorepathname, ".old");
1202  rv = posix_rename(scorepathname, oldscorepathname);
1203  if(rv)
1204  {
1205  PRINT_ERROR("Renaming score file failed");
1206  }
1207  rv = fu_open_file(scorepathname, &fd,
1208  POSIX_O_WRONLY | POSIX_O_CREAT,
1209  FILTER_PERM);
1210  if(!rv)
1211  {
1212  rv = fu_lock_file(fd);
1213  if(!rv)
1214  {
1215  len = strlen(data);
1216  rv = fu_write_to_filedesc(fd, data, len);
1217  }
1218  fu_close_file(&fd, NULL);
1219  }
1220  }
1221  }
1222  }
1223  }
1224  }
1225  if(rv)
1226  {
1227  PRINT_ERROR("Importing scorerc failed, using local scorefile");
1228  }
1229  }
1230  /* Release memory */
1231  posix_free((void*) data);
1232  posix_free((void*) oldscorepathname);
1233  posix_free((void*) scorepathname);
1234  scorepathname = NULL;
1235 
1236  /* Step 2 */
1237  rv = filter_get_pathname(&scorepathname, scorefile_name);
1238  if(!rv)
1239  {
1240  rv = posix_stat(scorepathname, &state);
1241  if(rv) { PRINT_ERROR("Cannot stat score file"); }
1242  else if(POSIX_S_ISREG(state.st_mode))
1243  {
1244  rv = fu_open_file(scorepathname, &fd, POSIX_O_RDWR, (posix_mode_t) 0);
1245  if(!rv)
1246  {
1247  rv = fu_lock_file(fd);
1248  if(!rv)
1249  {
1250  rv = fu_assign_stream(fd, &fs, "r");
1251  if(!rv)
1252  {
1253  /* Load scoring rules */
1254  if(main_debug)
1255  {
1256  printf("%s: %sLoad scoring rules from: %s\n",
1257  CFG_NAME, MAIN_ERR_PREFIX, scorepathname);
1258  }
1259  while(1)
1260  {
1261  /* Read line */
1262  readlen = posix_getline(&line, &len, fs);
1263  if(-1 == readlen)
1264  {
1265  if(POSIX_ENOMEM == posix_errno)
1266  {
1267  PRINT_ERROR("Cannot allocate memory for score "
1268  "file parser");
1269  }
1270  else
1271  {
1272  /* Check for error */
1273  if(ferror(fs))
1274  {
1275  PRINT_ERROR("Parse error in score file");
1276  }
1277  }
1278  }
1279  if(0 >= readlen) { break; }
1280  else
1281  {
1282  /* Extract data */
1283  if('#' == line[0]) { continue; }
1284  rv = filter_decode_rule(&rule, line, (size_t) readlen,
1285  0);
1286  if(!rv) { filter_add_score_rule(rule); }
1287  }
1288  }
1289  posix_free((void*) line);
1290  rv = 0;
1291  }
1292  }
1293  }
1294  fu_close_file(&fd, &fs);
1295  }
1296  if(rv) { PRINT_ERROR("Importing rules from score file failed"); }
1297  }
1298  /* Release memory */
1299  posix_free((void*) scorepathname);
1300 
1301 #if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB
1302  /* Initialize test group checking facility */
1303  if(!strlen(config[CONF_TESTGRP_ERE].val.s))
1304  {
1305  PRINT_ERROR("No ERE for test group matching found in config file");
1306  rv = -1;
1307  }
1308  else
1309  {
1310  /* Compile testgroup ERE */
1311  printf("%s: %sEnabling test group checking facility\n",
1312  CFG_NAME, MAIN_ERR_PREFIX);
1313  rv = filter_compile_ere(&testgroup_cs, &testgroup_ere,
1314  config[CONF_TESTGRP_ERE].val.s);
1315  }
1316  if(rv) { testgroup_ere = NULL; }
1317 #endif /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB */
1318 
1319  return(res);
1320 }
1321 
1322 
1323 /* ========================================================================== */
1324 /*! \brief Shutdown filter module
1325  *
1326  * Step 1:
1327  * - Open and lock scorefile
1328  * - Save scoring rules in memory to score file
1329  * - Delete scoring rules from memory
1330  *
1331  * Step 2 (only if \c CONF_SCORERC is configured):
1332  * - Copy \c scorefile to the location configured with \c CONF_SCORERC
1333  */
1334 
1335 void filter_exit(void)
1336 {
1337  const char* scorerc = config[CONF_SCORERC].val.s;
1338  const char* scorepathname = NULL;
1339  char* tmppathname = NULL;
1340  int rv;
1341  int fd = -1;
1342  FILE* fs = NULL;
1343  int fd_tmp = -1;
1344  FILE* fs_tmp = NULL;
1345  char* data = NULL;
1346  size_t len;
1347  struct_posix_stat state;
1348  char* p;
1349 
1350  /* Step 1 */
1351  rv = filter_get_pathname(&scorepathname, scorefile_name);
1352  if(!rv)
1353  {
1354  /* Create scorefile if it does not exist */
1355  rv = posix_stat(scorepathname, &state);
1356  if(rv && POSIX_ENOENT == posix_errno)
1357  {
1358  fu_open_file(scorepathname, &fd_tmp,
1359  POSIX_O_WRONLY | POSIX_O_CREAT, FILTER_PERM);
1360  fu_close_file(&fd_tmp, NULL);
1361  }
1362  /* Open scorefile */
1363  rv = posix_stat(scorepathname, &state);
1364  if(rv) { PRINT_ERROR("Cannot stat score file"); }
1365  else if(POSIX_S_ISREG(state.st_mode))
1366  {
1367  rv = fu_open_file(scorepathname, &fd, POSIX_O_RDWR, (posix_mode_t) 0);
1368  if(!rv)
1369  {
1370  rv = fu_lock_file(fd);
1371  if(!rv)
1372  {
1373  rv = fu_assign_stream(fd, &fs, "r");
1374  if(!rv)
1375  {
1376  /* Open temporary file */
1377  tmppathname = posix_malloc(strlen(scorepathname)
1378  + (size_t) 5);
1379  if(NULL == tmppathname)
1380  {
1381  PRINT_ERROR("Cannot allocate memory for pathname");
1382  }
1383  else
1384  {
1385  strcpy(tmppathname, scorepathname);
1386  strcat(tmppathname, ".new");
1387  rv = fu_open_file(tmppathname, &fd_tmp, POSIX_O_WRONLY
1388  | POSIX_O_CREAT | POSIX_O_TRUNC,
1389  FILTER_PERM);
1390  /*
1391  * Because we have the lock for the score file, it is
1392  * allowed to assume that no other instance of the program
1393  * currently use the temporary filename.
1394  */
1395  if(!rv)
1396  {
1397  rv = fu_assign_stream(fd_tmp, &fs_tmp, "w");
1398  if(!rv)
1399  {
1400  rv = filter_export_score_rules(fs, fs_tmp);
1401  }
1402  /* Flush stream of temporary file*/
1403  if (!rv) { rv = fu_sync(fd_tmp, fs_tmp); }
1404  /* Rename temporary file to score file */
1405  if(!rv)
1406  {
1407  rv = posix_rename(tmppathname, scorepathname);
1408  }
1409  if(rv)
1410  {
1411  if(tmppathname)
1412  {
1413  (void) fu_unlink_file(tmppathname);
1414  }
1415  }
1416  }
1417  fu_close_file(&fd_tmp, &fs_tmp);
1418  }
1419  }
1420  }
1421  }
1422  fu_close_file(&fd, &fs);
1423  }
1424  if(rv) { PRINT_ERROR("Exporting rules to score file failed"); }
1425  }
1426  filter_delete_score_rules();
1427 
1428  /* Step 2 */
1429  if(!rv && strlen(scorerc))
1430  {
1431  /* Read scorefile */
1432  rv = fu_open_file(scorepathname, &fd, POSIX_O_RDWR,
1433  (posix_mode_t) 0);
1434  if(!rv)
1435  {
1436  rv = fu_lock_file(fd);
1437  if(!rv)
1438  {
1439  rv = fu_read_whole_file(fd, &data, &len);
1440  }
1441  fu_close_file(&fd, NULL);
1442  if(!rv)
1443  {
1444  /* Write scorerc file */
1445  if(main_debug)
1446  {
1447  printf("%s: %sExport to external scorerc: %s\n",
1448  CFG_NAME, MAIN_ERR_PREFIX, scorerc);
1449  }
1450  p = posix_realloc(tmppathname, strlen(scorerc) + (size_t) 5);
1451  if(NULL == p)
1452  {
1453  PRINT_ERROR("Cannot allocate memory for pathname");
1454  }
1455  else
1456  {
1457  tmppathname = p;
1458  strcpy(tmppathname, scorerc);
1459  strcat(tmppathname, ".new");
1460  rv = fu_open_file(tmppathname, &fd,
1461  POSIX_O_WRONLY | POSIX_O_CREAT, FILTER_PERM);
1462  if(!rv)
1463  {
1464  rv = fu_lock_file(fd);
1465  if(!rv)
1466  {
1467  len = strlen(data);
1468  rv = fu_write_to_filedesc(fd, data, len);
1469  if(rv) { rv = fu_sync(fd, NULL); }
1470  if(rv)
1471  {
1472  PRINT_ERROR("Writing data to scorerc file failed");
1473  }
1474  else
1475  {
1476  rv = posix_rename(tmppathname, scorerc);
1477  if(rv)
1478  {
1479  PRINT_ERROR("Renaming new scorerc file failed");
1480  }
1481  }
1482  }
1483  fu_close_file(&fd, NULL);
1484  }
1485  }
1486  }
1487  }
1488  if(rv)
1489  {
1490  PRINT_ERROR("Exporting score file data to scorerc failed");
1491  }
1492  }
1493 
1494  /* Release memory */
1495  posix_free((void*) data);
1496  posix_free((void*) tmppathname);
1497  posix_free((void*) scorepathname);
1498 
1499 #if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB
1500  /* Destroy testgroup ERE */
1501  if(NULL != testgroup_ere)
1502  {
1503  /* Destroy regular expression object */
1504  posix_regfree(testgroup_ere);
1505  posix_free((void*) testgroup_ere);
1506  testgroup_ere = NULL;
1507  }
1508 #endif /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB */
1509 
1510  /* Clear locale configuration */
1511  filter_locale = FILTER_CS_ASCII;
1512 }
1513 
1514 
1515 /* ========================================================================== */
/*! \brief Check whether a newsgroup is a test group
 *
 * \param[in] group  Single newsgroup name (not list)
 *
 * The name is matched against the compiled test group ERE. Depending on the
 * codeset the ERE was compiled for, the name is first verified to be ASCII or
 * converted to ISO 8859-1. If this preparation fails, an error is printed and
 * the group is not treated as test group.
 *
 * If the regular expression support is not compiled in, the result is
 * always 0.
 *
 * \return
 * - 1 if \e group is a test group
 * - 0 otherwise
 */

int filter_check_testgroup(const char* group)
{
   int is_testgroup = 0;
#if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB
   enum enc_mime_cs detected = ENC_CS_UNKNOWN;
   const char* converted = NULL;
   const char* subject = group;
   int unsupported = 0;

   /* Without a compiled ERE there is nothing to match against */
   if(NULL == testgroup_ere)
   {
      PRINT_ERROR("Test group check failed (ERE not compiled)");
      return(is_testgroup);
   }

   /* Prepare the name for the codeset of the ERE */
   switch(testgroup_cs)
   {
      case FILTER_CS_ASCII:
      {
         unsupported = enc_ascii_check(subject);
         break;
      }
      case FILTER_CS_ISO8859_1:
      {
         /* Try to convert data to ISO 8859-1 */
         converted = enc_convert_to_8bit(&detected, subject, NULL);
         if(NULL == converted || ENC_CS_ISO8859_1 != detected)
         {
            unsupported = -1;
         }
         else
         {
            subject = converted;
         }
         break;
      }
      default:
      {
         /* Name is used unmodified */
         break;
      }
   }

   if(unsupported)
   {
      PRINT_ERROR("Test group name cannot be checked with current locale");
   }
   else if(0 == posix_regexec(testgroup_ere, subject, 0, NULL, 0))
   {
      printf("%s: %sTest group detected\n", CFG_NAME, MAIN_ERR_PREFIX);
      is_testgroup = 1;
   }

   /* Release the converted string, unless the input itself was returned */
   if(NULL != converted && converted != group)
   {
      enc_free((void*) converted);
   }
#endif  /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB */

   return(is_testgroup);
}
1573 
1574 
1575 /* ========================================================================== */
1576 /*! \brief Check for own article
1577  *
1578  * \param[in] he Pointer to article hierarchy element
1579  *
1580  * The identity configuration is used as reference for matching.
1581  *
1582  * \return
1583  * - 1 if \e he corresponds to own article
1584  * - 0 otherwise
1585  */
1586 
1588 {
1589  int res = 0;
1590 
1591  if(NULL != he->header)
1592  {
1593  /*
1594  * Note 1:
1595  * The element 'from' (corresponding to the mandatory header field "From")
1596  * is never 'NULL'. If the header field is missing in the article, the
1597  * constructor for the hierarchy element in the CORE module inserts a
1598  * valid empty string.
1599  *
1600  * Note 2:
1601  * Configurations elements of string type are never 'NULL'. If no value
1602  * is found in the configfile, the FILTER module inserts valid empty
1603  * strings.
1604  */
1605 
1606  /*
1607  * This is the simple default rule
1608  * It matches to the identity configuration of the user.
1609  */
1610  if(he->header->from[0] && config[CONF_FROM].val.s[0])
1611  {
1612  if(!strcmp(he->header->from, config[CONF_FROM].val.s)
1613  || !strcmp(he->header->from, config[CONF_REPLYTO].val.s))
1614  {
1615  res = 1;
1616  }
1617  }
1618  /* Hook in more sophisticated custom code here if desired */
1619  }
1620 
1621  return(res);
1622 }
1623 
1624 
1625 /* ========================================================================== */
1626 /*! \brief Check for reply to own article
1627  *
1628  * \param[in] he Pointer to article hierarchy element
1629  *
1630  * \return
1631  * - 1 if \e he corresponds to a reply to an own article
1632  * - 0 otherwise
1633  */
1634 
1636 {
1637  int res = 0;
1638  const char* last_ref = "";
1639  size_t i;
1640 
1641  if(NULL != he->parent)
1642  {
1643  res = filter_match_own(he->parent);
1644  if(res)
1645  {
1646  /* Verify that there are no missing articles in between */
1647  if(NULL != he->header->refs)
1648  {
1649  i = 0;
1650  while(NULL != he->header->refs[i])
1651  {
1652  last_ref = he->header->refs[i++];
1653  }
1654  }
1655  if(strcmp(last_ref, he->parent->header->msgid))
1656  {
1657  /* There are missing article(s) in between */
1658  res = 0;
1659  }
1660  }
1661  }
1662 
1663  return(res);
1664 }
1665 
1666 
1667 /* ========================================================================== */
1668 /*! \brief Get article score
1669  *
1670  * \param[in] he Pointer to article hierarchy element
1671  *
1672  * \return
1673  * - Score of article
1674  * - 0 if no score is defined for article corresponding to \e he .
1675  */
1676 
1678 {
1679  int res = 0;
1680  struct filter* rule = scores;
1681  const char* data;
1682  size_t i;
1683 #if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB
1684  int rv;
1685  const char* string;
1686  const char* p = NULL;
1687  enum enc_mime_cs charset = ENC_CS_UNKNOWN;
1688 #endif /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB */
1689 
1690  if(NULL != he->header)
1691  {
1692  while(NULL != rule)
1693  {
1694  /* Check whether group matches wildmat */
1695  if(!filter_group_check(rule, he->header->groups))
1696  {
1697  /* Yes => Check rule type */
1698  data = NULL;
1699  switch(rule->type)
1700  {
1701  /* ----------------------------------------------------------- */
1702  /* Literal matching */
1703  case SCORE_TYPE_FROM:
1704  {
1705  if(NULL == data) { data = he->header->from; }
1706  /* No break here is intended! */
1707  }
1708  case SCORE_TYPE_SUBJECT:
1709  {
1710  if(NULL == data) { data = he->header->subject; }
1711  if(!strcmp(rule->string, data))
1712  {
1713  res = filter_score_add(res, rule->value);
1714  }
1715  break;
1716  }
1717  /* ----------------------------------------------------------- */
1718  /* Literal matching against field element */
1719  case SCORE_TYPE_GROUP:
1720  {
1721  i = 0;
1722  /* Assignment in truth expression is intended! */
1723  while(NULL != (data = he->header->groups[i++]))
1724  {
1725  if(!strcmp(rule->string, data))
1726  {
1727  res = filter_score_add(res, rule->value);
1728  }
1729  }
1730  break;
1731  }
1732  /* ----------------------------------------------------------- */
1733  /* Extended regular expression matching */
1734  case SCORE_TYPE_FROM_ERE:
1735  {
1736  if(NULL == data) { data = he->header->from; }
1737  /* No break here is intended! */
1738  }
1739  case SCORE_TYPE_SUBJECT_ERE:
1740  {
1741  if(NULL == data) { data = he->header->subject; }
1742  /* No break here is intended! */
1743  }
1744  case SCORE_TYPE_MSGID_ERE:
1745  {
1746  if(NULL == data) { data = he->header->msgid; }
1747 #if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB
1748  if(NULL == rule->ere)
1749  {
1750  /*
1751  * If this happens, the 'dcre' parameter of the score rule
1752  * constructor was not used correctly.
1753  */
1754  PRINT_ERROR("Regular expression not compiled (bug)");
1755  }
1756  else
1757  {
1758  rv = 0;
1759  string = data;
1760  if(FILTER_CS_ASCII == rule->cs)
1761  {
1762  rv = enc_ascii_check(string);
1763  }
1764  else if(FILTER_CS_ISO8859_1 == rule->cs)
1765  {
1766  /* Try to convert data to ISO 8859-1 */
1767  p = enc_convert_to_8bit(&charset, string, NULL);
1768  if(NULL == p) { rv = -1; }
1769  else if(ENC_CS_ISO8859_1 != charset) { rv = -1; }
1770  else { string = p; }
1771  }
1772  if(!rv && !posix_regexec(rule->ere, string, 0, NULL, 0))
1773  {
1774  res = filter_score_add(res, rule->value);
1775  }
1776  /* Release memory for ISO 8859-1 string */
1777  if(NULL != p && data != p) { enc_free((void*) p); }
1778  }
1779 #endif /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB */
1780  break;
1781  }
1782  /* ----------------------------------------------------------- */
1783  default:
1784  {
1785  PRINT_ERROR("Unknown type of score rule (bug)");
1786  break;
1787  }
1788  }
1789  }
1790  rule = rule->next;
1791  }
1792  }
1793 
1794  return(res);
1795 }
1796 
1797 
1798 /* ========================================================================== */
1799 /*! \brief Get codeset of locale category \c LC_CTYPE
1800  *
1801  * \return
1802  * - Codeset ID of locale category \c LC_CTYPE
1803  */
1804 
1805 enum filter_cs filter_get_locale_ctype(void)
1806 {
1807  return(filter_locale);
1808 }
1809 
1810 
1811 /*! @} */
1812 
1813 /* EOF */
core_hierarchy_element::header
struct core_article_header * header
Definition: core.h:143
fu_write_to_filedesc
int fu_write_to_filedesc(int filedesc, const char *buffer, size_t len)
Write data block to filedescriptor.
Definition: fileutils.c:542
filter_get_score
int filter_get_score(const struct core_hierarchy_element *he)
Get article score.
Definition: filter.c:1677
filter_match_own
int filter_match_own(const struct core_hierarchy_element *he)
Check for own article.
Definition: filter.c:1587
enc_free
void enc_free(void *p)
Free an object allocated by encoding module.
Definition: encoding.c:8868
fu_assign_stream
int fu_assign_stream(int filedesc, FILE **stream, const char *mode)
Assign I/O stream to open file.
Definition: fileutils.c:373
filter_get_locale_ctype
enum filter_cs filter_get_locale_ctype(void)
Get codeset of locale category LC_CTYPE.
Definition: filter.c:1805
enc_mime_cs
enc_mime_cs
IDs for supported MIME character sets.
Definition: encoding.h:59
fu_lock_file
int fu_lock_file(int filedesc)
Lock file for writing.
Definition: fileutils.c:328
filter_exit
void filter_exit(void)
Shutdown filter module.
Definition: filter.c:1335
enc_ascii_check
int enc_ascii_check(const char *s)
Verify ASCII encoding.
Definition: encoding.c:4944
config
struct conf config[CONF_NUM]
Global configuration.
Definition: conf.c:63
conf_entry_val::s
char * s
Definition: conf.h:103
core_article_header::subject
const char * subject
Definition: core.h:112
CONF_FROM
Definition: conf.h:46
fu_create_path
int fu_create_path(const char *path, posix_mode_t perm)
Create path.
Definition: fileutils.c:119
CONF_SCORERC
Definition: conf.h:50
core_article_header::groups
const char ** groups
Definition: core.h:110
enc_wm_pattern
Wildmat array element (for RFC 3977 wildmat-pattern)
Definition: encoding.h:139
main_debug
int main_debug
Enable additional debug output if nonzero.
Definition: main.cxx:64
CONF_REPLYTO
Definition: conf.h:47
core_article_header::from
const char * from
Definition: core.h:111
core_hierarchy_element::parent
struct core_hierarchy_element * parent
Definition: core.h:145
core_article_header::refs
const char ** refs
Definition: core.h:119
MAIN_ERR_PREFIX
#define MAIN_ERR_PREFIX
Message prefix for FILTER module.
Definition: filter.c:58
filter_check_testgroup
int filter_check_testgroup(const char *group)
Check for test group.
Definition: filter.c:1527
fu_unlink_file
int fu_unlink_file(const char *pathname)
Unlink file.
Definition: fileutils.c:355
core_hierarchy_element
Node in article hierarchy.
Definition: core.h:136
PRINT_ERROR
#define PRINT_ERROR(s)
Prepend module prefix and print error message.
Definition: main.h:19
data
struct core_data data
Global data object (shared by all threads)
Definition: core.c:242
enc_destroy_wildmat
void enc_destroy_wildmat(struct enc_wm_pattern **obj, int num)
Destroy wildmat pattern array.
Definition: encoding.c:5537
xdg_get_confdir
const char * xdg_get_confdir(const char *)
Get configuration directory.
Definition: xdg.c:115
xdg_append_to_path
int xdg_append_to_path(const char **, const char *)
Append path component to buffer.
Definition: xdg.c:55
enc_create_wildmat
int enc_create_wildmat(struct enc_wm_pattern **obj, const char *wm)
Create wildmat pattern array.
Definition: encoding.c:5371
filter_init
int filter_init(int utf8)
Initialize filter module.
Definition: filter.c:1047
fu_sync
int fu_sync(int filedesc, FILE *stream)
Flush buffers of file.
Definition: fileutils.c:402
fu_close_file
void fu_close_file(int *filedesc, FILE **stream)
Close file (and potentially associated I/O stream)
Definition: fileutils.c:290
conf::val
union conf_entry_val val
Definition: conf.h:111
FILTER_PERM
#define FILTER_PERM
Permissions for score file.
Definition: filter.c:61
CONF_TESTGRP_ERE
Definition: conf.h:61
filter_match_reply_to_own
int filter_match_reply_to_own(const struct core_hierarchy_element *he)
Check for reply to own article.
Definition: filter.c:1635
ENC_CS_ISO8859_1
Definition: encoding.h:63
enc_convert_to_8bit
const char * enc_convert_to_8bit(enum enc_mime_cs *charset, const char *s, const char **cs_iana)
Convert string from Unicode (UTF-8 NFC) to an 8bit character set.
Definition: encoding.c:6005
core_article_header::msgid
const char * msgid
Definition: core.h:109
fu_read_whole_file
int fu_read_whole_file(int filedesc, char **buffer, size_t *len)
Read text file content and store it into memory buffer.
Definition: fileutils.c:445
fu_open_file
int fu_open_file(const char *pathname, int *filedesc, int mode, posix_mode_t perm)
Open file.
Definition: fileutils.c:243
print_error
void print_error(const char *)
Print error message.
Definition: main.cxx:276

Generated at 2024-04-27 using  doxygen