使用的是 POSIX 正则库(<regex.h>),参考:



gcc myreg.c

ip.pat 内容:


ip.txt 内容:


./a.out ip.pat ip.txt


/*  myreg.c  */

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <regex.h>
#include <unistd.h>

#define MAX 256

/* Positions of the matches found by reg(); at most MAX matches fit. */
typedef struct placemsg_t
{
    int start[MAX]; /* offset in the scanned text where each match begins */
    int end[MAX];   /* offset in the scanned text just past each match */
    int count;      /* number of valid entries in start[] and end[] */
} placemsg;

#define TIMES 100
#define MAX_PATTERN_LEN 8192

/**
 * @brief Convert one hexadecimal digit character to its numeric value.
 * @param c the character to convert ('0'-'9', 'a'-'f' or 'A'-'F')
 * @return the value 0..15; on any other character a diagnostic is written
 *         to stderr and -1 is returned
 */
static int hex2dec(char c)
{
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    if (c >= 'a' && c <= 'f') {
        return c - 'a' + 10;
    }
    if (c >= 'A' && c <= 'F') {
        return c - 'A' + 10;
    }

    fprintf(stderr, "hex2dec: bad value!\n");
    return -1;
}

/**
 * @brief Replace every \xHH escape in a regular expression with the single
 *        character it stands for.
 *
 * Every output character, whether decoded from an escape or copied
 * verbatim, is also passed through tolower(), so the returned pattern is
 * entirely lower case. A \x00 escape yields an embedded NUL, which ends the
 * string for any caller that treats the result as a C string.
 *
 * @param s regular expression that may contain \xHH escapes (may be NULL)
 * @return a newly malloc()ed string the caller must free(), or NULL when
 *         s is NULL or memory cannot be allocated
 */
static char *pre_process(char *s)
{
    size_t len;
    size_t sindex = 0;
    size_t rindex = 0;
    char *result;

    if (s == NULL) {
        return NULL;
    }

    len = strlen(s);
    /* The output is never longer than the input: an escape shrinks 4 -> 1. */
    result = malloc(len + 1);
    if (result == NULL) {
        return NULL;
    }

    while (sindex < len) {
        if (sindex + 3 < len &&
            s[sindex] == '\\' && s[sindex + 1] == 'x' &&
            isxdigit((unsigned char)s[sindex + 2]) &&
            isxdigit((unsigned char)s[sindex + 3])) {
            /* carefully remember to call tolower here... */
            result[rindex] = (char)tolower(hex2dec(s[sindex + 2]) * 16 +
                                           hex2dec(s[sindex + 3]));
            sindex += 3; /* 4 total, with the increment below */
        } else {
            result[rindex] = (char)tolower((unsigned char)s[sindex]);
        }
        sindex++;
        rindex++;
    }
    result[rindex] = '\0';

    return result;
}

/*
 * Returns true (1) if the line (from a pattern file) is a comment, 0 if not.
 * Three kinds of line count as comments: an empty line, a line whose first
 * character is '#', and a line made up only of whitespace.
 */
static int is_comment(char *line)
{
    const char *p;

    /* lines starting with # are comments */
    if (line[0] == '#') {
        return 1;
    }

    /* Any non-whitespace character makes it a real line; an empty line
     * never enters the loop and so falls through to "comment". */
    for (p = line; *p != '\0'; p++) {
        if (!isspace((unsigned char)*p)) {
            return 0;
        }
    }
    return 1;
}

/*
 * Copies line into the buffer *patname with every whitespace character
 * removed, and NUL-terminates the result.
 *
 * line     text to copy from
 * patname  points to a caller-owned buffer of at least strlen(line) + 1
 *          bytes; the pointer itself is not changed
 * Returns *patname.
 */
static char *get_protocol_name (char *line, char **patname)
{
    char *name = *patname;
    size_t i;
    size_t j = 0;

    for (i = 0; line[i] != '\0'; i++) {
        if (!isspace((unsigned char)line[i])) {
            name[j++] = line[i];
        }
    }
    name[j] = '\0';

    return name;
}

/*
 * Returns the index just past the last occurrence of mark in str, or 0 when
 * mark does not occur. The terminating NUL takes part in the search, so
 * last_mark(str, '\0') returns strlen(str) + 1.
 */
int last_mark (char *str, char mark)
{
    const char *hit = strrchr(str, mark);

    if (hit == NULL) {
        return 0;
    }
    return (int)(hit - str) + 1;
}

/*
 * Returns the index just past the first occurrence of mark in str at or
 * after index num.
 *
 * The scan stops at the terminating NUL instead of running off the end of
 * the string: when mark is not found the terminator is treated as the mark,
 * so the result is strlen(str) + 1.
 *
 * num must not exceed strlen(str).
 */
int first_mark (char *str, char mark, int num)
{
    int count = num;

    while (str[count] != mark && str[count] != '\0') {
        count++;
    }
    return count + 1;
}

/*
 * Extract the file name, without its suffix, from a full file name.
 *
 * Copies the (firstdot - lastslash - 1) characters of srcstr that start at
 * index lastslash into the buffer *decstr and NUL-terminates the copy. A
 * non-positive length produces an empty string.
 *
 * srcstr     source string
 * decstr     points to a caller-owned buffer with room for the copied
 *            characters plus the terminator
 * lastslash  index of the first character to copy
 * firstdot   index just past the '.' that ends the name
 * Always returns 0.
 */
int substr (char *srcstr, char **decstr, int lastslash, int firstdot)
{
    char *str = *decstr;
    int size = firstdot - lastslash - 1;

    if (size < 0) {
        size = 0;
    }
    memcpy(str, srcstr + lastslash, (size_t)size);
    str[size] = '\0';

    return 0;
}

/*
 * Stores in the buffer *name the part of file that lies between the last
 * '/' (or the start of the string when there is none) and the first '.'
 * that follows it.
 *
 * file  path to take the name from
 * name  points to a caller-owned output buffer; it is handed to substr()
 *       unchanged
 * Always returns 0.
 */
int basename(char *file, char **name)
{
    int lastnum = last_mark (file, '/');
    int firstnum = first_mark (file, '.', lastnum);

    substr (file, name, lastnum, firstnum);

    return 0;
}

 * @brief 以下是pcre匹配的相关函数原型:
 *   int regcomp(regex_t *preg, const char *regex, int cflags);
 *     REG_EXTENDED  支持扩展的正则
 *     REG_NEWLINE;  包括换行
 *      int regexec(const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
 *   eflags = REG_NOTBOL | REG_NOTEOL;
 *      REG_NOTBOL  行结尾
 *      REG_NOTEOL  文件结尾
 *          size_t regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size);
 *          void regfree(regex_t *preg);
 * @param regexp 正则表达式模式文件
 * @param file_path 要进行匹配的文件路径
 * @return  匹配信息结构体
struct placemsg_t reg(char *regexpfile, char *file_path)
  regex_t preg;
  struct placemsg_t placeinfo;

regmatch_t pmatch[MAX] = {};
  size_t nmatch = MAX;
  size_t length;
  char errbuf[MAX] = {};
  size_t errbuf_size;
  int res;
  int errcode;

FILE *fp;
  int fd;
  long sitestart;
  long siteend;
  long filesize;
  char *string;

int patternlen, i;
  int flag = 1;
  FILE * fp2;
  char * line = NULL;
  size_t len = 0;
  ssize_t size;

char *filename = (char*)malloc(256);
  basename(regexpfile, &filename);

 //printf ("1regexpfile=%s\n", regexpfile);
 fp2 = fopen (regexpfile, "r");
 if (fp2 == NULL)
 flag = 1;
 while ((size= getline(&line, &len, fp2)) != -1)
  line[strlen(line)-1] = '\0';
  //printf ("line=%s\n", line);
  else if((strstr(line, filename) == 0) && flag)
   flag = 0;
  else if(flag)
  else if (flag == 0)
   printf("正则表达式是:%s\n", line);
 fp2 = NULL;
 printf ("2line=%s\n", line);

 char *regexpstr = pre_process(line); /* do \xHH escapes */
 printf("regexpstr=%s\n", regexpstr);

printf("file_path=%s\n", file_path);
  fp = fopen(file_path, "r");
  if (fp == NULL)

fseek(fp, 0, SEEK_SET);
  sitestart = ftell(fp);
  fseek(fp, 0, SEEK_END);
  siteend = ftell(fp);
  filesize = siteend - sitestart;
  printf("filezize=%d\n", filesize);
  string = (char*) malloc(sizeof(char)*filesize+4);
  memset(string, 0, filesize+4);
  fp = NULL;

fd = open(file_path, O_RDONLY);
  if (fd == -1)
 perror ("open");

res = read(fd, string, filesize);
  if (res == -1)
 perror ("read");

  printf("res=%d,string=%s\n", res, string);

printf("begin regcomp.........\n");
  int cflags = REG_EXTENDED | REG_NEWLINE;
  //int cflags = REG_EXTENDED;
  //int cflags = 0;
  res = regcomp (&preg, regexpstr, cflags);

printf("begin regexec.........\n");
  //int eflags = REG_NOTBOL | REG_NOTEOL;
  int eflags =  REG_NOTEOL;
  //int eflags = 0;
  int place[5];
  int start_front=0;
  int start_now=0;
  int end_front=0;
  int end_now=0;
  int temp = 0;
  char *p = string;
  i = 0;
  while (1)
    res = regexec (&preg, p, nmatch, pmatch, eflags);
    if (res == 0)
   start_now = pmatch[0].rm_so;
   end_now = pmatch[0].rm_eo;
   temp = end_now - start_now;
   //printf("temp=%d\n", temp);
   start_now = end_front + start_now;
   //printf("start_now=%d\n", start_now);
      end_now = start_now + temp;
   //printf("end_now=%d\n", end_now);

start_front = start_now;
   end_front = end_now;

//printf("start place=%d\n", pmatch[0].rm_so);
   //printf("end place=%d\n", pmatch[0].rm_eo);
   //printf("start place=%d\n", start_front);
   //printf("end place=%d\n", end_front);
   placeinfo.start[i] = start_front;
   placeinfo.end[i] = end_front;

p += pmatch[0].rm_eo;

   printf("no match\n");

  placeinfo.count = i;
  //printf(".....i=%d\n", i);
  length = regerror (res, &preg, errbuf, errbuf_size);



return placeinfo;

int main (int argc, char **argv)
 //char *regexp = "r.t";
 char *regexpfile = argv[1];
 printf("regexpfile=%s\n", regexpfile);
 //char *file_path = "t.txt";
 char *file_path = argv[2];

struct placemsg_t placeinfo;

placeinfo = reg(regexpfile, file_path);
 int size = placeinfo.count;
 int i = 0;
 for (i=0; i<size; i++)
  printf ("start[%d]=%d\n", i, placeinfo.start[i]);
  printf ("end[%d]=%d\n", i, placeinfo.end[i]);
 return 0;


  1. MonoDevelop 4.0.9 on CentOS 6.3 安装笔记
  2. Node.js大众点评爬虫
  3. PullToRefresh
  4. textField和textView限制输入条件
  5. js 删除确定
  6. angularJS——自定义指令
  7. MapReduce实现TopK的示例
  8. BZOJ1758: [Wc2010]重建计划
  9. 房租管理小软件(四):对linq的使用
  10. CC2530芯片介绍
  11. HTML5 + CSS3 + JavaScript
  12. C/C++变量命名规则
  13. python-冒泡排序,升序、降序
  14. ADC/DAC设计常见40问
  15. Promise,我们来实战
  16. [HNOI2009]通往城堡之路
  17. catkin_make编译错误
  18. Spring中事务配置以及事务不起作用可能出现的问题
  19. EventBus简单封装
  20. ubuntu安装pgAdmin 4


  1. sql server单引号和双引号的区别
  2. 九度OJ 1051:数字阶梯求和 (大数运算)
  3. java 核心技术卷一 知识点
  4. ubuntu 14.04 用 shell 方便安装nginx
  5. 3D文字特效
  6. 51nod 1040
  7. Python中出现“TabError: inconsistent use of tabs and spaces in indentation”问题的解决
  8. BZOJ 3410 [Usaco2009 Dec]Selfish Grazing 自私的食草者:贪心【最多线段覆盖】
  9. 搭建LoadRunner中的场景(二) 集合点
  10. 企业安全建设之搭建开源SIEM平台(上)