#include #include #include #include #include #include #include #include #include "/usr/local/xledir/include/chart-typedefs.h" #include "/usr/local/xledir/include/regexp.h" #include "/usr/local/xledir/include/xle.h" typedef struct socket_state { int sockfd; char buffer[100001]; } socket_state; void error(char *msg) { perror(msg); } // returns the start and end positions of the nearest sexp int find_sexp (char *exp, int *start, int *end) { int i = *start; while (exp[i] != '(' && i < *end) i++; i++; *start = i; int parent_count = 1; while (parent_count > 0 && i < *end) { switch (exp[i]) { case '\\': ++i; break; // escape char case '(': ++parent_count; break; case ')': --parent_count; break; }; ++i; }; if (parent_count == 0) { *end = i - 1; return 0; } else return 1; }; // to do: free allocated memory! // parse the regexp s-expression and build RegExp structures /* void *parse_regexp_string(HEAPptr heap, char *input, int start, int end) { int exp_start = start; int exp_end = end; find_sexp(input, &exp_start, &exp_end); if (strstr(input + exp_start, "PAIR") == input + exp_start) { int i = exp_start + 4; int lstart, lend, ustart, uend, pstart, pend; // int pos = -1; while (input[i] != '"') { ++i; if (input[i] == '\\') ++i; } ++i; lstart = i; while (input[i] != '"') { ++i; if (input[i] == '\\') ++i; } lend = i; ++i; while (input[i] != '"') { ++i; if (input[i] == '\\') ++i; } ++i; ustart = i; while (input[i] != '"') { ++i; if (input[i] == '\\') ++i; } uend = i; ++i; pstart = i; while (input[i] != ')') { ++i; } pend = i; char *lowerstart = input + lstart; char *upperstart = input + ustart; char *lower = (char *)calloc(lend - lstart + 1, sizeof(char)); char *ll = lower; for (i = lstart; i < lend; i++) *ll++ = *lowerstart++; char *upper = (char *)calloc(uend - ustart + 1, sizeof(char)); char *uu = upper; for (i = ustart; i < uend; i++) *uu++ = *upperstart++; // printf("re_symbol_pair: %s, %s, %d, %d\n", lower, upper, pstart, pend); if (pstart == pend) { return re_symbol_pair(lower,upper,heap); } else { pstart++; char *posstart = input + pstart; char *pos = (char *)calloc(pend - pstart + 1, sizeof(char)); char *pp = pos; for (i = pstart; i < pend; i++) *pp++ = *posstart++; int stringpos = atoi(pos); // printf("re_symbol_pair_pos: %s, %s, %d\n", lower, upper, stringpos); return re_symbol_pair_pos(lower,upper,stringpos,heap); } } else { int first_exp_start = exp_start; int first_exp_end = exp_end; find_sexp(input, &first_exp_start, &first_exp_end); if (strstr(input + exp_start, "OR") == input + exp_start) { // printf("re_altset\n"); return re_altset(parse_regexp_string(heap, input, exp_start, first_exp_end + 1), parse_regexp_string(heap, input, first_exp_end + 1, exp_end), heap);} else if (strstr(input + exp_start, "SEQ") == input + exp_start) { // printf("re_sequence\n"); return re_sequence(parse_regexp_string(heap, input, exp_start, first_exp_end + 1), parse_regexp_string(heap, input, first_exp_end + 1, exp_end), heap);} else return (void *)0; } }; */ // (OR (SEQ (PAIR "s" "s" 1) (SEQ (PAIR "p" "p" 2) (SEQ (PAIR "i" "i" 3) (SEQ (PAIR "s" "s" 4) (PAIR "+Token" "" 0))))) (OR (SEQ (PAIR "s" "s") (SEQ (PAIR "p" "p") (SEQ (PAIR "i" "i") (SEQ (PAIR "s" "s") (SEQ (PAIR "+Noun" "") (SEQ (PAIR "+Indef" "") (SEQ (PAIR "+Sg" "") (PAIR "+Masc" "")))))))) (SEQ (PAIR "s" "s") (SEQ (PAIR "p" "p") (SEQ (PAIR "i" "i") (SEQ (PAIR "s" "s") (SEQ (PAIR "e" "") (SEQ (PAIR "+Verb" "") (PAIR "+Impv" ""))))))))) // original void *parse_regexp_string(HEAPptr heap, char *input, int start, int end) { int exp_start = start; int exp_end = end; find_sexp(input, &exp_start, &exp_end); if (strstr(input + exp_start, "PAIR") == input + exp_start) { int i = exp_start + 4; int lstart, lend, ustart, uend; while (input[i] != '"') { ++i; if (input[i] == '\\') ++i; } ++i; lstart = i; while (input[i] != '"') { ++i; if (input[i] == '\\') ++i; } lend = i; ++i; while (input[i] != '"') { ++i; if (input[i] == '\\') ++i; } ++i; ustart = i; while (input[i] != '"') { ++i; if (input[i] == '\\') ++i; } uend = i; char *lowerstart = input + lstart; char *upperstart = input + ustart; char *lower = (char *)calloc(lend - lstart + 1, sizeof(char)); char *ll = lower; for (i = lstart; i < lend; i++) *ll++ = *lowerstart++; char *upper = (char *)calloc(uend - ustart + 1, sizeof(char)); char *uu = upper; for (i = ustart; i < uend; i++) *uu++ = *upperstart++; // printf("re_symbol_pair: %s, %s\n", lower, upper); return re_symbol_pair(lower,upper,heap); } else { int first_exp_start = exp_start; int first_exp_end = exp_end; find_sexp(input, &first_exp_start, &first_exp_end); if (strstr(input + exp_start, "OR") == input + exp_start) { // printf("re_altset\n"); return re_altset(parse_regexp_string(heap, input, exp_start, first_exp_end + 1), parse_regexp_string(heap, input, first_exp_end + 1, exp_end), heap);} else if (strstr(input + exp_start, "SEQ") == input + exp_start) { // printf("re_sequence\n"); return re_sequence(parse_regexp_string(heap, input, exp_start, first_exp_end + 1), parse_regexp_string(heap, input, first_exp_end + 1, exp_end), heap);} else return (void *)0; } }; void *build_regexp(HEAPptr heap, char *input) { // example regexp /* input = "(OR (SEQ (PAIR \"s\" \"s\") (SEQ (PAIR \"p\" \"p\") (SEQ (PAIR \"i\" \"i\") (SEQ (PAIR \"s\" \"s\") (PAIR \"+Token\" \"\"))))) (OR (SEQ (PAIR \"s\" \"s\") (SEQ (PAIR \"p\" \"p\") (SEQ (PAIR \"i\" \"i\") (SEQ (PAIR \"s\" \"s\") (SEQ (PAIR \"+Noun\" \"\") (SEQ (PAIR \"+Indef\" \"\") (SEQ (PAIR \"+Sg\" \"\") (PAIR \"+Masc\" \"\")))))))) (SEQ (PAIR \"s\" \"s\") (SEQ (PAIR \"p\" \"p\") (SEQ (PAIR \"i\" \"i\") (SEQ (PAIR \"s\" \"s\") (SEQ (PAIR \"e\" \"\") (SEQ (PAIR \"+Verb\" \"\") (PAIR \"+Impv\" \"\")))))))))"; */ void *regexp; regexp = parse_regexp_string(heap, input, 0, strlen(input)); FILE *tmpfile; // print_net_as_regexp(regexp_to_fsm(regexp), 0, 0, 0); return regexp; // return parse_regexp_string(heap, input, 0, strlen(input)); }; void *analyze_to_regexp(char *input, socket_state *state) { // printf("input: '%s'\n", input); strcpy((*state).buffer,input); int n = write((*state).sockfd,(*state).buffer,strlen((*state).buffer)); if (strlen((*state).buffer) < 1) return (void *)0; write((*state).sockfd,"\n",1); // printf("written.\n"); if (n < 0) error("ERROR writing to socket"); bzero((*state).buffer,100001); // printf("Reading...\n"); n = read((*state).sockfd,(*state).buffer,100000); // printf("regexp read:\n"); if (n < 0) error("ERROR reading from socket"); // printf("%s\n",(*state).buffer); HEAPptr heap = init_heap(sizeof(RegExp), 100, "RegExp"); // printf("sizeof(RegExp) = %d\n", sizeof(RegExp)); return build_regexp(heap,(*state).buffer); }; char *character_encoding(void *state) { // return "iso8859-1"; return "utf-8"; }; socket_state *initialize() { int portno; struct sockaddr_in serv_addr; struct hostent *server; struct socket_state *state = (socket_state *)malloc(sizeof(socket_state)); // start the server //system("/home/paul/lisp/projects/xle/morph/clisp -K full -M /home/paul/lisp/projects/xle/morph/morph-server.mem &"); // printf("started.\n"); sleep(1); // printf("Now are we ready.\n"); FILE *tmpfile; char portstr[8]; char morph_filename[64]; sprintf(morph_filename, "/tmp/cgp.socket.%d.%d", getuid(), getpid()); // printf("\nm: morph: %s\n",morph_filename); sleep(1); tmpfile = fopen(morph_filename, "rt"); fgets(portstr, 8, tmpfile); // printf("portstr: %s\n", portstr); fclose(tmpfile); unlink(morph_filename); portno = atoi(portstr); // establish socket connection // portno = 2001; (*state).sockfd = socket(AF_INET, SOCK_STREAM, 0); if ((*state).sockfd < 0) error("ERROR opening socket"); // server = gethostbyname("decentius.aksis.uib.no"); server = gethostbyname("localhost"); // printf("server: %d\n", server); if (server == NULL) { fprintf(stderr,"ERROR, no such host\n"); exit(0); } bzero((char *) &serv_addr, sizeof(serv_addr)); serv_addr.sin_family = AF_INET; bcopy((char *)server->h_addr, (char *)&serv_addr.sin_addr.s_addr, server->h_length); serv_addr.sin_port = htons(portno); if (connect((*state).sockfd,&serv_addr,sizeof(serv_addr)) < 0) error("ERROR connecting"); // send pid sprintf((*state).buffer, "%d\n", getpid()); write((*state).sockfd,(*state).buffer,strlen((*state).buffer)); bzero((*state).buffer,100001); // init_xle(0,0); // initialize_cfsm(); return state; }