rexdfa.h
Go to the documentation of this file.
00001 /*
00002  *  Regular Pattern Analyzer Toolkit (RPA/Tk)
00003  *  Copyright (c) 2009-2012 Martin Stoilov
00004  *
00005  *  This program is free software: you can redistribute it and/or modify
00006  *  it under the terms of the GNU General Public License as published by
00007  *  the Free Software Foundation, either version 3 of the License, or
00008  *  (at your option) any later version.
00009  *
00010  *  This program is distributed in the hope that it will be useful,
00011  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  *  GNU General Public License for more details.
00014  *
00015  *  You should have received a copy of the GNU General Public License
00016  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  *
00018  *  Martin Stoilov <martin@rpasearch.com>
00019  */
00020 
00030 #ifndef _REXDFA_H_
00031 #define _REXDFA_H_
00032 
00033 #ifdef __cplusplus
00034 extern "C" {
00035 #endif
00036 
00037 #ifndef REX_USERDATA_TYPE
00038 #ifdef WIN32
00039 typedef size_t rexuserdata_t;
00040 #else
00041 typedef unsigned long rexuserdata_t;
00042 #endif
00043 #else
00044 typedef REX_USERDATA_TYPE rexuserdata_t;
00045 #endif
00046 
00047 #ifndef REX_UWORD_TYPE
00048 typedef unsigned long rexuword_t;
00049 #else
00050 typedef REX_UWORD_TYPE rexuword_t;
00051 #endif
00052 
00053 #ifndef REX_UINT_TYPE
00054 typedef unsigned int rexuint_t;
00055 #else
00056 typedef REX_UINT_TYPE rexuint_t;
00057 #endif
00058 
00059 
00060 #ifndef REX_CHAR_TYPE
00061 typedef unsigned int rexchar_t;
00062 #else
00063 typedef REX_CHAR_TYPE rexchar_t;
00064 #endif
00065 #define REX_CHAR_MAX ((rexchar_t)-1)
00066 #define REX_CHAR_MIN ((rexchar_t)0)
00067 
00068 
00072 typedef struct rexdfss_s {
00073         rexuint_t type;                                 
00074         rexuint_t uid;                                  
00075         rexuserdata_t userdata;                 
00077 } rexdfss_t;
00078 
00079 
00083 typedef struct rexdft_s {
00084         rexchar_t lowin;                                
00085         rexchar_t highin;                               
00086         rexuint_t state;                                
00087 } rexdft_t;
00088 
00089 
00093 typedef struct rexdfs_s {
00094         rexuint_t type;                                 
00095         rexuint_t trans;                                
00096         rexuint_t ntrans;                               
00097         rexuint_t accsubstates;                 
00098         rexuint_t naccsubstates;                
00099         rexuint_t substates;                    
00100         rexuint_t nsubstates;                   
00101 } rexdfs_t;
00102 
00103 
00107 typedef struct rexdfa_s {
00108         rexuint_t nstates;                              
00109         rexdfs_t *states;                               
00110         rexuint_t ntrans;                               
00111         rexdft_t *trans;                                
00112         rexuint_t naccsubstates;                
00113         rexdfss_t *accsubstates;                
00114         rexuint_t nsubstates;                   
00115         rexdfss_t *substates;                   
00116         unsigned int hbytes;
00117         unsigned int hbits;
00118         unsigned int hsize;
00119         unsigned char *bits;
00120         rexuword_t reserved[64];
00121 } rexdfa_t;
00122 
00123 
00127 typedef enum {
00128         REX_STATETYPE_NONE = 0,                 
00129         REX_STATETYPE_START = 1,                
00130         REX_STATETYPE_ACCEPT = 2,               
00131         REX_STATETYPE_DEAD = 3,                 
00132 } rex_statetype_t;
00133 
00134 
00135 #define REX_DFA_DEADSTATE (0)           
00136 #define REX_DFA_STARTSTATE (1)          
00147 #define REX_DFA_STATE(__dfa__, __nstate__)                                                      (&(__dfa__)->states[__nstate__])
00148 
00160 #define REX_DFA_TRANSITION(__dfa__, __nstate__, __ntrans__)                     (&(__dfa__)->trans[(REX_DFA_STATE(__dfa__, __nstate__)->trans) + (__ntrans__)])
00161 
00173 #define REX_DFA_SUBSTATE(__dfa__, __nstate__, __nsubstate__)            ((__dfa__)->substates ? &(__dfa__)->substates[REX_DFA_STATE(__dfa__, __nstate__)->substates + (__nsubstate__)] : ((rexdfss_t*)0))
00174 
00185 #define REX_DFA_ACCSUBSTATE(__dfa__, __nstate__, __naccsubstate__)      ((__dfa__)->accsubstates ? &(__dfa__)->accsubstates[REX_DFA_STATE(__dfa__, __nstate__)->accsubstates + (__naccsubstate__)] : ((rexdfss_t*)0))
00186 
00187 
00202 #define REX_DFA_NEXT(__dfa__, __nstate__, __input__, __nextptr__) \
00203                 do { \
00204                         rexdft_t *t; \
00205                         rexuint_t mid, min = 0, max = REX_DFA_STATE(__dfa__, __nstate__)->ntrans; \
00206                         while (max > min) { \
00207                                 mid = (min + max)/2; \
00208                                 t = REX_DFA_TRANSITION(__dfa__, __nstate__, mid); \
00209                                 if ((__input__) >= t->lowin) { \
00210                                         min = mid + 1; \
00211                                 } else { \
00212                                         max = mid; \
00213                                 } \
00214                         } \
00215                         t = REX_DFA_TRANSITION(__dfa__, __nstate__, min-1); \
00216                         *(__nextptr__) = t->state; \
00217                 } while (0)
00218 
00219 
00220 #define REX_DFA_HASHBITS(__bytes__, __bitsperbyte__) ((__bytes__) * (__bitsperbyte__))
00221 #define REX_DFA_HASHSIZE(__bytes__, __bitsperbyte__) (1 << REX_DFA_HASHBITS(__bytes__, __bitsperbyte__))
00222 #define REX_DFA_HASHMASK(__bytes__, __bitsperbyte__) (REX_DFA_HASHSIZE(__bytes__, __bitsperbyte__) - 1)
00223 
00224 #define REX_BITARRAY_BYTE(__arr__, __entry__) (((unsigned char*)__arr__)[((unsigned int)(__entry__))>>3])
00225 #define REX_BITARRAY_GET(__arr__, __entry__) ((((unsigned char*)__arr__)[((unsigned int)(__entry__))>>3] & (1 << (((unsigned int)(__entry__)) & 0x7))) ? 1 : 0)
00226 #define REX_BITARRAY_SET(__arr__, __entry__) do { ((unsigned char*)__arr__)[((unsigned int)(__entry__))>>3] |= (1 << (((unsigned int)(__entry__)) & 0x7)); } while (0)
00227 #define REX_BITARRAY_CLR(__arr__, __entry__) do { ((unsigned char*)__arr__)[((unsigned int)(__entry__))>>3] &= ~(1 << (((unsigned int)(__entry__)) & 0x7)); } while (0)
00228 
00229 
00230 rexdfa_t *rex_dfa_create(rexuint_t nstates, rexuint_t ntrans, rexuint_t naccsubstates, rexuint_t nsubstates);
00231 void rex_dfa_destroy(rexdfa_t *dfa);
00232 int rex_dfa_hash(rexdfa_t *dfa, unsigned int hbytes, unsigned int hbits);
00233 void rex_dfa_dumpstate(rexdfa_t *dfa, rexuint_t nstate);
00234 rexdfs_t *rex_dfa_state(rexdfa_t *dfa, rexuint_t nstate);
00235 rexdft_t *rex_dfa_transition(rexdfa_t *dfa, rexuint_t nstate, rexuint_t ntransition);
00236 rexdfss_t *rex_dfa_substate(rexdfa_t *dfa, rexuint_t nstate, rexuint_t nsubstate);
00237 rexdfss_t *rex_dfa_accsubstate(rexdfa_t *dfa, rexuint_t nstate, rexuint_t naccsubstate);
00238 rexuint_t rex_dfa_next(rexdfa_t *dfa, rexuint_t nstate, rexchar_t input);
00239 
00240 #ifdef __cplusplus
00241 }
00242 #endif
00243 
00244 
00245 #endif /* _REXDFA_H_ */