|
void *
that it passes to each call to the target API.
There are four calls in the target API. Gazelle always calls them in
the order given below. Except for target_present, each one is
called only once.
grep
command. The directory to search is controlled by a parameter in the
gazelle configuration file. Files which grep identifies
as matching the search term are treated as result records.
When the client asks for brief records, the grep target returns the
names of the files; when the client asks for full records, the target
returns the file contents.
The grep target is deliberately very simple. It ignores the attributes associated with terms, and it does not handle complex queries (ANDs, ORs, etc.). It is merely intended as a reference for some of the basic aspects of target implementation. Let's go through the grep target source code, highlighting key issues as they come up:
#include <stdlib.h> #include <unistd.h> #include <stdio.h> #include <dirent.h> #include <sys/types.h> #include <sys/mman.h> #include <sys/stat.h> |
target_api.h,
which contains declarations of the target API's routines and data
structures.
|
#include "target_api.h" #include "str.h" #include "PList.h" #include "Dict.h" |
void * reference to the state structure
for the target implementation. Since we want to keep track of the
directory that we need to search, as well as a list of matching files,
we define the grep_state structure to represent these
two pieces of information:
|
/*
 * grep_state
 *
 * Per-connection bookkeeping for the grep target.  The target API
 * routines receive a pointer to one of these as a void * and cast
 * it back before use.
 */
typedef struct {
    /* the directory whose files are grepped */
    char *dir;

    /* names of the files that matched the search */
    PList *files;
} grep_state;
|
The target_connect routine is called when the client
connects to gazelle. It is passed a dictionary of parameters, and
returns a freshly-allocated state pointer. What the grep target does
for connect is to allocate a state structure, create a new empty list
to hold result records, and retrieve the value of the
"grep_directory" parameter.
|
/*
 * target_connect
 *
 * Allocate a state structure and store the directory
 * name and an empty list in it. The directory name will
 * be used during target_search to determine which
 * directory to execute the "grep" command in.
 *
 * params: dictionary of parameters; only "grep_directory" is read.
 *
 * Returns the state as a (void *), or NULL if the state structure
 * could not be allocated.  If dict_get yields NULL for
 * "grep_directory" (presumably meaning the parameter is not
 * configured -- TODO confirm against the Dict API), state->dir is
 * left NULL rather than handing NULL to str_dup.
 */
void *target_connect(Dict *params)
{
    grep_state *state;
    char *dir;

    state = malloc(sizeof *state);
    if (state == NULL) {
        return NULL;
    }

    state->files = pList_new();

    /* keep a private copy so the state does not alias the dictionary */
    dir = (char *) dict_get(params, "grep_directory");
    state->dir = (dir != NULL) ? str_dup(dir) : NULL;

    return (void *) state;
}
|
The target_search routine is called when the client
executes a search. It's passed the query as translated by the query
translator; in the case of the grep target this should be a simple
term. It's also passed a set of parameters. These are not the same ones as were
passed to target_connect, which are the top-level
parameters, but rather parameters that are specific to a particular
database (see the configuration file reference for information about
supporting multiple databases with one gazelle server). And finally,
it's passed the state pointer.
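What the grep target does in its implementation of target_search is to open the configured directory, run the grep command on each file in it, and record the names of the files that match.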
|
/* shell command template: every %s receives an already-quoted word */
#define GREP_CMD_FORMAT "cd %s && grep -e %s -- %s > /dev/null"

/*
 * grep_shell_quote
 *
 * Return a freshly malloc'd copy of s wrapped in single quotes, so
 * that a POSIX shell treats it as one literal word.  Each embedded
 * single quote is emitted as '\'' (close quote, escaped quote,
 * reopen quote).  Returns NULL if memory cannot be allocated.
 * The caller frees the result.
 */
static char *grep_shell_quote(const char *s)
{
    size_t len = 2;             /* opening and closing quote */
    const char *in;
    char *quoted;
    char *out;

    for (in = s; *in != '\0'; in++) {
        len += (*in == '\'') ? 4 : 1;
    }

    quoted = malloc(len + 1);
    if (quoted == NULL) {
        return NULL;
    }

    out = quoted;
    *out++ = '\'';
    for (in = s; *in != '\0'; in++) {
        if (*in == '\'') {
            *out++ = '\'';
            *out++ = '\\';
            *out++ = '\'';
            *out++ = '\'';
        } else {
            *out++ = *in;
        }
    }
    *out++ = '\'';
    *out = '\0';

    return quoted;
}

/*
 * grep_build_command
 *
 * Build the "cd <dir> && grep -e <q> -- <name>" command line in a
 * buffer sized to fit, with all three words shell-quoted.  Returns
 * a malloc'd string the caller frees, or NULL on failure.
 */
static char *grep_build_command(const char *dir, const char *q,
                                const char *name)
{
    char *q_dir = grep_shell_quote(dir);
    char *q_term = grep_shell_quote(q);
    char *q_name = grep_shell_quote(name);
    char *cmd = NULL;

    if (q_dir != NULL && q_term != NULL && q_name != NULL) {
        /* first pass measures, second pass formats */
        int need = snprintf(NULL, 0, GREP_CMD_FORMAT, q_dir, q_term, q_name);

        if (need >= 0) {
            cmd = malloc((size_t) need + 1);
            if (cmd != NULL) {
                snprintf(cmd, (size_t) need + 1, GREP_CMD_FORMAT,
                         q_dir, q_term, q_name);
            }
        }
    }

    free(q_dir);
    free(q_term);
    free(q_name);

    return cmd;
}

/*
 * target_search
 *
 * Search the directory for a given regex. This is done
 * by opening the directory, calling the "grep" command
 * on each file, and checking the return status of the
 * command.
 *
 * q:       the search term, handed to grep as its pattern
 * params:  per-database parameters; not used by the grep target
 * v_state: the grep_state, passed in as a (void *)
 *
 * Returns the number of matching files; their names are appended
 * to state->files.  Returns 0 if the state, its directory or the
 * query is missing, or if the directory cannot be opened.
 */
int target_search(const char *q, Dict *params, void *v_state)
{
    DIR *dp;
    struct dirent *d;
    int count = 0;
    grep_state *state;
    PList *files;
    char *dir;

    (void) params;

    /* acquire a (grep_state *) to the state structure, which
       is passed in as a (void *) */
    state = (grep_state *) v_state;
    if (state == NULL || state->dir == NULL || q == NULL) {
        return 0;
    }
    files = state->files;
    dir = state->dir;

    /* attempt to open the directory. */
    if ((dp = opendir(dir)) == NULL) {
        return 0;
    }

    /* iterate over the files in the directory */
    while ((d = readdir(dp)) != NULL) {
        char *name = d->d_name;
        char *cmd;

        /* "." and ".." are directories, never result records */
        if (name[0] == '.' &&
            (name[1] == '\0' || (name[1] == '.' && name[2] == '\0'))) {
            continue;
        }

        /* use "cd" to change to the directory (so we don't have
           to parse the optional trailing '/').  "&&" keeps grep from
           running somewhere else if the cd fails; "-e" and "--" stop
           a term or file name that starts with '-' from being read
           as an option.  The directory is quoted literally, which
           matches how opendir() above interpreted it. */
        cmd = grep_build_command(dir, q, name);
        if (cmd == NULL) {
            continue;           /* out of memory: skip this file */
        }
        printf("%s\n", cmd);

        /* execute it.  system() yields 0 only when grep exited with
           status 0, i.e. the file matched. */
        if (!system(cmd)) {
            /* got a hit. append the file name to the list */
            printf("hit on %s\n", name);
            pList_add(files, str_dup(name));
            /* increment the hit count */
            count++;
        } /* otherwise didn't get a hit: ignore */

        free(cmd);
    }
    closedir(dp);

    printf("returning %d hits from target_search\n", count);

    /* return the hit count. state contains the names of the
       hits. */
    return count;
}
|
target_present is called after a search, when the
client wants to retrieve records from the result set. It's passed the
1-based ordinal index of the record in the result set, a flag
indicating that a brief or full record is desired, and the state
pointer. It returns a target_record, a data structure
representing a result record.
For brief records, the grep target returns the name of the matching file. For full records, the grep target reads the file and returns its content. |
/*
 * grep_read_file
 *
 * Read the whole of the file at path into a freshly allocated,
 * NUL-terminated buffer.  The buffer grows as data arrives, so the
 * result is correct even if the file changes size while it is being
 * read.  On success the byte count (excluding the NUL) is stored in
 * *len_out and the buffer is returned; the caller owns it.  Returns
 * NULL if the file cannot be opened or read, or memory runs out.
 */
static char *grep_read_file(const char *path, size_t *len_out)
{
    FILE *fp;
    char *buf = NULL;
    size_t cap = 0;
    size_t len = 0;

    fp = fopen(path, "r");
    if (fp == NULL) {
        return NULL;
    }

    for (;;) {
        size_t got;

        /* always keep room for at least one data byte plus the NUL */
        if (cap - len < 2) {
            size_t new_cap = (cap == 0) ? 4096 : cap * 2;
            char *tmp = realloc(buf, new_cap);

            if (tmp == NULL) {
                free(buf);
                fclose(fp);
                return NULL;
            }
            buf = tmp;
            cap = new_cap;
        }

        got = fread(buf + len, 1, cap - len - 1, fp);
        if (got == 0) {
            break;              /* end of file or read error */
        }
        len += got;
    }

    if (ferror(fp)) {
        free(buf);
        fclose(fp);
        return NULL;
    }
    fclose(fp);

    /* add a null */
    buf[len] = '\0';
    *len_out = len;

    return buf;
}

/*
 * target_present
 *
 * Return a result record. The grep target returns filenames for
 * brief records and file contents for full records.
 *
 * ix:      1-based position of the wanted record in the hit list
 * bf:      TARGET_BRIEF for a brief record, anything else for full
 * v_state: the grep_state, passed in as a (void *)
 *
 * Returns the new record, or NULL if the state is missing, the hit
 * cannot be located, or (for full records) the file cannot be read.
 */
target_record *target_present(long int ix, int bf, void *v_state)
{
    PList *files;
    long i;
    Position *p;
    char *filename;
    char *dir;
    /* acquire a (grep_state *) to the state structure, which
       is passed in as a (void *) */
    grep_state *state = (grep_state *) v_state;

    if (state == NULL) {
        return NULL;
    }
    files = state->files;
    dir = state->dir;

    /* step through the hitlist to the appropriate hit.
       NOTE(review): the NULL test assumes pList_first/pList_next
       return NULL once the list is exhausted -- confirm against the
       PList API; if they never do, the test is simply inert. */
    for (i = 1, p = pList_first(files);
         i < ix && p != NULL;
         i++, p = pList_next(files, p)) {
        printf("fn = %s\n", (char *) pList_retrieve(files, p));
    }
    if (p == NULL) {
        return NULL;
    }

    /* this is the name of the file the client is requesting
       as a result */
    filename = (char *) pList_retrieve(files, p);
    if (filename == NULL) {
        return NULL;
    }
    /* for brief records, simply return the filename as
       a plaintext record */
|
target_record_newXX routines. See the target API
reference for detailed summaries of these routines.
|
    if (bf == TARGET_BRIEF) {
        return target_record_newText(filename, PLAINTEXT_SYNTAX);
    } else { /* full record */
        char *fullpath;
        char *buf;
        size_t sz = 0;
        int need;

        if (dir == NULL) {
            return NULL;
        }

        /* construct the full path of the file name, in a buffer
           measured to fit */
        need = snprintf(NULL, 0, "%s/%s", dir, filename);
        if (need < 0) {
            return NULL;
        }
        fullpath = malloc((size_t) need + 1);
        if (fullpath == NULL) {
            return NULL;
        }
        snprintf(fullpath, (size_t) need + 1, "%s/%s", dir, filename);

        /* read the file into memory */
        buf = grep_read_file(fullpath, &sz);
        if (buf != NULL) {
            printf("file %s is %ld long\n", fullpath, (long) sz);
        }
        free(fullpath);

        if (buf == NULL) {
            return NULL;
        }

        /* return new plaintext record containing the file contents.
           buf is not freed here, matching the original code;
           presumably the record takes ownership -- TODO confirm
           against the target API reference. */
        return target_record_newPlainText(buf);
    }
}
|
In target_disconnect, we simply clean up our
grep_state. Other targets might have to release resources
such as open files, network connections, or connections to databases.
|
/*
* target_disconnect
*
* Called when the client disconnects. Destroys the grep_state
*/
int target_disconnect(void *v_state)
{
grep_state *state = (grep_state *) v_state;
pList_freeAll(state->files, (Destructor)free);
free(state->dir);
free(state);
return 1;
}
|