Reporter: Warren Jones
Tue Mar 26 17:28:31 1996

Problem: When using the "-l" flag, symbolic links can result in
multiple index entries for a single document or (worse yet)
endless looping.

Solution: Use a hash to keep track of files that we've already
indexed by device and inode.

Index: index.c
===================================================================
RCS file: /usr0/wjones/src/CVS.repo/swish/src/index.c,v
retrieving revision 1.1.1.1
diff -c -r1.1.1.1 index.c
*** index.c 1995/10/18 18:43:27 1.1.1.1
--- index.c 1995/10/25 21:11:16
***************
*** 7,12 ****
--- 7,60 ----
  
  #include "swish.h"
  #include "index.h"
+ #include "hash.h"
+ 
+ /* Have we already indexed a file or directory?  Returns 1 when the
+ ** device/inode pair that stat() reports for path was recorded by an
+ ** earlier call; otherwise records the pair and returns 0 (also 0 if stat()
+ ** fails).  Avoids multiple index entries or endless looping via symlinks. */
+ 
+ int already_indexed( path )
+ char *path;
+ {
+     static struct dev_ino {     /* chain node: identity of one recorded file */
+         dev_t dev;
+         ino_t ino;
+         struct dev_ino *next;
+     } *inode_hash[BIGHASHSIZE], *p;   /* static: the table persists between calls */
+ 
+     struct stat buf;
+     char key[34];   /* Hash key -- room for two 64 bit hex values, a slash and a NUL */
+     unsigned hashval;
+ 
+     if ( stat( path, &buf ) )   /* stat() failed: nothing is recorded */
+         return 0;
+ 
+     /* Create hash key: string contains device and inode. */
+     sprintf( key, "%lx/%lx", (unsigned long)buf.st_dev,
+                              (unsigned long)buf.st_ino );
+     /* NOTE(review): assumes bighash() yields an index below BIGHASHSIZE -- confirm in hash.h */
+     hashval = bighash(key);     /* The key only picks the chain; dev/ino are compared below. */
+     for ( p = inode_hash[hashval]; p != NULL; p = p->next )
+         if ( p->dev == buf.st_dev &&
+              p->ino == buf.st_ino )
+         {                       /* We found it. */
+             if ( verbose == 3 )
+                 printf( "Skipping %s: %s\n",
+                         path, "Already indexed." );
+             return 1;
+         }
+ 
+     /* Not found: link a new entry in at the head of this chain. */
+     p = (struct dev_ino*)emalloc(sizeof(struct dev_ino));   /* used unchecked; presumably emalloc() never returns NULL -- confirm */
+     p->dev = buf.st_dev;
+     p->ino = buf.st_ino;
+     p->next = inode_hash[hashval];
+     inode_hash[hashval] = p;
+ 
+     return 0;
+ }
+ 
  
  /* Recursively goes into a directory and calls the word-indexing
  ** functions for each file that's found.
*************** *** 31,36 **** --- 79,87 ---- if (islink(dir) && !followsymlinks) return; + if ( already_indexed(dir) ) + return; + if (dir[strlen(dir) - 1] == '/') dir[strlen(dir) - 1] = '\0'; *************** *** 103,108 **** --- 154,162 ---- if (!isdirectory(s)) { + if ( already_indexed(s) ) + continue; + if (!isoksuffix(dp->d_name, suffixlist)) continue; *************** *** 144,149 **** --- 198,206 ---- struct swline *tmplist; if (islink(path) && !followsymlinks) + return; + + if ( already_indexed(path) ) return; if (path[strlen(path) - 1] == '/')