index and incoming links

This commit is contained in:
sejo 2021-09-03 18:11:24 -05:00
parent de80a3a6e6
commit 1b095f2c94
1 changed file with 92 additions and 10 deletions

View File

@ -3,15 +3,30 @@
#include <string.h>
#include <dirent.h>
#define NPAGES 256
typedef struct{
char outgoing[256][256];
int nlinks[256];
} Links;
char filenames[NPAGES][256]; /* array of filenames */
char gminames[NPAGES][256];
char htmlnames[NPAGES][256];
char wikinames[NPAGES][256];
int incominglinks[NPAGES][NPAGES]; /* array of indexes */
int nilinks[NPAGES]; /* how many incoming links per page */
int count; /* total number of pages */
} Pages;
int main(int argc, char * argv[]){
DIR * d;
FILE * f;
struct dirent * entry;
Links links;
Pages p;
int i,j;
for(i=0; i<NPAGES; i++){
p.nilinks[i] = 0;
}
/* clean pages.gmo */
remove("src/pages.gmo");
/* open directory */
if( !(d = opendir("src")) ){
@ -19,20 +34,87 @@ int main(int argc, char * argv[]){
return 1;
}
int i=0;
int fileindex=0;
char * ret;
/* for each file */
while( (entry = readdir(d)) ){
/* find gmo files */
if( (ret = strstr( entry->d_name, ".gmo") ) ){
printf("%s\n", entry->d_name);
strcpy ( links.outgoing[i], entry->d_name );
printf("%s\n", links.outgoing[i]);
if( (ret = strstr( entry->d_name, ".gmo") ) ){
/* add to index */
strcpy( p.filenames[fileindex], "src/");
strcat( p.filenames[fileindex], entry->d_name );
/* strcpy ( p.filenames[fileindex], entry->d_name ); */
/* add gmi name */
strcpy ( p.gminames[fileindex], entry->d_name );
ret = strstr( p.gminames[fileindex], ".gmo");
strcpy( ret, ".gmi");
/* add html name */
strcpy( p.htmlnames[fileindex], entry->d_name );
ret = strstr( p.htmlnames[fileindex], ".gmo");
strcpy( ret, ".html");
/* add wikiname */
strcpy( p.wikinames[fileindex], entry->d_name );
ret = strstr(p.wikinames[fileindex], ".gmo");
*ret = '\0'; /* remove suffix */
/* convert _ to spaces: */
for(i=0; p.wikinames[fileindex][i]; i++){
if(p.wikinames[fileindex][i]=='_')
p.wikinames[fileindex][i] = ' ';
}
fileindex++;
}
}
p.count = fileindex;
printf("%d gmo files\n",p.count);
closedir(d); /* close directory */
closedir(d);
/* write index: pages.gmo */
f = fopen("src/pages.gmo","w");
fputs("# index of pages\n\n",f);
for(i=0; i<p.count; i++){
fprintf(f,"=> ./%s {%s}\n",p.gminames[i],p.wikinames[i]);
}
fclose(f);
/* search for incoming links */
int premode = 0;
char line[1024];
char link[1024];
for(i=0; i<p.count; i++){
premode = 0;
printf("scanning %s for outgoing links...\n",p.filenames[i]);
f = fopen(p.filenames[i],"r");
while( fgets(line, 1024, f) ){
if( strncmp( line, "```", 3) == 0){ /* check for pre-mode */
premode = !premode;
}
if( !premode ){
/* search for wikilink */
if( (ret=strchr(line,'{') )){
for(j=1; ret[j]!='}';j++){
link[j-1]=ret[j];
}
link[j-1] = '\0'; /* link has wikilink name */
/* search for wikiname to assign incoming link: */
for(j=0; j<p.count; j++){
if(strcmp(p.wikinames[j],link)==0){
p.incominglinks[j][ p.nilinks[j]++ ] = i;
}
}
printf("%s\n",link);
}
}
}
fclose(f);
}
for(i=0; i<p.count; i++){
printf("%s has %d incoming links\n",p.wikinames[i],p.nilinks[i]);
}
return 0;