/********************************
 *
 * censor.c
 * Control program for CGI version of
 * censorship avoidance proof of concept
 *
 * This program has been compiled and run under
 * Windows 2000 and FreeBSD 4.4
 *
 *******************************/

#include "censor.h"

/* Global variables */

/* In main(): first holds the request path taken from QUERY_STRING, later the
   rewritten HTML that is written out after each parseHTML() call. */
char m_chrarrDataOUT[BUFSIZE];
char m_chrarrLinkData[BUFSIZE];
/* Request path up to and including its last '/' (set in main()). */
char m_chrarrDocRoot[BUFSIZE];
/* Index into m_HostArray chosen by the leading digits of QUERY_STRING;
   -1 means "no host selected" and makes main() show the welcome screen. */
int m_intHostIndex;

enum TAGTYPES{
    enuTagNotFound = 0,
    enuHREF,
    enuAREA,
    enuIMGSRC,
    enuFRAME,
    enuBODY,
    enuLINK,
    enuSCRIPT
};

/* Function prototypes */
int strncmpIC(char * strA, char * strB, int maxlen);
int charIN(char testChar, char * searchChars);
void printHTMLError(char * err);
void printWelcomeScreen();
int parseHTML(char *chrarrDataIN, int intLen);
int getLinkRewrite(char *chrLinkData, int intMaxLen);

/*
 * fatalHTMLError
 * Report an error through printHTMLError() and terminate the CGI process.
 *
 * main() goes on to dereference pointers and index arrays right after each
 * error report, so it must never continue past one.  Whether printHTMLError()
 * terminates by itself cannot be relied on from here, so the exit is made
 * explicit.  Status 0 matches the other exit() calls in this file.
 */
static void fatalHTMLError(char * err){
    printHTMLError(err);
    exit(0);
}

/********************************
 *
 *
 * Main entry point of program
 *
 * Reads QUERY_STRING, which is expected to be "<host index><request path>":
 * up to 9 leading decimal digits select an entry of m_HostArray and the
 * remainder (if any) is the path to fetch.  The page is requested from that
 * host with a plain HTTP/1.0 GET and relayed to stdout starting at the
 * server's Content-Type header.  text/html responses are passed through
 * parseHTML() so links can be rewritten; everything else is copied verbatim.
 *
 * With no query string the welcome screen is printed instead.
 * Always returns 0; errors are reported as an HTML page and the process exits.
 *
 *******************************/
int main(){
    char chrarrHostName[HOSTSIZE];
    char chrarrDataIN[BUFSIZE];
    char chrarrRequest[BUFSIZE], *chrptrQueryString;
    int intSD, intCntr, intOutputHeadersFlag, intBytesRecieved;
    int intParseFlag = 0;
    struct sockaddr_in pin;
    struct hostent *hp;
    FILE *fo=NULL, *fraw=NULL;
#ifdef WIN32_LEAN_AND_MEAN
    WSADATA wsaData;
#endif

    m_intHostIndex = -1;
    chrptrQueryString = getenv("QUERY_STRING");

    /* different platforms return NULL or zero length string */
    if (chrptrQueryString && strlen(chrptrQueryString) > 0){
        int maxid, i, j;
        char val[10];

        memset(val, '\0', 10);

        /* the host table ends with an entry whose id is negative */
        for (i=0; m_HostArray[i].id >= 0; i++) {;}
        if (i == 0){
            /* empty table: m_HostArray[i-1] would read before the array */
            fatalHTMLError("No hosts configured!\n");
        }
        maxid = m_HostArray[i-1].id;

        /* collect at most 9 leading digits; val[9] stays '\0' */
        i = 0;
        while (i < 9 && isdigit((unsigned char)chrptrQueryString[i])){
            val[i] = chrptrQueryString[i];
            i++;
        }
        /* a digit here can only mean a 10th digit was supplied */
        if (isdigit((unsigned char)chrptrQueryString[i])){
            fatalHTMLError("(2) Invalid input data!\n");
        }

        /* no leading digits leaves val empty, which atoi() turns into 0 */
        m_intHostIndex = atoi(val);
        if (m_intHostIndex < 0 || m_intHostIndex > maxid){
            sprintf(chrarrDataIN, "(3) Invalid input data!\n%d [%s] %d", m_intHostIndex, val, maxid);
            fatalHTMLError(chrarrDataIN);
        }

        if (chrptrQueryString[i] != '\0'){/* we have a request */
            j = 0;
            /* leave 100 bytes of headroom for the request line built below */
            while (j < BUFSIZE - 100 && chrptrQueryString[i] != '\0'){
                m_chrarrDataOUT[j] = chrptrQueryString[i];
                m_chrarrDocRoot[j] = chrptrQueryString[i];
                i++;
                j++;
            }
            m_chrarrDataOUT[j] = '\0';
            m_chrarrDocRoot[j] = '\0';

            /* to obtain document root, back up until a slash is found */
            while (j>0 && m_chrarrDocRoot[j] != '/') j--;
            m_chrarrDocRoot[j++] = '/';
            m_chrarrDocRoot[j] = '\0';

            if (strlen(m_chrarrDataOUT) > BUFSIZE - 100){/* ensure no buffer overruns on sprintf */
                fatalHTMLError("m_chrarrDataOUT too big!\n");
            }
            sprintf(chrarrRequest, "GET %s HTTP/1.0\nUser-Agent: anti-censor 0.1\n\n", m_chrarrDataOUT);
        }else{
            /* either no request, or we ignore the value */
            sprintf(chrarrRequest, "GET / HTTP/1.0\nUser-Agent: anti-censor 0.1\n\n");
            sprintf(m_chrarrDocRoot, "/");
        }
    }

    if (m_intHostIndex < 0){
        printWelcomeScreen();
        exit(0);
    }

    if (DO_DEBUG == 1){/* for testing */
        int i=0;
        char fileName[BUFSIZE];
        FILE *probe;

        /* find first available name, quit this after 100 files.
           Each probe handle is closed again so no streams are leaked and
           'fo' is never left pointing at a file opened for reading. */
        do {
            sprintf(fileName, "tmp%d.htm", i++);
            probe = fopen(fileName, "r");
            if (probe != NULL) fclose(probe);
        }while (probe != NULL && i < 100);

        if (i < 100){
            i--;
            sprintf(fileName, "tmp%d.htm", i);
            if ((fo = fopen(fileName, "w")) == NULL){
                sprintf(fileName, "Can't open tmp%d.htm!\n", i);
                fatalHTMLError(fileName);
            }
            sprintf(fileName, "raw%d.dat", i);
            if ((fraw = fopen(fileName, "w")) == NULL){
                sprintf(fileName, "Can't open raw%d.dat!\n", i);
                fatalHTMLError(fileName);
            }
        }
    }

    /* strncpy does not terminate the copy when the source fills the buffer */
    strncpy(chrarrHostName, m_HostArray[m_intHostIndex].name, HOSTSIZE);
    chrarrHostName[HOSTSIZE - 1] = '\0';

#ifdef WIN32_LEAN_AND_MEAN
    if (WSAStartup(0x202,&wsaData) == SOCKET_ERROR) {
        sprintf(chrarrDataIN,"WSAStartup failed with error %d\n",WSAGetLastError());
        WSACleanup();
        fatalHTMLError(chrarrDataIN);
    }
#endif

    /* go find out about the desired host machine */
    if ((hp = gethostbyname(chrarrHostName)) == NULL) {
        fatalHTMLError("gethostbyname error\n");
    }

    /* fill in the socket structure with host information */
    memset(&pin, 0, sizeof(pin));
    pin.sin_family = AF_INET;
    pin.sin_addr.s_addr = ((struct in_addr *)(hp->h_addr))->s_addr;
    pin.sin_port = htons(PORT);

    /* grab an Internet domain socket */
    if ((intSD = socket(AF_INET, SOCK_STREAM, 0)) == -1) {
        fatalHTMLError("socket error\n");
    }

    /* connect to PORT on HOST */
    if (connect(intSD,(struct sockaddr *) &pin, sizeof(pin)) == -1) {
        fatalHTMLError("connect error\n");
    }

    /* send a message to the server PORT on machine HOST */
    if (send(intSD, chrarrRequest, strlen(chrarrRequest), 0) == -1) {
        fatalHTMLError("send error\n");
    }

    /* wait for a message to come back from the server.
       Only BUFSIZE-1 bytes are requested so that the terminator written
       below always fits inside chrarrDataIN. */
    intOutputHeadersFlag = 1;
    while ((intBytesRecieved = recv(intSD, chrarrDataIN, BUFSIZE - 1, 0)) != -1) {
        if (intBytesRecieved == 0) break;
        chrarrDataIN[intBytesRecieved] = '\0';

        if (fraw){
            fwrite(chrarrDataIN, sizeof(char), intBytesRecieved, fraw);
            fprintf(fraw, "\n\n[END OF BLOCK]\n\n");
        }

        if (intOutputHeadersFlag == 1){
            intCntr = 0;
            /* index past server headers to 'Content-Type'...
               strncmpIC is used here as returning non-zero on a match.
               NOTE(review): a block received before the one containing
               Content-Type is not written to stdout at all. */
            while (1){
                while (chrarrDataIN[intCntr] != 'C'){
                    intCntr++;
                    if (intCntr >= intBytesRecieved) break;
                }
                if (strncmpIC(&chrarrDataIN[intCntr], "Content-Type", 12)) break;
                intCntr++;
                if (intCntr >= intBytesRecieved) break;
            }

            if (intCntr < intBytesRecieved){
                /* check to see if this is HTML, and if so set parse flag accordingly */
                if (strncmpIC(&chrarrDataIN[intCntr], "Content-Type: text/html", strlen("Content-Type: text/html")))
                    intParseFlag = 1;
                intOutputHeadersFlag = 0;

                if (intParseFlag == 0){
                    /* just write the data out (probably image data) */
                    fwrite(&chrarrDataIN[intCntr], sizeof(char), intBytesRecieved-intCntr, stdout);
                    if (fo) fwrite(&chrarrDataIN[intCntr], sizeof(char), intBytesRecieved-intCntr, fo);
                }else{
                    /* it is HTML, scan through and 'fix' links; parseHTML is
                       called again with NULL until it reports no more output */
                    int intOutputBytes;
                    intOutputBytes = parseHTML(&chrarrDataIN[intCntr], intBytesRecieved-intCntr);
                    do {
                        fwrite(m_chrarrDataOUT, sizeof(char), intOutputBytes, stdout);
                        if (fo) fwrite(m_chrarrDataOUT, sizeof(char), intOutputBytes, fo);
                    }while ((intOutputBytes = parseHTML(NULL, 0)) > 0);
                }
            }
        }else{
            /* we have moved past the headers, just dump the data out */
            if (intParseFlag == 0){
                /* not HTML, just output */
                fwrite(chrarrDataIN, sizeof(char), intBytesRecieved, stdout);
                if (fo) fwrite(chrarrDataIN, sizeof(char), intBytesRecieved, fo);
            }else{
                /* parse for links, fix, then output */
                int intOutputBytes;
                intOutputBytes = parseHTML(chrarrDataIN, intBytesRecieved);
                do {
                    fwrite(m_chrarrDataOUT, sizeof(char), intOutputBytes, stdout);
                    if (fo) fwrite(m_chrarrDataOUT, sizeof(char), intOutputBytes, fo);
                }while ((intOutputBytes = parseHTML(NULL, 0)) > 0);
            }
        }
    }

#ifdef WIN32_LEAN_AND_MEAN
    closesocket(intSD);
#else
    close(intSD); /*close gave C++ a compiler error in this program for some reason:
                    implicit declaration of function `int close(...)',
                    but seems to work fine with C */
#endif

    if (fo) fclose(fo);
    if (fraw) fclose(fraw);
    return 0;
}

/********************************
 *
 *
 * printHTMLError
 * output error message for
diagnostics * * *******************************/ void printHTMLError(char * err){ printf("Content-Type: text/html\n\n"); printf("
\n"); printf("
\n"); printf("Here you can browse the following sites:
\n");
printf("This is a simple proof-of-concept program and is not intended to\n
");
printf("handle more than simple HTML tags.\n
");
printf("You may view the source by clicking on the link below:\n
");
printf("Source.\n
");
printf("