/********************************
 *
 * censor.c
 * Control program for CGI version of
 * censorship avoidance proof of concept
 *
 * This program has been compiled and run under
 * Windows 2000 and FreeBSD 4.4
 *
 *******************************/

#include "censor.h"

/* Global variables */
char m_chrarrDataOUT[BUFSIZE];   /* request path on input; rewritten output produced by parseHTML */
char m_chrarrLinkData[BUFSIZE];  /* rewritten link text handed from getLinkRewrite to parseHTML */
char m_chrarrDocRoot[BUFSIZE];   /* requested path up to and including its last '/' */
int m_intHostIndex;              /* host selected by the query string, -1 if none */

enum TAGTYPES{
    enuTagNotFound = 0,
    enuHREF,
    enuAREA,
    enuIMGSRC,
    enuFRAME,
    enuBODY,
    enuLINK,
    enuSCRIPT
};

/* Function prototypes */
int strncmpIC(char * strA, char * strB, int maxlen);
int charIN(char testChar, char * searchChars);
void printHTMLError(char * err);
void printWelcomeScreen();
int parseHTML(char *chrarrDataIN, int intLen);
int getLinkRewrite(char *chrLinkData, int intMaxLen);

/********************************
 *
 * emitChunk (file-local helper for main)
 * Write one chunk of response data to stdout and, when fo is
 * non-NULL, to the debug copy as well.  When intParse is non-zero
 * the chunk is first passed through parseHTML, which is called
 * again with NULL until it reports no more output.
 *
 *******************************/
static void emitChunk(char *chrptrData, int intLen, int intParse, FILE *fo){
    int intOutputBytes;

    if (intParse == 0){
        /* not HTML (probably image data): pass through untouched */
        fwrite(chrptrData, sizeof(char), intLen, stdout);
        if (fo) fwrite(chrptrData, sizeof(char), intLen, fo);
        return;
    }

    /* it is HTML, scan through and 'fix' links */
    intOutputBytes = parseHTML(chrptrData, intLen);
    do {
        fwrite(m_chrarrDataOUT, sizeof(char), intOutputBytes, stdout);
        if (fo) fwrite(m_chrarrDataOUT, sizeof(char), intOutputBytes, fo);
    }while ((intOutputBytes = parseHTML(NULL, 0)) > 0);
}

/********************************
 *
 *
 * Main entry point of program
 *
 * Reads QUERY_STRING, which is expected to be "<digits><path>":
 * the leading digits (at most 9) select an entry of m_HostArray and
 * the remainder is the path to request from that host.  With no
 * query string the welcome screen is printed.  Otherwise the page
 * is fetched with an HTTP/1.0 GET and copied to stdout starting at
 * its Content-Type header; text/html responses are run through
 * parseHTML on the way.
 *
 * Returns 0 on success.  Every failure goes through printHTMLError,
 * which does not return.
 *
 *******************************/
int main(void){
    char chrarrHostName[HOSTSIZE];
    char chrarrDataIN[BUFSIZE];
    char chrarrRequest[BUFSIZE], *chrptrQueryString;
    int intSD, intCntr, intOutputHeadersFlag, intBytesRecieved;
    int intParseFlag = 0;
    struct sockaddr_in pin;
    struct hostent *hp;
    FILE *fo=NULL, *fraw=NULL;
#ifdef WIN32_LEAN_AND_MEAN
    WSADATA wsaData;
    int intWSAResult;
#endif

    m_intHostIndex = -1;
    chrptrQueryString = getenv("QUERY_STRING");

    /* different platforms return NULL or zero length string */
    if (chrptrQueryString && chrptrQueryString[0] != '\0'){
        int intHostCount, maxid, i, j;
        char val[10];

        memset(val, '\0', sizeof(val));

        /* the host table is terminated by an entry with a negative id */
        for (intHostCount=0; m_HostArray[intHostCount].id >= 0; intHostCount++) {;}
        if (intHostCount == 0){
            /* nothing to index; also avoids reading m_HostArray[-1] below */
            printHTMLError("No hosts configured!\n");
        }
        maxid = m_HostArray[intHostCount-1].id;

        /* collect up to 9 leading digits; val keeps its terminating NUL */
        i = 0;
        while (i < 9 && isdigit((unsigned char)chrptrQueryString[i])){
            val[i] = chrptrQueryString[i];
            i++;
        }
        /* a tenth digit means the number is longer than we accept */
        if (i == 9 && isdigit((unsigned char)chrptrQueryString[i]))
            printHTMLError("(2) Invalid input data!\n");

        m_intHostIndex = atoi(val);
        /* the value is used both as an id and as an index into
           m_HostArray, so it has to satisfy both limits */
        if (m_intHostIndex < 0 || m_intHostIndex > maxid || m_intHostIndex >= intHostCount){
            /* val holds at most 9 digits, so this cannot overrun chrarrDataIN */
            sprintf(chrarrDataIN, "(3) Invalid input data!\n%d [%s] %d", m_intHostIndex, val, maxid);
            printHTMLError(chrarrDataIN);
        }

        if (chrptrQueryString[i] != '\0'){/* we have a request */
            j = 0;
            while (j < BUFSIZE - 100 && chrptrQueryString[i] != '\0'){
                m_chrarrDataOUT[j] = chrptrQueryString[i];
                m_chrarrDocRoot[j] = chrptrQueryString[i];
                i++;
                j++;
            }
            /* refuse over-long requests instead of silently truncating them;
               this also keeps the sprintf below within chrarrRequest */
            if (chrptrQueryString[i] != '\0'){
                printHTMLError("m_chrarrDataOUT too big!\n");
            }
            m_chrarrDataOUT[j] = '\0';
            m_chrarrDocRoot[j] = '\0';

            /* to obtain document root, back up until a slash is found */
            while (j>0 && m_chrarrDocRoot[j] != '/') j--;
            m_chrarrDocRoot[j++] = '/';
            m_chrarrDocRoot[j] = '\0';

            sprintf(chrarrRequest, "GET %s HTTP/1.0\nUser-Agent: anti-censor 0.1\n\n", m_chrarrDataOUT);
        }else{
            /* either no request, or we ignore the value */
            sprintf(chrarrRequest, "GET / HTTP/1.0\nUser-Agent: anti-censor 0.1\n\n");
            sprintf(m_chrarrDocRoot, "/");
        }
    }

    if (m_intHostIndex < 0){
        printWelcomeScreen();
        exit(0);
    }

    if (DO_DEBUG == 1){/* for testing */
        int i;
        char fileName[BUFSIZE];
        FILE *fprobe;

        /* find first available name, quit this after 100 files;
           each probe handle is closed again so none are leaked */
        for (i=0; i < 100; i++){
            sprintf(fileName, "tmp%d.htm", i);
            if ((fprobe = fopen(fileName, "r")) == NULL) break;
            fclose(fprobe);
        }
        if (i < 100){
            sprintf(fileName, "tmp%d.htm", i);
            if ((fo = fopen(fileName, "w")) == NULL){
                sprintf(fileName, "Can't open tmp%d.htm!\n", i);
                printHTMLError(fileName);
            }
            sprintf(fileName, "raw%d.dat", i);
            if ((fraw = fopen(fileName, "w")) == NULL){
                sprintf(fileName, "Can't open raw%d.dat!\n", i);
                printHTMLError(fileName);
            }
        }
    }

    /* strncpy does not terminate when the source fills the buffer */
    strncpy(chrarrHostName, m_HostArray[m_intHostIndex].name, HOSTSIZE);
    chrarrHostName[HOSTSIZE - 1] = '\0';

#ifdef WIN32_LEAN_AND_MEAN
    /* WSAStartup reports failure through its return value */
    intWSAResult = WSAStartup(0x202,&wsaData);
    if (intWSAResult != 0) {
        sprintf(chrarrDataIN,"WSAStartup failed with error %d\n",intWSAResult);
        printHTMLError(chrarrDataIN);
    }
#endif

    /* go find out about the desired host machine */
    if ((hp = gethostbyname(chrarrHostName)) == NULL) {
        printHTMLError("gethostbyname error\n");
    }

    /* fill in the socket structure with host information */
    memset(&pin, 0, sizeof(pin));
    pin.sin_family = AF_INET;
    pin.sin_addr.s_addr = ((struct in_addr *)(hp->h_addr))->s_addr;
    pin.sin_port = htons(PORT);

    /* grab an Internet domain socket */
    if ((intSD = socket(AF_INET, SOCK_STREAM, 0)) == -1) {
        printHTMLError("socket error\n");
    }

    /* connect to PORT on HOST */
    if (connect(intSD,(struct sockaddr *) &pin, sizeof(pin)) == -1) {
        printHTMLError("connect error\n");
    }

    /* send a message to the server PORT on machine HOST */
    if (send(intSD, chrarrRequest, strlen(chrarrRequest), 0) == -1) {
        printHTMLError("send error\n");
    }

    /* wait for a message to come back from the server; one byte of the
       buffer is held back for the terminating NUL written below */
    intOutputHeadersFlag = 1;
    while ((intBytesRecieved = recv(intSD, chrarrDataIN, BUFSIZE - 1, 0)) > 0) {
        chrarrDataIN[intBytesRecieved] = '\0';

        if (fraw){
            fwrite(chrarrDataIN, sizeof(char), intBytesRecieved, fraw);
            fprintf(fraw, "\n\n[END OF BLOCK]\n\n");
        }

        if (intOutputHeadersFlag == 1){
            /* index past server headers to 'Content-Type'...; the match is
               case-insensitive at every position.  A chunk that does not
               contain the header is dropped and the next one is searched. */
            for (intCntr = 0; intCntr < intBytesRecieved; intCntr++){
                if (strncmpIC(&chrarrDataIN[intCntr], "Content-Type", 12)) break;
            }

            if (intCntr < intBytesRecieved){
                /* check to see if this is HTML, and if so set parse flag accordingly */
                if (strncmpIC(&chrarrDataIN[intCntr], "Content-Type: text/html", strlen("Content-Type: text/html")))
                    intParseFlag = 1;
                intOutputHeadersFlag = 0;
                emitChunk(&chrarrDataIN[intCntr], intBytesRecieved-intCntr, intParseFlag, fo);
            }
        }else{
            /* we have moved past the headers, just dump the data out */
            emitChunk(chrarrDataIN, intBytesRecieved, intParseFlag, fo);
        }
    }

#ifdef WIN32_LEAN_AND_MEAN
    closesocket(intSD);
#else
    close(intSD); /*close gave C++ a compiler error in this program for some reason: implicit declaration of function `int close(...)', but seems to work fine with C */
#endif

    if (fo) fclose(fo);
    if (fraw) fclose(fraw);

    return 0;
}

/********************************
 *
 *
 * printHTMLError
 * output error message for
diagnostics * * *******************************/ void printHTMLError(char * err){ printf("Content-Type: text/html\n\n"); printf("Error\n"); printf("

Error detected!

\n"); printf("

", err); printf("\n"); exit(1); } /******************************** * * printWelcomeScreen * Initial screen displayed for user if no arguments supplied * * * *******************************/ void printWelcomeScreen(){ int i; printf("Content-Type: text/html\n\n"); printf("\n"); printf("\n"); printf("\n"); printf("Welcome to Keith Oxenrider's Censorship Avoidance Site!\n"); printf("

Welcome to Keith Oxenrider's Censorship Avoidance Site!

\n"); printf("Here you can browse the following sites:

\n"); printf("NOTE: This program was tested against IE 5.01 and 5.5 on Windows 2000.\n
"); printf("It may work fine on other platforms, but no effort has been made to test it.

\n"); printf("This is a simple proof-of-concept program and is not intended to\n
"); printf("handle more than simple HTML tags.\n

"); printf("You may view the source by clicking on the link below:\n
"); printf("Source.\n
"); printf("

\n"); return; } /******************************** * * parseHTML * Replace all hyperlinks with redirection code * Returns number of characters to write * *******************************/ int parseHTML(char *chrarrDataIN, int intLen){ static sintLen, sintChrIndex; int intPtr = 0, intMvPtrTo, i; static char *schrPtr; if (chrarrDataIN){ schrPtr = chrarrDataIN; sintChrIndex = 0; sintLen = intLen; } while (sintChrIndex < BUFSIZE && sintChrIndex < sintLen){ if (schrPtr[sintChrIndex] == '<'){ intMvPtrTo = getLinkRewrite(&schrPtr[sintChrIndex+1], sintLen-sintChrIndex); if (intMvPtrTo > 0){ sintChrIndex += intMvPtrTo; i = 0; while (m_chrarrLinkData[i] != '\0') m_chrarrDataOUT[intPtr++] = m_chrarrLinkData[i++]; } } m_chrarrDataOUT[intPtr++] = schrPtr[sintChrIndex++]; } m_chrarrDataOUT[intPtr] = '\0'; return intPtr; } /******************************** * * getLinkRewrite * Returns number of bytes to move pointer * Calling routine copies null terminated value from m_chrarrLinkData * *******************************/ /******************************** * * getLinkRewrite * Returns number of bytes to move pointer * Calling routine copies null terminated value from m_chrarrLinkData * *******************************/ int getLinkRewrite(char *chrLinkData, int intMaxLen){ int i, j, k, tagType = enuTagNotFound, findLen; int linkIndex; char tmp[10]; /* strncpy(tmp, chrLinkData, 9); tmp[9] = '\0'; fprintf(stderr, "%s\n", tmp); */ if (strncmpIC(chrLinkData, "!--", strlen("!--"))) j = 0; /* look for a closing bracket, if not found, it crosses a batch boundary, just dump it for now (later work up code to handle this) */ j = 0; while (j') /* this tag lacks an ' HREF' */ return 0; } }else if (tagType == enuIMGSRC || tagType == enuFRAME || tagType == enuSCRIPT){ findLen = 4; while (i') /* this tag lacks a SRC */ return 0; m_chrarrLinkData[i++] = chrLinkData[j++]; } }else if (tagType == enuBODY){ findLen = 10; while (i') /* this tag lacks a BACKGROUND */ return 0; m_chrarrLinkData[i++] = 
chrLinkData[j++]; } } /* copy what was searched for (i.e., ' HREF', ' SRC', etc.) */ for (k=0; k= 0){ if (strncmpIC(&chrLinkData[j+7], m_HostArray[k].name, strlen(m_HostArray[k].name))){ j += 7 + strlen(m_HostArray[k].name); break; } k++; } if (m_HostArray[k].id < 0) return 0; linkIndex = m_HostArray[k].id; } if (linkIndex < 0) linkIndex = m_intHostIndex; /* add in the required link elements */ k = 0; while (g_chrarrRequestRoot[k] != '\0') m_chrarrLinkData[i++] = g_chrarrRequestRoot[k++]; m_chrarrLinkData[i++] = '?'; sprintf(tmp, "%d", linkIndex); k = 0; while (tmp[k] != '\0') m_chrarrLinkData[i++] = tmp[k++]; /* then the document root (if necessary) */ if (chrLinkData[j] != '/'){ /* it is not already referencing root */ k = 0; while (m_chrarrDocRoot[k] != '\0') m_chrarrLinkData[i++] = m_chrarrDocRoot[k++]; } while (i's strncmp, but case free * and returns 1 if a match is made, zero otherwise (opposite of strncmp) * *******************************/ int strncmpIC(char * strA, char * strB, int maxlen){ int i=0; while (strA[i] != '\0' && strB[i] != '\0' && i < maxlen){ if (toupper(strA[i]) != toupper(strB[i])) break; i++; } if (i == maxlen) return 1; else return 0; } /******************************** * * * chrIN, test to see if character (testChar) matches any * characters in searchChars (ignores case). If so * return 1 (true). * *******************************/ int charIN(char testChar, char * searchChars){ int i = 0; while (searchChars[i] != '\0'){ if (toupper(searchChars[i]) == toupper(testChar)) return 1; i++; } return 0; }