var http = require('http');

// Minimal forwarding proxy: every incoming request is replayed against port 80
// of the host named in its Host header, and the reply is streamed back.
http.createServer(function(request, response) {
  var target_host = request.headers['host'];
  var proxy = http.createClient(80, target_host);
  var proxy_request = proxy.request(request.method, request.url, request.headers);

  // Client -> origin: pass the request body through chunk by chunk.
  request.addListener('data', function(chunk) {
    proxy_request.write(chunk, 'binary');
  });
  request.addListener('end', function() {
    proxy_request.end();
  });

  // Origin -> client: status line and headers first, then the body.
  proxy_request.addListener('response', function (proxy_response) {
    response.writeHead(proxy_response.statusCode, proxy_response.headers);
    proxy_response.addListener('data', function(chunk) {
      response.write(chunk, 'binary');
    });
    proxy_response.addListener('end', function() {
      response.end();
    });
  });
}).listen(8080);

This is just amazing. In 20 lines of node.js code and 10 minutes of time I was able to write an HTTP proxy. And it scales well, too. It's not a blocking HTTP proxy; it's event-driven and asynchronous, meaning hundreds of people can use it simultaneously and it will work well.

To get the proxy running all you have to do is download node.js, compile it, and run the proxy program via the node program:

$ ./configure --prefix=/home/pkrumins/installs/nodejs-0.1.92
$ make
$ make install

$ PATH=$PATH:/home/pkrumins/installs/nodejs-0.1.92/bin

$ node proxy.js

And from here you can take this proxy wherever your imagination takes you. For example, you can start by adding logging:

var http = require('http');
var sys  = require('sys');

// Forwarding proxy that logs one line per request (client address, method,
// URL) before relaying it to port 80 of the host named in the Host header.
http.createServer(function(request, response) {
  var client_ip = request.connection.remoteAddress;
  sys.log(client_ip + ": " + request.method + " " + request.url);

  var target_host = request.headers['host'];
  var proxy = http.createClient(80, target_host);
  var proxy_request = proxy.request(request.method, request.url, request.headers);

  // Client -> origin: pass the request body through chunk by chunk.
  request.addListener('data', function(chunk) {
    proxy_request.write(chunk, 'binary');
  });
  request.addListener('end', function() {
    proxy_request.end();
  });

  // Origin -> client: status line and headers first, then the body.
  proxy_request.addListener('response', function (proxy_response) {
    response.writeHead(proxy_response.statusCode, proxy_response.headers);
    proxy_response.addListener('data', function(chunk) {
      response.write(chunk, 'binary');
    });
    proxy_response.addListener('end', function() {
      response.end();
    });
  });
}).listen(8080);

Next, you can add a regex-based host blacklist in 15 additional lines:

var http = require('http');
var sys  = require('sys');
var fs   = require('fs');

// Compiled blacklist patterns; replaced wholesale on every reload.
var blacklist = [];

// Reload the patterns whenever the blacklist file changes on disk.
fs.watchFile('./blacklist', function(c,p) { update_blacklist(); });

/**
 * Re-reads ./blacklist and rebuilds the `blacklist` array.
 *
 * The file holds one regular expression per line; empty lines are skipped.
 * The encoding is passed explicitly so that readFileSync yields a string
 * (which has split()) instead of a Buffer on Node versions where a Buffer
 * is returned when no encoding is given.
 */
function update_blacklist() {
  sys.log("Updating blacklist.");
  blacklist = fs.readFileSync('./blacklist', 'utf8').split('\n')
              .filter(function(rx) { return rx.length; })
              .map(function(rx) { return RegExp(rx); });
}

// Forwarding proxy with a URL blacklist. Requests that pass the blacklist are
// logged and relayed to port 80 of the host named in the Host header.
http.createServer(function(request, response) {
  // Refuse any request whose URL matches a blacklist pattern; the reply is
  // ended without a body. The index is declared locally so it does not leak
  // into the global scope.
  for (var i = 0; i < blacklist.length; i++) {
    if (blacklist[i].test(request.url)) {
      sys.log("Denied: " + request.method + " " + request.url);
      response.end();
      return;
    }
  }

  sys.log(request.connection.remoteAddress + ": " + request.method + " " + request.url);
  var proxy = http.createClient(80, request.headers['host']);
  var proxy_request = proxy.request(request.method, request.url, request.headers);

  // Origin -> client: relay the body chunks, then send status and headers
  // registration happens before any body data can arrive.
  proxy_request.addListener('response', function(proxy_response) {
    proxy_response.addListener('data', function(chunk) {
      response.write(chunk, 'binary');
    });
    proxy_response.addListener('end', function() {
      response.end();
    });
    response.writeHead(proxy_response.statusCode, proxy_response.headers);
  });

  // Client -> origin: pass the request body through chunk by chunk.
  request.addListener('data', function(chunk) {
    proxy_request.write(chunk, 'binary');
  });
  request.addListener('end', function() {
    proxy_request.end();
  });
}).listen(8080);

// Load the blacklist once at startup.
update_blacklist();

Now to block proxy users from using Facebook, just echo facebook.com to the blacklist file:

$ echo 'facebook.com' >> blacklist

The proxy server will automatically notice the changes to the file and update the blacklist.

Surely, a proxy server without IP control is no proxy server, so let's add that as well:

var http = require('http');
var sys  = require('sys');
var fs   = require('fs');

// Compiled blacklist patterns and allowed client addresses; each array is
// replaced wholesale when its file is reloaded.
var blacklist = [];
var iplist    = [];

// Reload each list whenever its file changes on disk.
fs.watchFile('./blacklist', function(c,p) { update_blacklist(); });
fs.watchFile('./iplist', function(c,p) { update_iplist(); });

/**
 * Re-reads ./blacklist and rebuilds the `blacklist` array.
 *
 * The file holds one regular expression per line; empty lines are skipped.
 * The encoding is passed explicitly so that readFileSync yields a string
 * (which has split()) instead of a Buffer on Node versions where a Buffer
 * is returned when no encoding is given.
 */
function update_blacklist() {
  sys.log("Updating blacklist.");
  blacklist = fs.readFileSync('./blacklist', 'utf8').split('\n')
              .filter(function(rx) { return rx.length; })
              .map(function(rx) { return RegExp(rx); });
}

/**
 * Re-reads ./iplist and rebuilds the `iplist` array.
 *
 * The file holds one client IP address per line; empty lines are skipped.
 * The encoding is passed explicitly so that readFileSync yields a string
 * (which has split()) instead of a Buffer on Node versions where a Buffer
 * is returned when no encoding is given.
 */
function update_iplist() {
  sys.log("Updating iplist.");
  iplist = fs.readFileSync('./iplist', 'utf8').split('\n')
           .filter(function(ip) { return ip.length; });
}

// Forwarding proxy with an IP allow-list and a URL blacklist. Requests that
// pass both checks are logged and relayed to port 80 of the host named in
// the Host header.
http.createServer(function(request, response) {
  var i;  // declared locally so the loops below do not create a global

  // Only clients whose address appears in iplist may use the proxy.
  var allowed_ip = false;
  for (i = 0; i < iplist.length; i++) {
    if (iplist[i] == request.connection.remoteAddress) {
      allowed_ip = true;
      break;
    }
  }

  if (!allowed_ip) {
    sys.log("IP " + request.connection.remoteAddress + " is not allowed");
    response.end();
    return;
  }

  // Refuse any request whose URL matches a blacklist pattern.
  for (i = 0; i < blacklist.length; i++) {
    if (blacklist[i].test(request.url)) {
      sys.log("Denied: " + request.method + " " + request.url);
      response.end();
      return;
    }
  }

  sys.log(request.connection.remoteAddress + ": " + request.method + " " + request.url);
  var proxy = http.createClient(80, request.headers['host']);
  var proxy_request = proxy.request(request.method, request.url, request.headers);

  // Origin -> client: relay status, headers and body.
  proxy_request.addListener('response', function(proxy_response) {
    proxy_response.addListener('data', function(chunk) {
      response.write(chunk, 'binary');
    });
    proxy_response.addListener('end', function() {
      response.end();
    });
    response.writeHead(proxy_response.statusCode, proxy_response.headers);
  });

  // Client -> origin: pass the request body through chunk by chunk.
  request.addListener('data', function(chunk) {
    proxy_request.write(chunk, 'binary');
  });
  request.addListener('end', function() {
    proxy_request.end();
  });
}).listen(8080);

// Load both lists once at startup.
update_blacklist();
update_iplist();

By default the proxy server will not allow any connections, so add all the IPs you want the proxy to be accessible from to the iplist file:

$ echo '1.2.3.4' >> iplist

Finally, let's refactor the code a little:

var http = require('http');
var sys  = require('sys');
var fs   = require('fs');

// Compiled blacklist patterns and allowed client addresses; each array is
// replaced wholesale when its file is reloaded.
var blacklist = [];
var iplist    = [];

// Reload each list whenever its file changes on disk.
fs.watchFile('./blacklist', function(c,p) { update_blacklist(); });
fs.watchFile('./iplist', function(c,p) { update_iplist(); });

/**
 * Re-reads ./blacklist and rebuilds the `blacklist` array.
 *
 * The file holds one regular expression per line; empty lines are skipped.
 * The encoding is passed explicitly so that readFileSync yields a string
 * (which has split()) instead of a Buffer on Node versions where a Buffer
 * is returned when no encoding is given.
 */
function update_blacklist() {
  sys.log("Updating blacklist.");
  blacklist = fs.readFileSync('./blacklist', 'utf8').split('\n')
              .filter(function(rx) { return rx.length; })
              .map(function(rx) { return RegExp(rx); });
}

/**
 * Re-reads ./iplist and rebuilds the `iplist` array.
 *
 * The file holds one client IP address per line; empty lines are skipped.
 * The encoding is passed explicitly so that readFileSync yields a string
 * (which has split()) instead of a Buffer on Node versions where a Buffer
 * is returned when no encoding is given.
 */
function update_iplist() {
  sys.log("Updating iplist.");
  iplist = fs.readFileSync('./iplist', 'utf8').split('\n')
           .filter(function(ip) { return ip.length; });
}

/**
 * Tells whether a client address is on the allow-list.
 *
 * @param ip  client address to look up
 * @returns   true if `ip` is an entry of the `iplist` array, false otherwise
 *            (an empty list therefore allows nobody)
 */
function ip_allowed(ip) {
  // indexOf avoids the undeclared loop counter, which leaked a global `i`.
  return iplist.indexOf(ip) !== -1;
}

/**
 * Tells whether a request target passes the blacklist.
 *
 * @param host  string to test against every pattern in `blacklist`
 * @returns     false as soon as one pattern matches, true if none does
 *              (an empty blacklist therefore allows everything)
 */
function host_allowed(host) {
  // some() avoids the undeclared loop counter, which leaked a global `i`.
  return !blacklist.some(function(pattern) {
    return pattern.test(host);
  });
}

/**
 * Rejects a request with a plain-text explanation.
 *
 * Replies with 403 Forbidden: the proxy understood the request and refuses
 * to serve it. 401 is not appropriate here because it announces an
 * authentication challenge and has to carry a WWW-Authenticate header,
 * which this proxy never sends.
 *
 * @param response  server response object; writeHead(), write() and end()
 *                  are called on it, in that order
 * @param msg       human-readable reason, sent as the response body
 */
function deny(response, msg) {
  response.writeHead(403, { 'Content-Type': 'text/plain' });
  response.write(msg);
  response.end();
}

// Access-controlled forwarding proxy. A request is served only if the client
// address passes ip_allowed() and the URL passes host_allowed(); it is then
// logged and relayed to port 80 of the host named in the Host header.
http.createServer(function(request, response) {
  var ip = request.connection.remoteAddress;
  var msg;  // declared locally so the assignments below do not create a global

  if (!ip_allowed(ip)) {
    msg = "IP " + ip + " is not allowed to use this proxy";
    deny(response, msg);
    sys.log(msg);
    return;
  }

  if (!host_allowed(request.url)) {
    msg = "Host " + request.url + " has been denied by proxy configuration";
    deny(response, msg);
    sys.log(msg);
    return;
  }

  sys.log(ip + ": " + request.method + " " + request.url);
  var proxy = http.createClient(80, request.headers['host']);
  var proxy_request = proxy.request(request.method, request.url, request.headers);

  // Origin -> client: relay status, headers and body.
  proxy_request.addListener('response', function(proxy_response) {
    proxy_response.addListener('data', function(chunk) {
      response.write(chunk, 'binary');
    });
    proxy_response.addListener('end', function() {
      response.end();
    });
    response.writeHead(proxy_response.statusCode, proxy_response.headers);
  });

  // Client -> origin: pass the request body through chunk by chunk.
  request.addListener('data', function(chunk) {
    proxy_request.write(chunk, 'binary');
  });
  request.addListener('end', function() {
    proxy_request.end();
  });
}).listen(8080);

// Load both lists once at startup.
update_blacklist();
update_iplist();

Again, it's amazing how fast you can write server software in node.js and JavaScript. It would have probably taken me a day to write the same in C. I love how fast you can prototype the software nowadays.

Download proxy.js

Download link: proxy server written in node.js
Download URL: http://www.catonmat.net/download/proxy.js
Downloaded: 4169 times

I am gonna build this proxy up, so I also put it on GitHub: proxy.js on GitHub

Happy proxying!

Comments

questions Permalink
April 28, 2010, 11:50

i have no clue about js event model.
so,
does the order in which proxy_response 'data' and 'end' events are received guarantee the order in which the respective callbacks are processed?

April 28, 2010, 15:20

Well events get queued. If there was 'data', then 'data' callback gets called. If the connection closes 'end' gets called. So they are in order.

April 28, 2010, 12:02

Glad you're following node.js as well.

April 28, 2010, 12:21

Great code, well done.
Maybe, we can think of something for my www.proxy.ps domain with this? Just in case.. Best wishes, Ruslan

April 28, 2010, 15:21

You could! With a bit of hacking and improvements. JavaScript is not that difficult!

April 28, 2010, 14:15

//var proxy = http.createClient(80, request.headers['host'])

This line means that the proxy is only HTTP1.1 compatible

April 28, 2010, 15:21

Well there is HTTP 1.0 and HTTP 1.1. What do you mean then by only?

April 28, 2010, 17:28

Why port 80 only? some webservers run on alternate ports.

April 29, 2010, 14:09

Port 80 because I wanted the code to fit in 20 lines.

You can just split on ':' on request.url and see if another port is specified...

April 30, 2010, 07:33

Header 'host' present in http-headers since v1.1 You must parse URL in first-line of http request.

April 30, 2010, 15:48

I am just forwarding all headers, the 'host' is in there!

prime Permalink
April 28, 2010, 16:33

It is as simple as using tornado, gevents, etc... But the problem is still the same one as in most async solutions: if you need to block in one call, all other connections will block. I am not talking about a socket block here though, but more of an intensive computation kind of block.
Erlang has a different solution for that.

April 29, 2010, 14:07

What's the Erlang's solution?

August 05, 2011, 22:13

I wouldn't say "Erlang has a solution" as this implies that it had the problem in the first place, which it didn't. Erlang's processes are concurrent, so there is no blocking in the same way that Node.js blocks. One process per request, one does not block on another unless you deliberately put something in place to make it happen.

April 28, 2010, 19:32

Very handy bit of code there. A useful little weight forwarding proxy

April 29, 2010, 02:05

that is so small! very useful!
i guess we can make that in python with simplehttpserver. that would be smaller. or not?

April 29, 2010, 14:08

It would be smaller but it also would be blocking. :(

April 29, 2010, 16:26

You could do it using the Twisted Python framework in three lines or so (Twisted is asynchronous and event driven like nodejs) :)

April 29, 2010, 17:11

Can you show me how to do it in 3 loc of Twisted?

May 06, 2010, 19:41

from twisted.web import proxy, http
from twisted.internet import reactor

class ProxyFactory(http.HTTPFactory):
    """HTTP factory that uses twisted.web's proxy.Proxy as its protocol."""
    protocol = proxy.Proxy

# Listen on TCP port 8080 with the proxy factory and start the reactor.
reactor.listenTCP(8080, ProxyFactory())
reactor.run()

jherber Permalink
October 20, 2010, 02:18

Bah, this is pre-packaged class, not the actual proxy implemented in Twisted.

tom Permalink
December 28, 2011, 03:39

Um, what did you think the node.js one was? Did you not see the require('http') and require('sys') at the top?

Tom Permalink
April 30, 2010, 11:45

does it support https ?

April 30, 2010, 12:07

Sorry, it does not.

f00li5h Permalink
May 24, 2010, 12:46

meow!

seeker Permalink
July 25, 2010, 03:55

I am trying to do a slight improvement to the proxy, I would like to block sites if they contain certain words, the problem I am facing is that some sites are encoding their content via gzip. I am trying to use node-compress to decompress the stream, but was unsuccessful. If you've got an ideas or if you can add another example that would be greatly appreciated.

March 14, 2011, 17:21

Thanks for your nice proxy - we were quite inspired by your idea and wrote a translucent intercepting proxy (tip.js), and we're using it to modify the response of our web-service:

Translucent Intercepting Proxy

sami Permalink
June 10, 2011, 10:20

read file function should be updated:

fs.readFileSync('whitelist.txt', "utf8")

otherwise this returns an error: Object

<filename>

has no method 'split'

Apparently node core has been modified: https://github.com/joyent/node/issues/186

July 18, 2011, 01:10

That's really cool -- node is great for this sort of thing.

However, HTTP proxies aren't *quite* that simple; for example, you really need to strip hop-by-hop headers, or it'll break some things.

See what proxies must do.

July 25, 2011, 00:45

This works, but not on a lot of YouTube pages. It seems like YouTube rejects web requests that go through a proxy server and redirects you to a "page not found" page. I tried it using EventMachine in Ruby too, with the same result. Any ideas?

respectTheCode Permalink
September 17, 2011, 13:02

You can actually take 4 lines of code out by passing the end function directly to the end event listener. For example

// When `request` emits 'end', call end() on proxy_request.
request.addListener('end', function() {
  proxy_request.end();
});

can be written as

// The method has to stay bound to proxy_request: an event emitter invokes a
// bare function reference with the emitter itself (here `request`) as `this`.
request.addListener('end', proxy_request.end.bind(proxy_request));
giuseppe Permalink
September 24, 2011, 14:06

Your proxy fails for this video server (IExplorer)
http://87.22.235.24/index.htm
Regards and thanks for any tip

mobeen Permalink
October 07, 2011, 05:32

hi,
I want to create a proxy on the web server. I pass the url of the image I want to a specific route on my server and then it downloads the image data and returns it to my javascript thus hiding its true origins.All i want to deal with my localhost only.The actual web server remain hide
How i can done this please help
thanks

Leave a new comment

(why do I need your e-mail?)

(Your twitter name, if you have one. (I'm @pkrumins, btw.))

Type first 3 letters of your name: (just to make sure you're a human)

Please preview the comment before submitting to make sure it's OK.

Advertisements