var http = require('http');
// Minimal forward proxy: every request received on port 8080 is replayed
// against the origin named in its Host header, and the origin's answer is
// streamed back to the caller.
http.createServer(function(request, response) {
  // The origin is always contacted on port 80.
  var proxy = http.createClient(80, request.headers['host']);
  var origin_request = proxy.request(request.method, request.url, request.headers);
  origin_request.addListener('response', function(origin_response) {
    // Relay the status code and headers, then pass the body through chunk by chunk.
    response.writeHead(origin_response.statusCode, origin_response.headers);
    origin_response.addListener('data', function(piece) {
      response.write(piece, 'binary');
    });
    origin_response.addListener('end', function() {
      response.end();
    });
  });
  // Forward the caller's request body to the origin as it arrives.
  request.addListener('data', function(piece) {
    origin_request.write(piece, 'binary');
  });
  request.addListener('end', function() {
    origin_request.end();
  });
}).listen(8080);
This is just amazing. In 20 lines of node.js code and 10 minutes of time I was able to write an HTTP proxy. And it scales well, too. It's not a blocking HTTP proxy; it's event-driven and asynchronous, meaning hundreds of people can use it simultaneously and it will still work well.
To get the proxy running all you have to do is download node.js, compile it, and run the proxy program via the node program:
$ ./configure --prefix=/home/pkrumins/installs/nodejs-0.1.92
$ make
$ make install
$ PATH=$PATH:/home/pkrumins/installs/nodejs-0.1.92/bin
$ node proxy.js
And from here you can take this proxy wherever your imagination takes you. For example, you can start by adding logging:
var http = require('http');
var sys = require('sys');
// Forward proxy with request logging: each request is logged, then replayed
// against the origin named in its Host header (port 80).
http.createServer(function(request, response) {
  sys.log(request.connection.remoteAddress + ": " + request.method + " " + request.url);

  var proxy = http.createClient(80, request.headers['host']);
  var outbound = proxy.request(request.method, request.url, request.headers);

  // Origin -> caller plumbing.
  function send_to_client(chunk) {
    response.write(chunk, 'binary');
  }
  function finish_client() {
    response.end();
  }

  // Caller -> origin plumbing.
  function send_to_origin(chunk) {
    outbound.write(chunk, 'binary');
  }
  function finish_origin() {
    outbound.end();
  }

  outbound.addListener('response', function(origin_response) {
    origin_response.addListener('data', send_to_client);
    origin_response.addListener('end', finish_client);
    // Status code and headers are relayed unchanged.
    response.writeHead(origin_response.statusCode, origin_response.headers);
  });
  request.addListener('data', send_to_origin);
  request.addListener('end', finish_origin);
}).listen(8080);
Next, you can add a regex-based host blacklist in 15 additional lines:
var http = require('http');
var sys = require('sys');
var fs = require('fs');
// Compiled blacklist patterns; update_blacklist() replaces the whole array.
var blacklist = [];

// Reload the patterns whenever the blacklist file changes on disk.
fs.watchFile('./blacklist', function() {
  update_blacklist();
});
/**
 * Reloads the global `blacklist` from the file ./blacklist.
 *
 * Every non-empty line of the file is compiled into a regular expression.
 * Propagates any error thrown by fs.readFileSync (e.g. the file is missing)
 * and the SyntaxError thrown by RegExp for an invalid pattern.
 */
function update_blacklist() {
  sys.log("Updating blacklist.");
  // Request a string explicitly: without an encoding readFileSync yields a
  // Buffer, which has no split() method.
  var lines = fs.readFileSync('./blacklist', 'utf8').split(/\r?\n/);
  // Splitting on \r?\n keeps a stray carriage return out of the patterns
  // when the file was saved with CRLF line endings.
  blacklist = lines
    .filter(function(rx) { return rx.length > 0; })
    .map(function(rx) { return new RegExp(rx); });
}
// Forward proxy with a URL blacklist: requests whose URL matches a blacklist
// pattern are dropped; everything else is logged and replayed against the
// origin named in the Host header (port 80).
http.createServer(function(request, response) {
  // A counted loop with a local index avoids leaking a global `i` and avoids
  // for...in enumeration over an array.
  for (var i = 0; i < blacklist.length; i++) {
    if (blacklist[i].test(request.url)) {
      sys.log("Denied: " + request.method + " " + request.url);
      // The denied caller gets an empty response.
      response.end();
      return;
    }
  }

  sys.log(request.connection.remoteAddress + ": " + request.method + " " + request.url);

  var proxy = http.createClient(80, request.headers['host']);
  var proxy_request = proxy.request(request.method, request.url, request.headers);
  proxy_request.addListener('response', function(proxy_response) {
    // Stream the origin's body back to the caller as it arrives.
    proxy_response.addListener('data', function(chunk) {
      response.write(chunk, 'binary');
    });
    proxy_response.addListener('end', function() {
      response.end();
    });
    response.writeHead(proxy_response.statusCode, proxy_response.headers);
  });
  // Forward the caller's request body to the origin.
  request.addListener('data', function(chunk) {
    proxy_request.write(chunk, 'binary');
  });
  request.addListener('end', function() {
    proxy_request.end();
  });
}).listen(8080);

// Load the blacklist once at startup; the file watcher handles later changes.
update_blacklist();
Now, to block proxy users from using Facebook, just echo facebook.com to the blacklist file:
$ echo 'facebook.com' >> blacklist
The proxy server will automatically notice the changes to the file and update the blacklist.
Surely, a proxy server without IP control is no proxy server, so let's add that as well:
var http = require('http');
var sys = require('sys');
var fs = require('fs');
// Compiled blacklist patterns; update_blacklist() replaces the whole array.
var blacklist = [];
// Client addresses read from the iplist file; update_iplist() replaces the whole array.
var iplist = [];

// Reload each list whenever its backing file changes on disk.
fs.watchFile('./blacklist', function() {
  update_blacklist();
});
fs.watchFile('./iplist', function() {
  update_iplist();
});
/**
 * Reloads the global `blacklist` from the file ./blacklist.
 *
 * Every non-empty line of the file is compiled into a regular expression.
 * Propagates any error thrown by fs.readFileSync (e.g. the file is missing)
 * and the SyntaxError thrown by RegExp for an invalid pattern.
 */
function update_blacklist() {
  sys.log("Updating blacklist.");
  // Request a string explicitly: without an encoding readFileSync yields a
  // Buffer, which has no split() method.
  var lines = fs.readFileSync('./blacklist', 'utf8').split(/\r?\n/);
  // Splitting on \r?\n keeps a stray carriage return out of the patterns
  // when the file was saved with CRLF line endings.
  blacklist = lines
    .filter(function(rx) { return rx.length > 0; })
    .map(function(rx) { return new RegExp(rx); });
}
/**
 * Reloads the global `iplist` from the file ./iplist.
 *
 * Every non-empty line of the file becomes one entry of the list.
 * Propagates any error thrown by fs.readFileSync (e.g. the file is missing).
 */
function update_iplist() {
  sys.log("Updating iplist.");
  // Request a string explicitly: without an encoding readFileSync yields a
  // Buffer, which has no split() method.
  var lines = fs.readFileSync('./iplist', 'utf8').split(/\r?\n/);
  // Splitting on \r?\n keeps a stray carriage return out of the entries
  // when the file was saved with CRLF line endings.
  iplist = lines.filter(function(ip) { return ip.length > 0; });
}
// Forward proxy with IP control and a URL blacklist. A request is served only
// when the caller's address is listed in `iplist` and its URL matches no
// pattern in `blacklist`.
http.createServer(function(request, response) {
  var client_ip = request.connection.remoteAddress;

  // indexOf replaces the for...in scan, which also leaked a global `i`.
  if (iplist.indexOf(client_ip) === -1) {
    sys.log("IP " + client_ip + " is not allowed");
    response.end();
    return;
  }

  for (var i = 0; i < blacklist.length; i++) {
    if (blacklist[i].test(request.url)) {
      sys.log("Denied: " + request.method + " " + request.url);
      response.end();
      return;
    }
  }

  sys.log(client_ip + ": " + request.method + " " + request.url);

  // Replay the request against the origin named in the Host header (port 80).
  var proxy = http.createClient(80, request.headers['host']);
  var proxy_request = proxy.request(request.method, request.url, request.headers);
  proxy_request.addListener('response', function(proxy_response) {
    // Stream the origin's body back to the caller as it arrives.
    proxy_response.addListener('data', function(chunk) {
      response.write(chunk, 'binary');
    });
    proxy_response.addListener('end', function() {
      response.end();
    });
    response.writeHead(proxy_response.statusCode, proxy_response.headers);
  });
  // Forward the caller's request body to the origin.
  request.addListener('data', function(chunk) {
    proxy_request.write(chunk, 'binary');
  });
  request.addListener('end', function() {
    proxy_request.end();
  });
}).listen(8080);

// Load both lists once at startup; the file watchers handle later changes.
update_blacklist();
update_iplist();
By default the proxy server will not allow any connections, so add all the IPs you want the proxy to be accessible from to the iplist file:
$ echo '1.2.3.4' >> iplist
Finally, let's refactor the code a little:
var http = require('http');
var sys = require('sys');
var fs = require('fs');
// Compiled blacklist patterns; update_blacklist() replaces the whole array.
var blacklist = [];
// Client addresses read from the iplist file; update_iplist() replaces the whole array.
var iplist = [];

// Reload each list whenever its backing file changes on disk.
fs.watchFile('./blacklist', function() {
  update_blacklist();
});
fs.watchFile('./iplist', function() {
  update_iplist();
});
/**
 * Reloads the global `blacklist` from the file ./blacklist.
 *
 * Every non-empty line of the file is compiled into a regular expression.
 * Propagates any error thrown by fs.readFileSync (e.g. the file is missing)
 * and the SyntaxError thrown by RegExp for an invalid pattern.
 */
function update_blacklist() {
  sys.log("Updating blacklist.");
  // Request a string explicitly: without an encoding readFileSync yields a
  // Buffer, which has no split() method.
  var lines = fs.readFileSync('./blacklist', 'utf8').split(/\r?\n/);
  // Splitting on \r?\n keeps a stray carriage return out of the patterns
  // when the file was saved with CRLF line endings.
  blacklist = lines
    .filter(function(rx) { return rx.length > 0; })
    .map(function(rx) { return new RegExp(rx); });
}
/**
 * Reloads the global `iplist` from the file ./iplist.
 *
 * Every non-empty line of the file becomes one entry of the list.
 * Propagates any error thrown by fs.readFileSync (e.g. the file is missing).
 */
function update_iplist() {
  sys.log("Updating iplist.");
  // Request a string explicitly: without an encoding readFileSync yields a
  // Buffer, which has no split() method.
  var lines = fs.readFileSync('./iplist', 'utf8').split(/\r?\n/);
  // Splitting on \r?\n keeps a stray carriage return out of the entries
  // when the file was saved with CRLF line endings.
  iplist = lines.filter(function(ip) { return ip.length > 0; });
}
/**
 * Tells whether a client address may use the proxy.
 *
 * @param {string} ip - Address to look up.
 * @returns {boolean} true when `ip` is an entry of the global `iplist`,
 *     false otherwise (including when the list is empty).
 */
function ip_allowed(ip) {
  // indexOf replaces a for...in scan that assigned to an undeclared `i`,
  // which leaks a global and throws in strict-mode code.
  return iplist.indexOf(ip) !== -1;
}
/**
 * Tells whether a request target passes the blacklist.
 *
 * @param {string} host - Text to test against every pattern in the global
 *     `blacklist`. The server passes the full request URL here, not just
 *     the host name.
 * @returns {boolean} false when any blacklist pattern matches, true otherwise
 *     (including when the blacklist is empty).
 */
function host_allowed(host) {
  // A counted loop with a local index replaces a for...in scan that assigned
  // to an undeclared `i`, which leaks a global and throws in strict-mode code.
  for (var n = 0; n < blacklist.length; n++) {
    if (blacklist[n].test(host)) {
      return false;
    }
  }
  return true;
}
/**
 * Rejects a request: sends status 401 with `msg` as the body and closes
 * the response.
 *
 * @param {object} response - Server response to write to.
 * @param {string} msg - Explanation sent as the response body.
 */
function deny(response, msg) {
  var status_code = 401;
  response.writeHead(status_code);
  response.write(msg);
  response.end();
}
// Forward proxy with IP control and a URL blacklist. Callers that fail either
// check are answered through deny(); everything else is logged and replayed
// against the origin named in the Host header (port 80).
http.createServer(function(request, response) {
  var ip = request.connection.remoteAddress;
  // Declared locally so the denial text does not leak into a global `msg`.
  var msg;

  if (!ip_allowed(ip)) {
    msg = "IP " + ip + " is not allowed to use this proxy";
    deny(response, msg);
    sys.log(msg);
    return;
  }

  // The blacklist patterns are tested against the full request URL.
  if (!host_allowed(request.url)) {
    msg = "Host " + request.url + " has been denied by proxy configuration";
    deny(response, msg);
    sys.log(msg);
    return;
  }

  sys.log(ip + ": " + request.method + " " + request.url);

  var proxy = http.createClient(80, request.headers['host']);
  var proxy_request = proxy.request(request.method, request.url, request.headers);
  proxy_request.addListener('response', function(proxy_response) {
    // Stream the origin's body back to the caller as it arrives.
    proxy_response.addListener('data', function(chunk) {
      response.write(chunk, 'binary');
    });
    proxy_response.addListener('end', function() {
      response.end();
    });
    response.writeHead(proxy_response.statusCode, proxy_response.headers);
  });
  // Forward the caller's request body to the origin.
  request.addListener('data', function(chunk) {
    proxy_request.write(chunk, 'binary');
  });
  request.addListener('end', function() {
    proxy_request.end();
  });
}).listen(8080);

// Load both lists once at startup; the file watchers handle later changes.
update_blacklist();
update_iplist();
Again, it's amazing how fast you can write server software in node.js and JavaScript. It would probably have taken me a day to write the same in C. I love how fast you can prototype software nowadays.
Download proxy.js
Download link: proxy server written in node.js
Download URL: http://www.catonmat.net/download/proxy.js
Downloaded: 4169 times
I am gonna build this proxy up, so I also put it on GitHub: proxy.js on GitHub
Happy proxying!


Hacker Newsletter - a weekly newsletter of the best articles on startups, programming, and more. All links are curated by hand from Hacker News.
Twitter
Facebook
Plurk
more
GitHub
LinkedIn
FriendFeed
Google Plus
Amazon wish list
Comments
i have no clue about js event model.
so,
does the order in which proxy_response 'data' and 'end' events are received guarantee the order in which the respective callbacks are processed?
Well events get queued. If there was 'data', then 'data' callback gets called. If the connection closes 'end' gets called. So they are in order.
Glad you're following node.js as well.
:)
Great code, well done.
Maybe, we can think of something for my www.proxy.ps domain with this? Just in case.. Best wishes, Ruslan
You could! With a bit of hacking and improvements. JavaScript is not that difficult!
//var proxy = http.createClient(80, request.headers['host'])
This line means that the proxy is only HTTP1.1 compatible
Well there is HTTP 1.0 and HTTP 1.1. What do you mean then by only?
Why port 80 only? some webservers run on alternate ports.
Port 80 because I wanted the code to fit in 20 lines.
You can just split on ':' on request.url and see if another port is specified...
The 'Host' header has only been present in HTTP headers since v1.1. You must parse the URL in the first line of the HTTP request.
I am just forwarding all headers, the 'host' is in there!
It is as simple as using tornado, gevent, etc. But the problem is still the same in most async solutions: if you need to block in one call, all other connections will block. I am not talking about a socket block here, though, but more of an intensive-computation kind of block.
Erlang has a different solution for that.
What's the Erlang's solution?
I wouldn't say "Erlang has a solution" as this implies that it had the problem in the first place, which it didn't. Erlang's processes are concurrent, so there is no blocking in the same way that Node.js blocks. One process per request, one does not block on another unless you deliberately put something in place to make it happen.
Very handy bit of code there. A useful little weight forwarding proxy
Indeed.
that is so small! very useful!
i guess we can make that in python with simplehttpserver. that would be smaller. or not?
It would be smaller but it also would be blocking. :(
You could do it using the Twisted Python framework in three lines or so (Twisted is asynchronous and event-driven like nodejs) :)
Can you show me how to do it in 3 loc of Twisted?
from twisted.web import proxy, http
from twisted.internet import reactor
class ProxyFactory(http.HTTPFactory):
    # Use Twisted's ready-made proxy protocol class for this factory.
    protocol = proxy.Proxy


# Listen on port 8080 and run the reactor's event loop.
reactor.listenTCP(8080, ProxyFactory())
reactor.run()
Bah, this is pre-packaged class, not the actual proxy implemented in Twisted.
Um, what did you think the node.js one was? Did you not see the require('http') and require('sys') at the top?
does it support https ?
Sorry, it does not.
meow!
I am trying to make a slight improvement to the proxy: I would like to block sites if they contain certain words. The problem I am facing is that some sites encode their content via gzip. I am trying to use node-compress to decompress the stream, but was unsuccessful. If you've got any ideas, or if you can add another example, that would be greatly appreciated.
Thanks for your nice proxy - we were quite inspired by your idea and wrote a translucent intercepting proxy (tip.js), and we're using it to modify the response of our web-service:
Translucent Intercepting Proxy
read file function should be updated:
fs.readFileSync('whitelist.txt', "utf8")
otherwise this returns an error: Object
<filename>has no method 'split'
Apparently node core has been modified: https://github.com/joyent/node/issues/186
That's really cool -- node is great for this sort of thing.
However, HTTP proxies aren't *quite* that simple; for example, you really need to strip hop-by-hop headers, or it'll break some things.
See what proxies must do.
This works, but not on a lot of YouTube pages. It seems like YouTube rejects web requests that go through a proxy server and redirects you to a "page not found" page. I tried it using EventMachine in Ruby too, with the same result. Any ideas?
You can actually take 4 lines of code out by passing the end function directly to the end event listener. For example
can be written as
Your proxy fails for this video server (IExplorer)
http://87.22.235.24/index.htm
Regards and thanks for any tip
hi,
I want to create a proxy on the web server. I pass the URL of the image I want to a specific route on my server, and then it downloads the image data and returns it to my JavaScript, thus hiding its true origin. I want to deal with my localhost only; the actual web server should remain hidden.
How can I do this? Please help.
thanks
Leave a new comment