Har laget en enkel parser opp mot 1881 sitt søk på nett som henter ut navn, adresse og sted. Det fungerer også å søke på navn, men den er ikke idiotsikker 😉
Oppdatering: har fikset PHP-versjonen så den ikke avhenger av mitt privatmekkede HTTP-bibliotek.
PHP:
#!/usr/bin/php
<?php
require_once('lib/HTTP.php');
// Look up a phone number (or name) on 1881.no and print the name, number and
// address of the first listing that matches the scraping pattern below.

// The search term is required as the first command-line argument.
if (!isset($argv[1])) {
    show_help();
    exit;
}

$tlf = urlencode($argv[1]);
$url = 'http://www.1881.no/?Query=' . $tlf;

// file_get_contents() returns false on failure; report that explicitly
// instead of running the regex on false and printing "no match".
$html = file_get_contents($url);
if ($html === false) {
    fwrite(STDERR, "Error: could not fetch {$url}\n");
    exit(1);
}

// Detect the response charset from the Content-Type header so that HTML
// entities are decoded into the right encoding; fall back to UTF-8.
$charset = 'utf-8';
$headers = get_headers($url, 1);
if (is_array($headers)) {
    // Header names are case-insensitive; normalise the keys before lookup.
    $headers = array_change_key_case($headers, CASE_LOWER);
    if (isset($headers['content-type'])) {
        $content_type = $headers['content-type'];
        // When a header occurs more than once (e.g. after redirects),
        // get_headers() returns an array of values; use the last one.
        if (is_array($content_type)) {
            $content_type = end($content_type);
        }
        if (preg_match(',charset=([A-Z0-9-]+),i', $content_type, $charset_match)) {
            $charset = $charset_match[1];
        }
    }
}

// Non-greedy pattern with three captures: name, phone number and address.
// The "s" modifier lets "." span newlines, "i" ignores case.
$match = ',<div.*?id="content_main".*?>.*?<div.*?class="listing alt".*?>.*?<h3><a[^>]*>(.*?)</a>.*?<span>(.*?)</span>.*?</h3>.*?<p.*?class="listing_address">.*?<span>(.*?)</span>.*?</p>,is';
preg_match($match, $html, $matches);

if (isset($matches[1], $matches[2], $matches[3])) {
    $name = trim(html_entity_decode($matches[1], ENT_COMPAT, $charset));
    $number = trim(html_entity_decode($matches[2], ENT_COMPAT, $charset));
    // Keep only the digits of the phone number.
    $number = preg_replace(',[^0-9]+,', '', $number);
    $address = trim(html_entity_decode($matches[3], ENT_COMPAT, $charset));
    echo "Name: $name\n";
    echo "Number: $number\n";
    echo "Address: $address\n";
}
else {
    echo "Sorry, no match.\n";
}
/**
 * Print a short usage message to standard output and terminate the script.
 *
 * The script reads only its first argument as the search term and does not
 * parse any option flags, so no options are advertised here.
 */
function show_help() {
    $script = basename(__FILE__);
    echo "Usage: {$script} <number>\n";
    exit;
}
Python:
#!/usr/bin/python
import urllib2
import re
import sys
# Look up a phone number (or name) on 1881.no and print the name, phone
# number and address of the first listing that matches the pattern below.

# The search term is required as the first command-line argument.
if len(sys.argv) < 2:
    sys.stderr.write("Usage: %s <number>\n" % sys.argv[0])
    sys.exit(-1)
number = sys.argv[1]

# Fetch the result page. try/finally closes the connection even when
# read() raises.
f = urllib2.urlopen("http://www.1881.no/?Query=" + urllib2.quote(number))
try:
    html = f.read()
finally:
    f.close()

# Non-greedy pattern with three captures: name, phone number and address.
pattern = '<div.*?id="content_main".*?>.*?<div.*?class="listing alt".*?>.*?<h3><a[^>]*>(.*?)</a>.*?<span>(.*?)</span>.*?</h3>.*?<p.*?class="listing_address">.*?<span>(.*?)</span>.*?</p>'

# re.S makes "." match newlines as well, like the "s" modifier in PCRE.
p = re.compile(pattern, re.S)

# search() returns a match object, or None when nothing matched. A match
# always carries all three groups, so no separate length check is needed.
m = p.search(html)
if m:
    name = m.group(1)
    # Strip non-numeric characters from the phone number.
    phone = re.sub('[^0-9]+', '', m.group(2))
    address = m.group(3)
    print("Name: %s\nPhone: %s\nAddress: %s" % (name, phone, address))
else:
    print("Sorry! No match :-(")