Jeg har laget en enkel parser mot nettsøket til 1881 som henter ut navn, telefonnummer og adresse (med sted). Det fungerer også å søke på navn, men den er ikke idiotsikker 😉
Oppdatering: har fikset PHP-versjonen så den ikke avhenger av mitt privatmekkede HTTP-bibliotek.
PHP:
#!/usr/bin/php
<?php
/**
 * Look up a phone number (or a name) in the 1881.no web search and print the
 * name, number and address of the first listing on the result page.
 *
 * Usage: <script> <number>
 *
 * The script uses only built-in PHP functions; no external library is needed.
 */

if (!isset($argv[1])) {
    show_help(); // prints the usage text and terminates the script
}

$url = 'http://www.1881.no/?Query=' . urlencode($argv[1]);

// Fetch the result page. "@" silences PHP's own warning because the failure
// is reported explicitly below instead of falling through to "no match".
$html = @file_get_contents($url);
if ($html === false) {
    fwrite(STDERR, "Error: could not fetch $url\n");
    exit(1);
}

// Reuse the headers of the request made above rather than requesting the URL
// a second time. http_get_last_response_headers() is used when the running
// PHP provides it; otherwise the $http_response_header variable that the
// HTTP stream wrapper fills in is read.
if (function_exists('http_get_last_response_headers')) {
    $header_lines = http_get_last_response_headers();
} else {
    $header_lines = isset($http_response_header) ? $http_response_header : array();
}
$charset = detect_charset(is_array($header_lines) ? $header_lines : array());

// Captures, from the first "listing alt" block inside #content_main:
//   1 = name (link text), 2 = phone number, 3 = address.
// "i" = case-insensitive, "s" = let "." match newlines as well.
$match = ',<div.*?id="content_main".*?>.*?<div.*?class="listing alt".*?>.*?<h3><a[^>]*>(.*?)</a>.*?<span>(.*?)</span>.*?</h3>.*?<p.*?class="listing_address">.*?<span>(.*?)</span>.*?</p>,is';

preg_match($match, $html, $matches);

if (isset($matches[1], $matches[2], $matches[3])) {
    $name    = trim(html_entity_decode($matches[1], ENT_COMPAT, $charset));
    $number  = trim(html_entity_decode($matches[2], ENT_COMPAT, $charset));
    $number  = preg_replace(',[^0-9]+,', '', $number); // keep digits only
    $address = trim(html_entity_decode($matches[3], ENT_COMPAT, $charset));

    echo "Name: $name\n";
    echo "Number: $number\n";
    echo "Address: $address\n";
} else {
    echo "Sorry, no match.\n";
}

/**
 * Extract the charset announced in a list of raw HTTP response header lines.
 *
 * When several Content-Type headers are present (for example after a
 * redirect) the last one wins, since it belongs to the final response.
 *
 * @param array  $header_lines Raw header lines such as "Content-Type: text/html; charset=utf-8".
 * @param string $default      Charset to return when none is announced.
 * @return string The charset name.
 */
function detect_charset(array $header_lines, $default = 'utf-8')
{
    $charset = $default;
    foreach ($header_lines as $line) {
        if (preg_match(',^Content-Type:.*?charset="?([A-Z0-9-]+),i', $line, $m)) {
            $charset = $m[1];
        }
    }
    return $charset;
}

/**
 * Print the usage text and terminate the script.
 *
 * TODO: the -n / -t / -a options listed below are advertised but are not
 * parsed anywhere in this script; only the first argument is read.
 */
function show_help()
{
    $script = basename(__FILE__);
    echo "Usage: {$script} <number>\n";
    echo "\n";
    echo " -n\tdisplay name only\n";
    echo " -t\tdisplay number only\n";
    echo " -a\tdisplay address only\n";
    exit;
}
Python:
#!/usr/bin/python
"""Look up a phone number (or a name) in the 1881.no web search.

Prints the name, phone number and address of the first listing found on the
result page.  The script runs on both Python 2 and Python 3.

Usage: <script> <number>
"""
import re
import sys
from contextlib import closing

try:  # Python 3
    from html import unescape
    from urllib.parse import quote
    from urllib.request import urlopen
except ImportError:  # Python 2
    from HTMLParser import HTMLParser
    from urllib2 import quote, urlopen

    unescape = HTMLParser().unescape

SEARCH_URL = "http://www.1881.no/?Query="

# Captures, from the first "listing alt" block inside #content_main:
#   1 = name (link text), 2 = phone number, 3 = address.
# re.S lets "." match newlines too, so the pattern can span several lines.
LISTING_RE = re.compile(
    r'<div.*?id="content_main".*?>.*?'
    r'<div.*?class="listing alt".*?>.*?'
    r'<h3><a[^>]*>(.*?)</a>.*?<span>(.*?)</span>.*?</h3>.*?'
    r'<p.*?class="listing_address">.*?<span>(.*?)</span>.*?</p>',
    re.S,
)


def parse_listing(html):
    """Extract the first listing from a 1881.no result page.

    Args:
        html: The decoded (text) HTML of the result page.

    Returns:
        A ``(name, phone, address)`` tuple with HTML entities decoded and
        surrounding whitespace stripped; ``phone`` contains digits only.
        ``None`` when the page contains no listing.
    """
    match = LISTING_RE.search(html)
    if match is None:
        return None
    name, phone, address = (unescape(field).strip() for field in match.groups())
    return name, re.sub(r"[^0-9]+", "", phone), address


def response_charset(response, default="utf-8"):
    """Return the charset announced in the response's Content-Type header.

    Falls back to *default* when the server does not announce one.
    """
    headers = response.headers
    getter = getattr(headers, "get_content_charset", None)
    if getter is not None:  # Python 3 message object
        charset = getter()
    else:  # Python 2 message object
        charset = headers.getparam("charset")
    return charset or default


def fetch_html(query):
    """Download the result page for *query* and return it as decoded text.

    Raises:
        IOError: if the page cannot be fetched (URLError derives from it).
    """
    # closing() guarantees the connection is released even if read() fails,
    # and works on Python 2 where the response is not a context manager.
    with closing(urlopen(SEARCH_URL + quote(query))) as response:
        raw = response.read()
        charset = response_charset(response)
    try:
        return raw.decode(charset, "replace")
    except LookupError:  # server announced a charset Python does not know
        return raw.decode("utf-8", "replace")


def emit(text):
    """Print *text* to stdout on either Python major version."""
    if sys.version_info[0] < 3:
        # Python 2 cannot print non-ASCII unicode to a pipe; encode it first.
        text = text.encode(sys.stdout.encoding or "utf-8", "replace")
    print(text)


def main(argv):
    """Run the lookup for ``argv[1]`` and return the process exit status."""
    if len(argv) < 2:
        script = argv[0] if argv else "1881"
        sys.stderr.write("Usage: %s <number>\n" % script)
        return -1

    try:
        html = fetch_html(argv[1])
    except IOError as err:
        sys.stderr.write("Error: could not query 1881.no: %s\n" % err)
        return 1

    listing = parse_listing(html)
    if listing is None:
        print("Sorry! No match :-(")
    else:
        emit("Name: %s\nPhone: %s\nAddress: %s" % listing)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))