#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# $Rev$
# $Date$
#
"""Load number-range tables from rossvyaz.ru pages into a SQLite database.

The pages listed in START_URLS are downloaded, every table row that starts
with three numeric cells is turned into a number range, and the rows are
written to the ``codes_operator`` table, either all of them (``full``) or
only the ranges missing from the database (``diff``).

The code runs unchanged on Python 2.6+ and Python 3.
"""

from contextlib import closing
from operator import itemgetter
import sqlite3
import sys

try:  # Python 3
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import urlopen


# Multiplier that shifts the code in front of the 7-digit number part.
BASE = 10000000
DATA_DB = 'test.db'


START_URLS = [
    'http://www.rossvyaz.ru/docs/articles/ABC-3x.html',
    'http://www.rossvyaz.ru/docs/articles/ABC-4x.html',
    'http://www.rossvyaz.ru/docs/articles/ABC-8x.html',
    'http://www.rossvyaz.ru/docs/articles/DEF-9x.html',
    ]

# Reorders the cells of a page row into the column order of the INSERT
# statement: name, region_code, number_start_range, number_end_range, region.
_ROW_ORDER = itemgetter(4, 0, 1, 2, 5)


def f2num(x):
    """Convert a (code, start, end) triple into absolute range bounds.

    Only the first three items of *x* are used; each one is passed through
    ``int()``.

    Returns:
        ``(code * BASE + start, code * BASE + end)``, or ``None`` when *x*
        has fewer than three items or any of them is not a valid integer
        literal (header rows, empty cells).

    >>> f2num(['900', '0000000', '0099999'])
    (9000000000, 9000099999)
    >>> f2num(['900']) is None
    True

    """
    try:
        # Both a failed int() and a short unpack raise ValueError;
        # UnicodeEncodeError (Python 2, non-ASCII text) is a subclass of it.
        code, first, last = [int(item) for item in x[:3]]
    except ValueError:
        return None
    return (code * BASE + first, code * BASE + last)


def parseURL(url):
    """Download *url* and collect its numeric table rows.

    http://stackoverflow.com/questions/11795077/simple-example-regarding-how-to-parseURL-data-from-html-output-using-lxml

    Returns:
        A ``(content, url_nums)`` tuple.  ``content`` is the ``content``
        attribute of the first ``<meta>`` element (``None`` when the page
        has no ``<meta>``).  ``url_nums`` maps the ``f2num`` key of every
        ``<tr>`` whose first three cells are numeric to the list of that
        row's stripped cell texts.

    Example (needs network access):

    >>> content, nums = parseURL(START_URLS[3])
    >>> content
    'text/html; charset=windows-1251'
    """
    # Imported here, in its only user, so that the database helpers remain
    # importable on systems where lxml is not installed.
    import lxml.html

    # closing() works for both the Python 2 and Python 3 response objects.
    with closing(urlopen(url)) as response:
        doc = lxml.html.document_fromstring(response.read())

    metas = doc.xpath('//meta')
    content = metas[0].get('content') if metas else None

    url_nums = {}
    for tr in doc.xpath('//tr'):
        cells = [cell.text_content().strip() for cell in tr.getchildren()]
        key = f2num(cells)
        if key is not None:
            url_nums[key] = cells

    return (content, url_nums)


def readDB(db_path):
    """Return the ``f2num`` key of every row stored in ``codes_operator``.

    Raises:
        sqlite3.OperationalError: if the table does not exist in *db_path*.

    Example (needs a populated database):

    >>> db_nums = readDB(DATA_DB)
    >>> len(db_nums)
    128034

    """
    sql = """
        select region_code rc, number_start_range nsr, number_end_range ner
        from codes_operator
    """
    with closing(sqlite3.connect(db_path)) as con:
        rows = con.execute(sql).fetchall()

    return [f2num(row) for row in rows]


def difference(url, db_path):
    """Find the rows published at *url* that are missing from the database.

    Returns:
        ``(content, rows)`` where ``rows`` is the list of page rows whose
        range is not in the database, ordered by range; ``(None, None)``
        when nothing is missing.

    Example (needs network access and a populated database):

    >>> content, diff = difference(START_URLS[3], DATA_DB)
    >>> content, len(diff)
    ('text/html; charset=windows-1251', 175)

    """
    content, url_nums = parseURL(url)
    missing = set(url_nums) - set(readDB(db_path))

    if not missing:
        return None, None
    # Sorted so that the insertion order is deterministic.
    return content, [url_nums[key] for key in sorted(missing)]


def insertDB(db_path, data, schema):
    """Insert *data* rows into a table, creating the table if necessary.

    Args:
        db_path: path of the SQLite database file.
        data: iterable of parameter tuples for the INSERT statement.
        schema: ``(table_name, values_clause)`` pair; both parts are
            interpolated into the SQL text, so they must come from trusted
            code, never from user input.

    The connection is always closed; changes are committed only when every
    statement succeeded.
    """
    table = schema[0]

    create_tbl = """
CREATE TABLE IF NOT EXISTS '%s' (
    'id' INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    'name' varchar(250),
    'region_code' integer unsigned,
    'number_start_range' integer unsigned,
    'number_end_range' integer unsigned,
    'mobile' bool,
    'region' varchar(250),
    'country' char(4) )
    """ % table

    sql = "insert into %s values (%s)" % schema

    with closing(sqlite3.connect(db_path)) as con:
        cur = con.cursor()
        cur.execute(create_tbl)
        cur.executemany(sql, data)
        con.commit()


def _store_rows(db_path, url, rows):
    """Insert page *rows* scraped from *url* into ``codes_operator``."""
    # The mobile flag is set for the URLs whose name contains "DEF".
    mobile = 1 if 'DEF' in url.upper() else 0
    # 'RU' uses single quotes: a double-quoted string literal is only
    # accepted by SQLite builds that keep the legacy quirk enabled.
    schema = ('codes_operator', "NULL,?,?,?,?,%d,?,'RU'" % mobile)
    insertDB(db_path, [_ROW_ORDER(row) for row in rows], schema)


def full_update(db_path):
    """Insert every row of every page in START_URLS into *db_path*.

    Prints one progress line per URL.
    """
    for url in START_URLS:
        nums = parseURL(url)[1]

        if nums:
            data = [nums[key] for key in sorted(nums)]
            print('%s %d' % (url, len(data)))
            _store_rows(db_path, url, data)
        else:
            print('%s No data' % url)


def diff_update(db_path):
    """Insert only the rows of START_URLS pages missing from *db_path*.

    Prints one progress line per URL.
    """
    for url in START_URLS:
        diff = difference(url, db_path)[1]

        if diff:
            print('%s %d' % (url, len(diff)))
            _store_rows(db_path, url, diff)
        else:
            print('%s No difference' % url)


def main():
    """Command-line entry point: ``inc_codes.py full|diff base.db``.

    Prints a usage message when the arguments are not exactly a known mode
    followed by a database path.
    """
    handlers = {'full': full_update, 'diff': diff_update}
    handler = handlers.get(sys.argv[1]) if len(sys.argv) == 3 else None

    if handler is None:
        print('Usage: %s full|diff base.db' % sys.argv[0])
        return

    handler(sys.argv[2])


def _test():
    """Run the module doctests (several need network access and test.db)."""
    import doctest
    doctest.testmod()


if __name__ == "__main__":
    main()