# inc_codes.py
# (text recovered from a diff view: new file, 191 additions, 0 deletions)
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# $Rev$
# $Date$
#

from operator import itemgetter
import lxml.html
import sqlite3
import sys
import urllib


# Multiplier used by f2num(): full number = code * BASE + local number,
# i.e. the code is placed in front of a seven-digit local number.
BASE = 10 ** 7

# SQLite file the doctests in this module run against.
DATA_DB = 'test.db'


# Pages processed by full_update() and diff_update().  Rows taken from a
# URL that contains "DEF" are stored with the ``mobile`` flag set.
START_URLS = [
    'http://www.rossvyaz.ru/docs/articles/ABC-3x.html',
    'http://www.rossvyaz.ru/docs/articles/ABC-4x.html',
    'http://www.rossvyaz.ru/docs/articles/ABC-8x.html',
    'http://www.rossvyaz.ru/docs/articles/DEF-9x.html',
]

def f2num(x, base=None):
    """Convert a (code, range start, range end) row into absolute numbers.

    The first three items of *x* are converted with ``int``; the code is
    multiplied by *base* and added to each of the two range bounds.

    :param x: sequence whose first three items are int-convertible
        (cell texts from the HTML table or values read from the database).
    :param base: multiplier applied to the code; defaults to the
        module-level ``BASE``.
    :returns: ``(first_number, last_number)`` tuple, or ``None`` when the
        row has fewer than three items or an item is not a number.

    >>> f2num(['900', '0000000', '0099999'])
    (9000000000, 9000099999)
    >>> f2num(['900', 'n/a', '0099999']) is None
    True
    >>> f2num(['900']) is None
    True

    """
    if base is None:
        base = BASE
    try:
        # Build a real list (not map()) so the code also runs on Python 3;
        # unpacking doubles as the "at least three items" check.
        code, first, last = [int(item) for item in x[:3]]
    except (ValueError, TypeError):
        # ValueError: non-numeric text (this includes UnicodeEncodeError,
        # a ValueError subclass that Python 2 raises for non-ASCII unicode)
        # or too few items to unpack.
        # TypeError: a None cell, or *x* cannot be sliced.
        return None
    return (code * base + first, code * base + last)


def parseURL(url):
    """Fetch *url* and index the rows of its HTML tables by number range.

    Every ``<tr>`` of the page is reduced to the stripped text of its child
    elements.  A row is kept only when ``f2num`` can turn its first three
    cells into a number range; rows with fewer than three cells or with
    non-numeric leading cells are ignored.  When two rows produce the same
    range, the later one wins.

    Parsing approach based on
    http://stackoverflow.com/questions/11795077/simple-example-regarding-how-to-parseURL-data-from-html-output-using-lxml

    :param url: address of the page to download.
    :returns: ``(content, url_nums)`` where *content* is the ``content``
        attribute of the first ``<meta>`` element (``None`` if the page has
        no ``<meta>``) and *url_nums* maps ``(first, last)`` number tuples
        to the list of cell texts of the matching row.

    The example below needs network access:

    >>> content, nums = parseURL(START_URLS[3])
    >>> content
    'text/html; charset=windows-1251'
    """
    response = urllib.urlopen(url)
    try:
        doc = lxml.html.document_fromstring(response.read())
    finally:
        # Release the connection even if reading or parsing fails.
        response.close()

    metas = doc.xpath('//meta')
    content = metas[0].get('content') if metas else None

    url_nums = {}
    for row in doc.xpath('//tr'):
        cells = [cell.text_content().strip() for cell in row.getchildren()]
        if len(cells) < 3:
            # Not enough cells to hold code, range start and range end.
            continue
        key = f2num(cells[:3])
        if key:
            url_nums[key] = cells

    return (content, url_nums)


def readDB(db_path):
    """Read every number range stored in the ``codes_operator`` table.

    :param db_path: path of the SQLite database file.
    :returns: list with one ``f2num`` result per table row, i.e. a
        ``(first, last)`` number tuple, or ``None`` for a row ``f2num``
        could not convert.
    :raises sqlite3.Error: if the query fails, e.g. the table is missing.

    The example below depends on the contents of the local test database:

    >>> db_nums = readDB(DATA_DB)
    >>> len(db_nums)
    128034

    """
    sql = """
select region_code rc, number_start_range nsr, number_end_range ner
from codes_operator
"""
    con = sqlite3.connect(db_path)
    try:
        rows = con.execute(sql).fetchall()
    finally:
        # Always release the database handle, even when the query fails.
        con.close()

    # A real list (not map()) so len() works on Python 3 as well.
    return [f2num(row) for row in rows]


def difference(url, db_path):
    """Find the rows of the page at *url* whose range is not in the database.

    Ranges are compared as the ``(first, last)`` tuples produced by
    ``parseURL`` (page side) and ``readDB`` (database side).

    :param url: page to download and parse.
    :param db_path: path of the SQLite database file.
    :returns: ``(content, rows)`` where *content* is the value reported by
        ``parseURL`` and *rows* are the page rows missing from the
        database, ordered by number range; ``(None, None)`` when nothing
        is missing.

    The example below needs network access and the local test database:

    >>> content, diff = difference(START_URLS[3], DATA_DB)
    >>> content, len(diff)
    ('text/html; charset=windows-1251', 175)

    """
    content, url_nums = parseURL(url)
    db_nums = readDB(db_path)

    missing = set(url_nums) - set(db_nums)
    if not missing:
        return None, None

    # Sorted so the rows are inserted in a stable order, the same order
    # full_update() uses, instead of arbitrary set-iteration order.
    return content, [url_nums[key] for key in sorted(missing)]


def insertDB(db_path, data, schema):
    """Insert *data* into a table, creating the table first if needed.

    :param db_path: path of the SQLite database file.
    :param data: iterable of parameter tuples, one per row, matching the
        ``?`` placeholders in ``schema[1]``.
    :param schema: two-item tuple ``(table_name, values_template)``; the
        template is the text placed inside ``VALUES (...)``.
    :raises sqlite3.Error: if creating the table or inserting fails; in
        that case nothing is committed.

    NOTE: both items of *schema* are interpolated into the SQL text, so
    they must come from trusted code, never from user input.
    """
    table = schema[0]

    create_tbl = """
CREATE TABLE IF NOT EXISTS '%s' (
'id' INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
'name' varchar(250),
'region_code' integer unsigned,
'number_start_range' integer unsigned,
'number_end_range' integer unsigned,
'mobile' bool,
'region' varchar(250),
'country' char(4) )
""" % table

    sql = "insert into %s values (%s)" % schema

    con = sqlite3.connect(db_path)
    try:
        cur = con.cursor()
        cur.execute(create_tbl)
        cur.executemany(sql, data)
        con.commit()
    finally:
        # Close on every path; closing without commit() discards the
        # pending inserts, so a failure leaves no partial batch behind.
        con.close()


def full_update(db_path):
    """Download every page in ``START_URLS`` and insert all of its rows.

    For each URL the parsed rows are inserted into ``codes_operator`` in
    ascending order of their number range, and a line with the URL and the
    row count (or ``No data``) is printed.

    :param db_path: path of the SQLite database file.
    """
    # Reorder a parsed row into the insert's parameter order: item 4 goes
    # to ``name``, items 0-2 to the code and range columns, item 5 to
    # ``region``.
    pick_columns = itemgetter(4, 0, 1, 2, 5)

    for url in START_URLS:
        nums = parseURL(url)[1]

        if not nums:
            print('%s %s' % (url, 'No data'))
            continue

        rows = [nums[key] for key in sorted(nums)]
        print('%s %s' % (url, len(rows)))

        # Rows from a "DEF" page are stored with the mobile flag set.
        mob = 1 if 'DEF' in url.upper() else 0
        # 'RU' is single-quoted: double-quoted string literals are only a
        # SQLite compatibility quirk and can be disabled in some builds.
        schema = ('codes_operator', "NULL,?,?,?,?,%s,?,'RU'" % mob)
        insertDB(db_path, [pick_columns(row) for row in rows], schema)


def diff_update(db_path):
    """Insert only the rows of each ``START_URLS`` page missing from the DB.

    For each URL the rows reported by ``difference`` are inserted into
    ``codes_operator``, and a line with the URL and the number of new rows
    (or ``No difference``) is printed.

    :param db_path: path of the SQLite database file.
    """
    # Reorder a parsed row into the insert's parameter order: item 4 goes
    # to ``name``, items 0-2 to the code and range columns, item 5 to
    # ``region``.
    pick_columns = itemgetter(4, 0, 1, 2, 5)

    for url in START_URLS:
        diff = difference(url, db_path)[1]

        if not diff:
            print('%s %s' % (url, 'No difference'))
            continue

        print('%s %s' % (url, len(diff)))

        # Rows from a "DEF" page are stored with the mobile flag set.
        mob = 1 if 'DEF' in url.upper() else 0
        # 'RU' is single-quoted: double-quoted string literals are only a
        # SQLite compatibility quirk and can be disabled in some builds.
        schema = ('codes_operator', "NULL,?,?,?,?,%s,?,'RU'" % mob)
        insertDB(db_path, [pick_columns(row) for row in diff], schema)


def main():
    """Command-line entry point: ``<script> full|diff base.db``.

    Exactly two arguments are expected: the mode and the database path.
    Mode ``full`` runs ``full_update``, mode ``diff`` runs ``diff_update``;
    any other mode or argument count prints the usage line.
    """
    usage = 'Usage: %s full|diff base.db' % sys.argv[0]

    if len(sys.argv) != 3:
        print(usage)
        return

    mode, base = sys.argv[1], sys.argv[2]
    if mode == 'full':
        full_update(base)
    elif mode == 'diff':
        diff_update(base)
    else:
        print(usage)


def _test():
    """Run the doctests of the module that is executing as ``__main__``."""
    from doctest import testmod
    testmod()

# Script entry point: run the command-line interface only when the file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    # Call _test() here instead of main() to run the doctests.
    main()