forked from kldtz/CharSplit
-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathdoc_server.py
More file actions
executable file
·96 lines (83 loc) · 2.82 KB
/
doc_server.py
File metadata and controls
executable file
·96 lines (83 loc) · 2.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#!/usr/bin/python3
# Copyright eBrevia.com 2019
"""Document splitting server."""
import logging
import socket
import sys
import resource
import doc_config
import doc_split
result_map = {}
dict_mode = False
logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
# pylint: disable=invalid-name
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
def tabular(tab_map):
"""Convert Python dict into sorted output lines "unsplit\tsplit\n"."""
result = []
for orig in sorted(tab_map.keys()):
split = tab_map[orig]
result.append(orig + '\t' + split)
return '\n'.join(result) + '\n'
def run(client_socket, client_address, dict):
"""Reads, splits, writes."""
input_bytes = bytearray(b'')
blockno = 0
while True:
block = client_socket.recv(2048)
#logger.info("Read block %d", blockno)
blockno += 1
if not block:
break
input_bytes += block
#logger.info("Read %d bytes", len(input_bytes))
input_str = input_bytes.decode()
result_map.clear()
output_str = doc_split.doc_split(input_str, dict, result_map)
if dict_mode:
output_str = tabular(result_map)
output_bytes = output_str.encode()
#logger.info("Split the text")
client_socket.sendall(output_bytes)
#logger.info("Written %d bytes", len(output_bytes))
client_socket.shutdown(socket.SHUT_WR)
#logger.info("Client at %s disconnecting", client_address)
def main():
"""Main server program.
Reads the whole of standard input, splits it all,
sends the result to standard output.
Usage: doc_server [-p][-d] port dict
"""
if len(sys.argv) > 3:
dict = sys.argv[3]
else:
dict = doc_config.DEFAULT_DICT
if len(sys.argv) > 2:
port = int(sys.argv[2])
else:
port = doc_config.DEFAULT_PORT
global dict_mode
if len(sys.argv) > 1:
dict_mode = (sys.argv[1] == '-d')
# Load dictionaries and report memory usage while quiescent
doc_split.load_known_words(dict)
mem_mb = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss // 1024
logger.info("Memory usage: %d MB", mem_mb)
while True:
try:
server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
server.bind(('localhost', port))
logger.info("Server started")
while True:
server.listen(5)
client, client_address = server.accept()
run(client, client_address, dict)
except Exception:
# trap all errors so the server doesn't die
logger.exception("Error in doc_server main loop:");
server.close()
if __name__ == "__main__":
main()