forked from mead-ml/mead-baseline
-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathtag-text.py
More file actions
66 lines (56 loc) · 2.94 KB
/
tag-text.py
File metadata and controls
66 lines (56 loc) · 2.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
from __future__ import print_function
import baseline as bl
import argparse
import os
from baseline.utils import str2bool, read_conll
# Command-line interface: tag raw text (or a text/CONLL file) with a trained
# baseline tagger model, optionally via a remote gRPC/REST endpoint.
parser = argparse.ArgumentParser(description='Tag text with a model')
parser.add_argument('--model', help='A tagger model with extended features', required=True, type=str)
# Either a path to a file (plain text or CONLL) or a raw sentence string.
parser.add_argument('--text', help='raw value', type=str)
parser.add_argument('--conll', help='is file type conll?', type=str2bool, default=False)
parser.add_argument('--features', help='(optional) features in the format feature_name:index (column # in conll) or '
'just feature names (assumed sequential)', default=[], nargs='+')
parser.add_argument('--backend', help='backend', default='tf')
parser.add_argument('--device', help='device')
# Remote serving endpoint, e.g. localhost:8500; when unset, inference is local.
parser.add_argument('--remote', help='(optional) remote endpoint', type=str) # localhost:8500
parser.add_argument('--name', help='(optional) signature name', type=str)
parser.add_argument('--preproc', help='(optional) where to perform preprocessing', choices={'client', 'server'}, default='client')
parser.add_argument('--export_mapping', help='mapping between features and the fields in the grpc/ REST '
'request, eg: token:word ner:ner. This should match with the '
'`exporter_field` definition in the mead config',
default=[], nargs='+')
# Parsed at import time: this module is a script, not a library.
args = parser.parse_args()
def create_export_mapping(feature_map_strings):
    """Parse ``name:field`` strings into a ``{name: field}`` dict.

    Whitespace-only or empty entries are dropped; an empty (or all-blank)
    input yields an empty mapping.
    """
    mapping = {}
    for entry in feature_map_strings:
        entry = entry.strip()
        if not entry:
            continue
        parts = entry.split(':')
        mapping[parts[0]] = parts[1]
    return mapping
def feature_index_mapping(features):
    """Map feature names to CONLL column indices.

    Two input shapes are accepted (detected from the first entry):
    explicit ``name:index`` specs, or bare names assigned sequential
    indices starting at 0. An empty input yields an empty mapping.
    """
    if not features:
        return {}
    if ':' in features[0]:
        mapping = {}
        for spec in features:
            parts = spec.split(':')
            mapping[parts[0]] = int(parts[1])
        return mapping
    return dict(zip(features, range(len(features))))
# Build `texts`: a list of sentences, each a list of tokens (or, for CONLL
# input with explicit features, a list of {feature_name: token} dicts).
#
# Bug fix: the original called os.path.exists(args.text) without checking for
# None, so omitting --text crashed with a TypeError traceback instead of a
# usage error. Also, os.path.isfile() already implies existence, so the
# separate exists() check was redundant.
if args.text is None:
    parser.error('--text is required (a file path or a raw sentence)')
if os.path.isfile(args.text):
    texts = []
    if args.conll:
        # Map each requested feature to its CONLL column index.
        feature_indices = feature_index_mapping(args.features)
        for sentence in read_conll(args.text):
            if feature_indices:
                texts.append([{k: line[v] for k, v in feature_indices.items()} for line in sentence])
            else:
                # No features requested: take the first column (the surface token).
                texts.append([line[0] for line in sentence])
    else:
        # Plain text file: one whitespace-tokenized sentence per line.
        with open(args.text, 'r') as f:
            for line in f:
                texts.append(line.strip().split())
else:
    # Not a file path: treat the argument itself as one raw sentence.
    texts = [args.text.split()]
# Load the tagger service (local or remote) and emit one "token label" line
# per token, with a blank line separating sentences.
tagger = bl.TaggerService.load(args.model, backend=args.backend, remote=args.remote,
                               name=args.name, preproc=args.preproc, device=args.device)
export_mapping = create_export_mapping(args.export_mapping)
for sentence in tagger.predict(texts, export_mapping=export_mapping):
    for token in sentence:
        print("{} {}".format(token['text'], token['label']))
    print()