diff --git a/README.rst b/README.rst index ed6919b..1c15611 100644 --- a/README.rst +++ b/README.rst @@ -47,6 +47,41 @@ Example Usage: >>> print(translator.translate('Привет, мир!', lang_from='ru', lang_to='en')) Hello World! +3. Translator Text API V2 to V3 Migration +----------------------------------------- + +Translator Text API V2 was deprecated on April 30, 2018 and will be discontinued on April 30, 2019. So this API wrapper has been updated to the V3 API version. + +Microsoft has moved some features to other APIs and others are no longer supported. Check the official `Translator Text API V2 to V3 Migration`_ documentation for details. + +With this update we have tried to keep the functions' input and output as they were, but it has not been possible in all cases. + +No changes needed for: + +- translate +- break_sentences +- get_langs +- get_lang_names +- detect_lang +- detect_langs + +Output has changed for: + +- translate_array: The output json is different, check official docs for details +- translate_array2: The output json is different, check official docs for details + +Input has changed for: + +- get_langs: speakable input parameter has been removed + +The following functions have been removed because the API features have been moved to other APIs: + +- get_translations +- add_translation +- speak +- speak_to_file + + Testing ======= To run tests you need to set ``TEST_MSTRANSLATOR_SUBSCRIPTION_KEY`` environment variable @@ -58,3 +93,4 @@ and install `tox`_ package. After that run shell command: .. _Microsoft Azure: http://azure.com .. _tox: http://tox.readthedocs.org/en/latest/ +.. 
_Translator Text API V2 to V3 Migration: https://docs.microsoft.com/en-us/azure/cognitive-services/translator/migrate-to-v3 diff --git a/mstranslator.py b/mstranslator.py index 5aa2057..11032e6 100644 --- a/mstranslator.py +++ b/mstranslator.py @@ -43,7 +43,8 @@ def __call__(self, r): def request_token(self): headers = { - 'Ocp-Apim-Subscription-Key': self.subscription_key + 'Ocp-Apim-Subscription-Key': self.subscription_key, + 'Content-type': 'application/json', } resp = requests.post(self.access_url, headers=headers) if resp.status_code == 200: @@ -64,17 +65,20 @@ def token(self): class Translator(object): - api_url = "https://api.microsofttranslator.com/v2/ajax.svc/" + api_url = "https://api.cognitive.microsofttranslator.com/" def __init__(self, subscription_key): self.auth = AccessToken(subscription_key) def make_url(self, action): - return self.api_url + action + return self.api_url + action + '?api-version=3.0' - def make_request(self, action, params=None): + def make_request(self, action, params=None, json=None, headers=None, is_post=True): url = self.make_url(action) - resp = requests.get(url, auth=self.auth, params=params) + if(is_post): + resp = requests.post(url, auth=self.auth, params=params, json=json, headers=headers) + else: + resp = requests.get(url, auth=self.auth, params=params, headers=headers) return self.make_response(resp) def make_response(self, resp): @@ -89,7 +93,7 @@ def make_response(self, resp): return data - def _translate(self, action, text_params, lang_from, lang_to, contenttype, category): + def _translate(self, action, json, lang_from, lang_to, contenttype, category, include_alignment=False): if not lang_to: raise ValueError('lang_to parameter is required') if contenttype not in ('text/plain', 'text/html'): @@ -97,135 +101,98 @@ def _translate(self, action, text_params, lang_from, lang_to, contenttype, categ params = { 'to': lang_to, - 'contentType': contenttype, 'category': category, + 'includeAlignment': 'true' if 
include_alignment else 'false' } + if lang_from: params['from'] = lang_from - params.update(text_params) - return self.make_request(action, params) + return self.make_request(action, params, json) def translate(self, text, lang_from=None, lang_to=None, contenttype='text/plain', category='general'): - params = { - 'text': text, - } - return self._translate('Translate', params, lang_from, lang_to, + json = [{ + 'Text' : text + }] + response = self._translate('translate', json, lang_from, lang_to, contenttype, category) + if 'error' in response: + raise ArgumentOutOfRangeException(response['error']['message']) + else: + return response[0]['translations'][0]['text'] def translate_array(self, texts=[], lang_from=None, lang_to=None, contenttype='text/plain', category='general'): - params = { - 'texts': json.dumps(texts), - } - return self._translate('TranslateArray', params, lang_from, lang_to, + json = [ + {'Text' : text} for text in texts + ] + return self._translate('translate', json, lang_from, lang_to, contenttype, category) def translate_array2(self, texts=[], lang_from=None, lang_to=None, contenttype='text/plain', category='general'): - params = { - 'texts': json.dumps(texts), - } - return self._translate('TranslateArray2', params, lang_from, lang_to, - contenttype, category) - - def get_translations(self, text, lang_from, lang_to, max_n=10, contenttype='text/plain', category='general', - url=None, user=None, state=None): - options = { - 'Category': category, - 'ContentType': contenttype, - } - if url: - options['Uri'] = url - if user: - options['User'] = user - if state: - options['State'] = state - params = { - 'text': text, - 'to': lang_to, - 'from': lang_from, - 'maxTranslations': max_n, - 'options': json.dumps(options) - } - return self.make_request('GetTranslations', params) + json = [ + {'Text' : text} for text in texts + ] + return self._translate('translate', json, lang_from, lang_to, + contenttype, category, include_alignment=True) def 
break_sentences(self, text, lang): if len(text) > 10000: raise ValueError('The text maximum length is 10000 characters') params = { - 'text': text, 'language': lang, } - lengths = self.make_request('BreakSentences', params) + json = [ + {'Text': text} + ] + lengths = self.make_request('breaksentence', params, json) if isinstance(text, bytes): text = text.decode('utf-8') c = 0 result = [] - for i in lengths: + for i in lengths[0]['sentLen']: result.append(text[c:c+i]) c += i return result - def add_translation(self, text_orig, text_trans, lang_from, lang_to, user, rating=1, - contenttype='text/plain', category='general', url=None): - if len(text_orig) > 1000: - raise ValueError('The original text maximum length is 1000 characters') - if len(text_trans) > 2000: - raise ValueError('The translated text maximum length is 1000 characters') - if contenttype not in ('text/plain', 'text/html'): - raise ValueError('Invalid contenttype value') - if not -10 < rating < 10 or not isinstance(rating, int): - raise ValueError('Raiting must be an integer value between -10 and 10') + def get_langs(self): params = { - 'originalText': text_orig, - 'translatedText': text_trans, - 'from': lang_from, - 'to': lang_to, - 'user': user, - 'contentType': contenttype, - 'rating': rating, - 'category': category, + 'scope': 'translation' } - if url: - params['uri'] = url - return self.make_request('AddTranslation', params) - def get_langs(self, speakable=False): - action = 'GetLanguagesForSpeak' if speakable else 'GetLanguagesForTranslate' - return self.make_request(action) + response = self.make_request('languages', params, is_post=False) + result = [lang for lang in response['translation']] + return result def get_lang_names(self, langs, lang_to): params = { - 'locale': lang_to, - 'languageCodes': json.dumps(langs), + 'scope': 'translation' + } + headers = { + 'Accept-Language': lang_to } - return self.make_request('GetLanguageNames', params) + response = self.make_request('languages', params, 
headers=headers, is_post=False) + result = [] + for lang in langs: + if lang in response['translation']: + result.append(response['translation'][lang]['name']) + return result def detect_lang(self, text): - return self.make_request('Detect', {'text': text}) + json = [ + {'Text': text} + ] + response = self.make_request('detect', json=json) + return response[0]["language"] def detect_langs(self, texts=[]): - return self.make_request('DetectArray', {'texts': json.dumps(texts)}) - - def speak(self, text, lang, format='audio/wav', best_quality=False): - if format not in ('audio/wav', 'audio/mp3'): - raise ValueError('Invalid format value') - params = { - 'text': text, - 'language': lang, - 'format': format, - 'options': 'MaxQuality' if best_quality else 'MinSize', - } - return self.make_request('Speak', params) - - def speak_to_file(self, file, *args, **kwargs): - resp = requests.get(self.speak(*args, **kwargs)) - if isinstance(file, basestring): - with open(file, 'wb'): - file.write(resp.content) - elif hasattr(file, 'write'): - file.write(resp.content) - else: - raise ValueError('Expected filepath or a file-like object') + json = [ + {'Text' : text} for text in texts + ] + response = self.make_request('detect', json=json) + parsedResponse = [ + language["language"] for language in response + ] + return parsedResponse diff --git a/tests.py b/tests.py index e3ad804..b26f05a 100644 --- a/tests.py +++ b/tests.py @@ -44,38 +44,28 @@ def setUp(self): self.translator_mock = TranslatorMock(SUBSCRIPTION_KEY) def test_translate(self): - t = self.translator.translate('world', 'en', 'ru') - self.assertEqual('мир', t) + t = self.translator.translate('world', 'en', 'es') + self.assertEqual('Mundo', t) def test_translate_exception(self): self.assertRaises(ArgumentOutOfRangeException, self.translator.translate, 'world', 'en', 'asdf') def test_translate_array(self): - ts = self.translator.translate_array(['hello', 'world'], 'en', 'ru') - translations = [t['TranslatedText'] for t in 
ts] - self.assertEqual(['Привет', 'мир'], translations) + ts = self.translator.translate_array(['hello', 'world'], 'en', 'es') + translations = [t['translations'][0]['text'] for t in ts] + self.assertEqual(['Hola', 'Mundo'], translations) def test_translate_array2(self): - ts = self.translator.translate_array2(['hello', 'world', 'Hello. How are you?'], 'en', 'ru') - translations = [t['TranslatedText'] for t in ts] - self.assertEqual(['Привет', 'мир', 'Привет. Как ваши дела?'], translations) - alignments = [t['Alignment'] for t in ts] - self.assertEqual(['0:4-0:5', '0:4-0:2', '0:5-0:6 7:18-8:21'], alignments) - - def test_get_translations(self): - t = self.translator.get_translations('world', 'en', 'ru') - self.assertIsInstance(t, dict) - self.assertIn('Translations', t) + ts = self.translator.translate_array2(['The answer lies in machine translation.'], 'en', 'es') + translations = [t['translations'][0]['text'] for t in ts] + self.assertEqual(['La respuesta radica en la traducción automática.'], translations) + alignments = [t['translations'][0]['alignment']['proj'] for t in ts] + self.assertEqual('0:2-0:1 4:9-3:11 11:14-13:18 16:17-20:21 19:25-37:46 27:37-26:35 38:38-47:47', alignments[0]) def test_break_sentences(self): t = self.translator.break_sentences('Hello. How are you?', 'en') self.assertEqual(['Hello. 
', 'How are you?'], t) - def test_add_translation(self): - url = self.translator_mock.add_translation('orig', 'trans', 'en', 'ru', user='test') - self.assertIn('originalText=orig', url) - self.assertIn('translatedText=trans', url) - def test_get_langs(self): langs = self.translator.get_langs() self.assertIsInstance(langs, list) @@ -85,22 +75,8 @@ def test_get_lang_names(self): lang_names = self.translator.get_lang_names(['ru', 'en'], 'en') self.assertEqual(['Russian', 'English'], lang_names) - def test_get_speackable_langs(self): - langs = self.translator.get_langs(speakable=True) - self.assertIsInstance(langs, list) - self.assertIn('en-us', langs) - def test_detect_lang(self): self.assertEqual('en', self.translator.detect_lang('Hello')) def test_detect_langs(self): self.assertEqual(['en', 'ru'], self.translator.detect_langs(['Hello', 'Привет'])) - - def test_speak(self): - self.assertIsNotNone(self.translator.speak('Hello', 'en')) - - def test_speak_to_file(self): - s = StringIO() - self.translator.speak_to_file(s, 'Hello', 'en') - s.seek(0) - self.assertTrue(len(s.read()) > 0)