Tuple index Error

* occupationcoder version: 0.2.0
* Python version: 3.6
* Operating System: CentOS

### Description
Hi @aeturrell! I am hitting an error at line 319 of `occupationcoder/utilities/utilities.py`, inside `getKey`:

```python
def getKey(item):
    """
    Takes any iterable as input.
    Returns tuple.
    Is used to specify order of argument importance, which can be used for
    sorting and gettin max using multiple criteria.
    >>> getKey(('registered nurse', 90, 4))
    (90, 4, 2)
    """
    return (item[1], item[2], len(item[0].split()))
```

This raises an `IndexError` ("tuple index out of range"), even though the code itself looks fine. The full traceback is below.




The problem seems to be confined to one specific data file (I am looping over several), but looking at that file's structure, variable types, string contents, etc., I cannot see any apparent difference from the other files. The code I am running is below. Any guesses as to where the error might be coming from? Thanks!



### What I Did

```
#!/usr/bin/python36

import pandas as pd
from occupationcoder.coder import coder
import os.path
import re
import os
from os import walk
import glob
import nltk
nltk.download('punkt')	
nltk.download('wordnet')
nltk.download('omw-1.4')


dir = '/home/ALL/'
dir1 = '/home/

myCoder = coder.Coder()


d0 = [[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]]

df0 = pd.DataFrame(data=d0, columns=[ 'year', 'month', 'day', 'category_id', 'company', 'date_created','location_raw', 'salary_max', 'salary_min', 'salary_predicted',
       'salary_currency', 'salary_raw', 'title', 'category_name', 'location_path', 'contract_time',
       'contract_type', 'company_id', 'company_name', 'description'] )
df0.to_csv(os.path.join(dir1, 'vacancy_stock_raw.csv'))
df0=pd.read_csv(os.path.join(dir1, 'vacancy_stock_raw.csv'))


#loop over folders and append selected variables into vacancy_stock_raw.csv
def main():
    df2=df0
    
    li=[]
    r = []
    def list_files(dir):
        r = []
    for root, dirs, files in os.walk(dir):
        for name in files:
            filepath = root + os.sep + name
            if filepath.endswith(".csv"):
                print(filepath)
                d = pd.read_csv(filepath, index_col=None, header=0)
                d = d.rename(columns={'title': 'job_title'})
                d = d.rename(columns={'description': 'job_description'})
                d = d.rename(columns={'category_name': 'job_sector'})
                d=d.astype(str)
                d['job_title'] = d['job_title'].str.slice(0, 100)
                d['job_sector'] = d['job_sector'].str.slice(0, 100)
                d['job_description'] = d['job_description'].str.slice(0, 200)
                d = d[[ 'year', 'month', 'day', 'category_id', 'company', 'date_created','location_raw', 'salary_max', 'salary_min', 'salary_predicted',
           'salary_currency', 'job_title', 'job_sector', 'location_path', 'contract_time',
           'contract_type', 'company_id', 'company_name', 'job_description']]
                d['titleno_space'] = d['job_title']
                print('ready')
                df1=myCoder.codedataframe(d)
                print('socdone')
                df1 = df1[[ 'year', 'month', 'day', 'category_id', 'company', 'date_created','location_raw', 'salary_max', 'salary_min', 'salary_predicted',
           'salary_currency', 'job_title', 'job_sector', 'location_path', 'contract_time',
           'contract_type', 'company_id', 'company_name', 'SOC_code']]
                df2=df2.append(df1)
                df2.to_csv(os.path.join(dir1, 'vacancy_stock_raw.csv'))

                
if __name__ == '__main__':
    main()
```

```


WARNING:root:Applied processor reduces input query to empty string, all comparisons will have score 0. [Query: 'â€“']
Traceback (most recent call last):
  File "./1adzuna.py", line 67, in <module>
    main()
  File "./1adzuna.py", line 57, in main
    df1=myCoder.codedataframe(d)
  File "/home/cc18002/.local/lib/python3.6/site-packages/occupationcoder/coder/coder.py", line 104, in codedataframe
    x = res.compute(scheduler='processes')
  File "/usr/local/lib/python3.6/site-packages/dask/base.py", line 156, in compute
    (result,) = compute(self, traverse=False, **kwargs)
  File "/usr/local/lib/python3.6/site-packages/dask/base.py", line 397, in compute
    results = schedule(dsk, keys, **kwargs)
  File "/usr/local/lib/python3.6/site-packages/dask/multiprocessing.py", line 192, in get
    raise_exception=reraise, **kwargs)
  File "/usr/local/lib/python3.6/site-packages/dask/local.py", line 501, in get_async
    raise_exception(exc, tb)
  File "/usr/local/lib/python3.6/site-packages/dask/compatibility.py", line 111, in reraise
    raise exc.with_traceback(tb)
  File "/usr/local/lib/python3.6/site-packages/dask/local.py", line 272, in execute_task
    result = _execute_task(task, data)
  File "/usr/local/lib/python3.6/site-packages/dask/local.py", line 253, in _execute_task
    return func(*args2)
  File "/usr/local/lib/python3.6/site-packages/dask/dataframe/core.py", line 3684, in apply_and_enforce
    df = func(*args, **kwargs)
  File "/usr/local/lib/python3.6/site-packages/dask/utils.py", line 694, in __call__
    return getattr(obj, self.method)(*args, **kwargs)
  File "/home/cc18002/.local/lib/python3.6/site-packages/pandas/core/frame.py", line 6928, in apply
    return op.get_result()
  File "/home/cc18002/.local/lib/python3.6/site-packages/pandas/core/apply.py", line 186, in get_result
    return self.apply_standard()
  File "/home/cc18002/.local/lib/python3.6/site-packages/pandas/core/apply.py", line 292, in apply_standard
    self.apply_series_generator()
  File "/home/cc18002/.local/lib/python3.6/site-packages/pandas/core/apply.py", line 321, in apply_series_generator
    results[i] = self.f(v)
  File "/home/cc18002/.local/lib/python3.6/site-packages/occupationcoder/utilities/utilities.py", line 392, in return_best_match_2
    final_code = max(items(), key=lambda x: getKey(x[1]))
  File "/home/cc18002/.local/lib/python3.6/site-packages/occupationcoder/utilities/utilities.py", line 392, in <lambda>
    final_code = max(items(), key=lambda x: getKey(x[1]))
  File "/home/cc18002/.local/lib/python3.6/site-packages/occupationcoder/utilities/utilities.py", line 319, in getKey
    return (item[1], item[2], len(item[0].split()))
IndexError: ('tuple index out of range', 'occurred at index 706')

```



Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Tuple index Error #17

Description

What I Did

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

Tuple index Error #17

Description

Description

What I Did

Metadata

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

Issue actions