Skip to content

Tuple index Error #17

@ccomunello

Description

@ccomunello
  • occupationcoder version: 0.2.0
  • Python version: 3.6
  • Operating System: CentOS

Description

Hi @aeturrell! I am hitting a bug in occupationcoder/utilities/utilities.py, line 319:

def getKey(item):
    """
    Build a multi-criteria sort key from an indexable item.

    Is used to specify order of argument importance, which can be used
    for sorting and getting max using multiple criteria.

    Parameters
    ----------
    item : sequence
        Must support integer indexing and hold at least three elements.
        ``item[0]`` must be a string (its ``split()`` method is called).

    Returns
    -------
    tuple
        ``(item[1], item[2], n_words)`` where ``n_words`` is the number of
        whitespace-separated words in ``item[0]``.

    Raises
    ------
    IndexError
        If ``item`` has fewer than three elements.

    >>> getKey(('registered nurse', 90, 4))
    (90, 4, 2)
    """
    # Word count of the first element is the last, least important criterion.
    return (item[1], item[2], len(item[0].split()))

I get a "tuple index out of range" error, although the code itself looks fine. The full traceback is below.

The problem seems to be caused by one specific data file (I am looping over several), but looking at the file structure, variable classes, string text, etc., there is no apparent difference from the other files. The code I am running is below. Any guesses as to where the error might be coming from? Thanks!

What I Did

#!/usr/bin/python36

import pandas as pd
from occupationcoder.coder import coder
import os.path
import re
import os
from os import walk
import glob
import nltk
nltk.download('punkt')	
nltk.download('wordnet')
nltk.download('omw-1.4')


# Root directory that main() walks (via os.walk) looking for input CSV files.
dir = '/home/ALL/'
# Directory in which vacancy_stock_raw.csv is written.
# NOTE(review): the string literal on the next line is unterminated as posted
# (the path appears to have been cut off); it must be completed before this
# script can run.
dir1 = '/home/

# Coder instance from occupationcoder; main() calls its codedataframe() method.
myCoder = coder.Coder()


# A single placeholder row of twenty zeros, one per column name listed below.
d0 = [[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]]

# Seed frame holding only the placeholder row; main() appends coded data to it.
df0 = pd.DataFrame(data=d0, columns=[ 'year', 'month', 'day', 'category_id', 'company', 'date_created','location_raw', 'salary_max', 'salary_min', 'salary_predicted',
       'salary_currency', 'salary_raw', 'title', 'category_name', 'location_path', 'contract_time',
       'contract_type', 'company_id', 'company_name', 'description'] )
# Write the seed frame to vacancy_stock_raw.csv in dir1, then read it straight
# back so df0 reflects what is on disk.
df0.to_csv(os.path.join(dir1, 'vacancy_stock_raw.csv'))
df0=pd.read_csv(os.path.join(dir1, 'vacancy_stock_raw.csv'))


#loop over folders and append selected variables into vacancy_stock_raw.csv
def main():
    """Code every CSV found under ``dir`` and accumulate the results on disk.

    Walks the module-level ``dir`` tree. For each file whose path ends in
    ``.csv`` it:

    1. reads the file and renames ``title``/``description``/``category_name``
       to ``job_title``/``job_description``/``job_sector``;
    2. casts everything to ``str`` and truncates the three text fields;
    3. runs ``myCoder.codedataframe`` on the selected columns;
    4. appends the selected output columns (including ``SOC_code``) to the
       running frame and rewrites ``vacancy_stock_raw.csv`` in ``dir1``.

    Relies on the module-level names ``dir``, ``dir1``, ``df0`` and
    ``myCoder``. Returns ``None``.
    """
    # Columns shared by the coder input and the saved output.
    shared_columns = [
        'year', 'month', 'day', 'category_id', 'company', 'date_created',
        'location_raw', 'salary_max', 'salary_min', 'salary_predicted',
        'salary_currency', 'job_title', 'job_sector', 'location_path',
        'contract_time', 'contract_type', 'company_id', 'company_name',
    ]
    renamed = {
        'title': 'job_title',
        'description': 'job_description',
        'category_name': 'job_sector',
    }
    out_path = os.path.join(dir1, 'vacancy_stock_raw.csv')
    df2 = df0

    for root, _dirs, files in os.walk(dir):
        for name in files:
            filepath = os.path.join(root, name)
            if not filepath.endswith(".csv"):
                continue
            print(filepath)
            d = pd.read_csv(filepath, index_col=None, header=0)
            d = d.rename(columns=renamed)
            d = d.astype(str)
            d['job_title'] = d['job_title'].str.slice(0, 100)
            d['job_sector'] = d['job_sector'].str.slice(0, 100)
            d['job_description'] = d['job_description'].str.slice(0, 200)
            # Explicit copy so the column added below is not assigned onto a
            # selection of the original frame.
            d = d[shared_columns + ['job_description']].copy()
            d['titleno_space'] = d['job_title']
            print('ready')
            df1 = myCoder.codedataframe(d)
            print('socdone')
            df1 = df1[shared_columns + ['SOC_code']]
            # DataFrame.append was removed in pandas 2.0; concat is the
            # equivalent that works on old and new versions alike.
            df2 = pd.concat([df2, df1], sort=False)
            # Rewritten after every file so progress survives a later failure.
            df2.to_csv(out_path)


if __name__ == '__main__':
    main()


WARNING:root:Applied processor reduces input query to empty string, all comparisons will have score 0. [Query: '–']
Traceback (most recent call last):
  File "./1adzuna.py", line 67, in <module>
    main()
  File "./1adzuna.py", line 57, in main
    df1=myCoder.codedataframe(d)
  File "/home/cc18002/.local/lib/python3.6/site-packages/occupationcoder/coder/coder.py", line 104, in codedataframe
    x = res.compute(scheduler='processes')
  File "/usr/local/lib/python3.6/site-packages/dask/base.py", line 156, in compute
    (result,) = compute(self, traverse=False, **kwargs)
  File "/usr/local/lib/python3.6/site-packages/dask/base.py", line 397, in compute
    results = schedule(dsk, keys, **kwargs)
  File "/usr/local/lib/python3.6/site-packages/dask/multiprocessing.py", line 192, in get
    raise_exception=reraise, **kwargs)
  File "/usr/local/lib/python3.6/site-packages/dask/local.py", line 501, in get_async
    raise_exception(exc, tb)
  File "/usr/local/lib/python3.6/site-packages/dask/compatibility.py", line 111, in reraise
    raise exc.with_traceback(tb)
  File "/usr/local/lib/python3.6/site-packages/dask/local.py", line 272, in execute_task
    result = _execute_task(task, data)
  File "/usr/local/lib/python3.6/site-packages/dask/local.py", line 253, in _execute_task
    return func(*args2)
  File "/usr/local/lib/python3.6/site-packages/dask/dataframe/core.py", line 3684, in apply_and_enforce
    df = func(*args, **kwargs)
  File "/usr/local/lib/python3.6/site-packages/dask/utils.py", line 694, in __call__
    return getattr(obj, self.method)(*args, **kwargs)
  File "/home/cc18002/.local/lib/python3.6/site-packages/pandas/core/frame.py", line 6928, in apply
    return op.get_result()
  File "/home/cc18002/.local/lib/python3.6/site-packages/pandas/core/apply.py", line 186, in get_result
    return self.apply_standard()
  File "/home/cc18002/.local/lib/python3.6/site-packages/pandas/core/apply.py", line 292, in apply_standard
    self.apply_series_generator()
  File "/home/cc18002/.local/lib/python3.6/site-packages/pandas/core/apply.py", line 321, in apply_series_generator
    results[i] = self.f(v)
  File "/home/cc18002/.local/lib/python3.6/site-packages/occupationcoder/utilities/utilities.py", line 392, in return_best_match_2
    final_code = max(items(), key=lambda x: getKey(x[1]))
  File "/home/cc18002/.local/lib/python3.6/site-packages/occupationcoder/utilities/utilities.py", line 392, in <lambda>
    final_code = max(items(), key=lambda x: getKey(x[1]))
  File "/home/cc18002/.local/lib/python3.6/site-packages/occupationcoder/utilities/utilities.py", line 319, in getKey
    return (item[1], item[2], len(item[0].split()))
IndexError: ('tuple index out of range', 'occurred at index 706')

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions