Skip to content

Creating the model

Petros edited this page Oct 26, 2023 · 3 revisions

Model creation

_import_from_file

As we can see in the import_model function, the bulk of the model is created by the _import_from_file function. This makes sense because, in the current version, most of our data lives in the text file.

def import_model(filename, instance_options):
    """Reads an instance file and returns the fully initialised model.

    Args:
      filename: The file name of the file to import from.
      instance_options: User chosen instance options.

    Returns:
      The model representation of the instance.
    """
    imported = _import_from_file(filename, instance_options)

    # Post-import initialisation steps, run in this order on the new model.
    imported.set_project_lists()
    imported.set_lecturer_lists()
    imported.set_rank_lists()

    return imported

What I have to do now is write a new function that generates the model from the data provided in the API request body. I have decided to send the data from the client as three separate 2D arrays, one for each agent in the matching problem. Using these arrays, my new function should create a model that is identical to the model returned by the _import_from_file function.

Here is the function in its entirety.

def _import_from_file(filename, instance_options):
    """Imports a matching instance from file, returning an SPA-STL model.

    The file is read line by line and each line is interpreted according to
    its position:
      * line 0 holds the counts (students, projects and, with three agents,
        lecturers);
      * the next num_students lines hold the student preference lists;
      * the next num_projects lines hold the project information;
      * with three agents only, the next num_lecturers lines hold the
        lecturer information.
    Colons are stripped from every line before it is split on whitespace.

    Args:
      filename: The file name of the file to import from.
      instance_options: User chosen instance options. Indexed here with
        Instance_options.NUMAGENTS (compared against 2 and 3) and
        Instance_options.TWOPL (used as a truth value).

    Returns:
      The model representation of the instance.
    """
    model = Model()
    # Lecturer number of each project, in project order.
    project_lecturers = []
    # Accumulates the results of _create_student_ranks across lines.
    lecturer_student_ranks = {}

    with open(filename) as f:
        for index, line in enumerate(f):
            # Drop colons so every kind of line can be split on whitespace.
            line_split = line.replace(':', '').split()

            # first line
            if index == 0:
                first_line = line.split()
                model.num_students = int(first_line[0])
                model.num_projects = int(first_line[1])
                # With two agents the lecturer count is read from the same
                # field as the project count.
                if instance_options[Instance_options.NUMAGENTS] == 2:
                    model.num_lecturers = int(first_line[1])
                if instance_options[Instance_options.NUMAGENTS] == 3:
                    model.num_lecturers = int(first_line[2])

            # student preference lists
            elif index < model.num_students + 1:
                # The first token on the line is skipped; the rest are the
                # preferences.
                st_prefs = line_split[1:]
                # Student lines start at index 1, so the line index is used
                # directly as the student number.
                st_num = index
                pairs_row = _create_pairs_row(model, st_prefs, st_num)
                model.pairs.append(pairs_row)

            # projects information
            elif index < model.num_students + model.num_projects + 1:
                # SPA
                if instance_options[Instance_options.NUMAGENTS] == 3:
                    model.proj_lower_quotas.append(int(line_split[1]))
                    model.proj_upper_quotas.append(int(line_split[2]))
                    project_lecturers.append(int(line_split[3]))

                # HR
                if instance_options[Instance_options.NUMAGENTS] == 2:
                    model.proj_lower_quotas.append(int(line_split[1]))
                    model.proj_upper_quotas.append(int(line_split[2]))
                    # The project's own number is recorded as its lecturer,
                    # and the lecturer quotas are copied from the same
                    # fields as the project quotas (target and upper quota
                    # both come from field 2).
                    proj_num = index - (model.num_students)
                    project_lecturers.append(proj_num)
                    model.lec_lower_quotas.append(int(line_split[1]))
                    model.lec_targets.append(int(line_split[2]))
                    model.lec_upper_quotas.append(int(line_split[2]))
                    # NOTE(review): one quota is appended per project line,
                    # so this looks like the same value as the proj_num
                    # computed above - confirm and drop one of them.
                    proj_num = len(model.proj_lower_quotas)

                    # if hospital preference lists are present, add them to the
                    # model
                    if instance_options[Instance_options.TWOPL]:
                        additional_hosp_ranks = _create_student_ranks(
                            model, line_split[3:], proj_num)
                        lecturer_student_ranks.update(additional_hosp_ranks)

            # SPA only - lecturer information
            elif (index < model.num_students + model.num_projects +
                  model.num_lecturers + 1 and
                  instance_options[Instance_options.NUMAGENTS] == 3):
                model.lec_lower_quotas.append(int(line_split[1]))
                model.lec_targets.append(int(line_split[2]))
                model.lec_upper_quotas.append(int(line_split[3]))
                # NOTE(review): rank is not read anywhere else in this
                # function - looks like a leftover.
                rank = 1
                # Lecturer lines follow the student and project lines, so
                # subtracting both counts gives a number starting at 1.
                lec_num = index - (model.num_students + model.num_projects)

                # if lecturer preference lists are present, add them to the
                # model
                if instance_options[Instance_options.TWOPL]:
                    additional_lec_st_ranks = _create_student_ranks(
                        model, line_split[4:], lec_num)
                    lecturer_student_ranks.update(additional_lec_st_ranks)

    model.proj_lecturers = project_lecturers
    _set_lecturers(model, project_lecturers)
    if instance_options[Instance_options.TWOPL]:
        _set_lecturer_ranks(model, lecturer_student_ranks)

    return model

Let's break it down into more manageable segments.

model = Model()
project_lecturers = []
lecturer_student_ranks = {}

Here we initialise some empty data objects that we'll use later. I'll do the same in my new function, but omit the lecturer_student_ranks dict.

with open(filename) as f:

This just opens the file, so we can safely ignore this part.

for index, line in enumerate(f):
    line_split = line.replace(':', '').split()

This iterates over each line in the file, and then removes any colon characters from any lines that contain them. The lecturer and project options are delimited by a colon and a space so removing the colon allows for the split method to work for all lines.

# first line
if index == 0:
    first_line = line.split()
    model.num_students = int(first_line[0])
    model.num_projects = int(first_line[1])
    if instance_options[Instance_options.NUMAGENTS] == 2:
        model.num_lecturers = int(first_line[1])
    if instance_options[Instance_options.NUMAGENTS] == 3:
        model.num_lecturers = int(first_line[2])

Moving on to the first segment containing actual logic. The first line of the text file states how many of the remaining lines hold data for each of the three agents. I can derive the same counts from the length of each of the three outer arrays.

So the code below achieves the same result:

# Each outer array holds one entry per agent, so its length is the count.
model.num_students = len(data.students)
model.num_projects = len(data.projects)
model.num_lecturers = len(data.lecturers)

Now on to the next section.

elif index < model.num_students + 1:

This just checks that the line currently being read holds data about students. It isn't something we'll need.

st_prefs = line_split[1:]
st_num = index
pairs_row = _create_pairs_row(model, st_prefs, st_num)
model.pairs.append(pairs_row)

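This skips the first token on the line, keeps the remaining tokens as the student's preferences, and uses the line index as the student number. It then creates a pairs_row, which it appends to the pairs attribute of the model. Because the student lines start at index 1 in the file, student numbers are 1-based.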

The same result can be achieved with the code below:

# Student numbers are 1-based, matching the line index the file importer uses.
for student_num, preferences in enumerate(data.students, start=1):
    pairs_row = _create_pairs_row(model, preferences, student_num)
    model.pairs.append(pairs_row)

Similarly for the projects there is a check to see if the current line holds project data.

elif index < model.num_students + model.num_projects + 1:

And since in our case the number of agents is always 3, we're only concerned with this first block:

if instance_options[Instance_options.NUMAGENTS] == 3:
    model.proj_lower_quotas.append(int(line_split[1]))
    model.proj_upper_quotas.append(int(line_split[2]))
    project_lecturers.append(int(line_split[3]))

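Which I can rewrite as the code below. Note that the int() conversions are dropped, so this assumes the client sends the quotas and lecturer ids as integers: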

# Each project entry is [lower quota, upper quota, lecturer id].
for project_info in data.projects:
    model.proj_lower_quotas.append(project_info[0])
    model.proj_upper_quotas.append(project_info[1])
    project_lecturers.append(project_info[2])

Note to self: I have a feeling a lot of this can be vectorised, but for now let's keep it simple.

Just as in the previous two cases, we have this check to see if the current line holds lecturer information which we can also omit.

elif (index < model.num_students + model.num_projects +
    model.num_lecturers + 1 and
    instance_options[Instance_options.NUMAGENTS] == 3):

Which leaves us with the final block we need to convert:

model.lec_lower_quotas.append(int(line_split[1]))
model.lec_targets.append(int(line_split[2]))
model.lec_upper_quotas.append(int(line_split[3]))
rank = 1
lec_num = index - (model.num_students + model.num_projects)

And here is the converted code:

# Each lecturer entry is [lower quota, target, upper quota].
for lecturer_info in data.lecturers:
    model.lec_lower_quotas.append(lecturer_info[0])
    model.lec_targets.append(lecturer_info[1])
    model.lec_upper_quotas.append(lecturer_info[2])

The last thing we have to do before we can return the model is set the proj_lecturers attribute on the model. We can copy paste this code as is.

model.proj_lecturers = project_lecturers
_set_lecturers(model, project_lecturers)

_import_from_obj

And so here is our completed function:

def _import_from_obj(data):
    """Builds an SPA-STL model from in-memory instance data.

    Fills the same model attributes as the three-agent branch of
    _import_from_file, but reads them from three sequences instead of a
    text file. Values are stored as given; no int() conversion is applied.

    Args:
      data: Object exposing three sequences:
        students: one preference list per student, in student order.
        projects: one entry per project, indexed as
          [lower quota, upper quota, lecturer id].
        lecturers: one entry per lecturer, indexed as
          [lower quota, target, upper quota].
        NOTE(review): presumably the quotas and ids arrive as integers -
        confirm against the API request schema.

    Returns:
      The model representation of the instance.
    """
    model = Model()
    project_lecturers = []

    # The agent counts are the lengths of the three outer sequences.
    model.num_students = len(data.students)
    model.num_projects = len(data.projects)
    model.num_lecturers = len(data.lecturers)

    # student preference lists; student numbers are 1-based
    for student_num, preferences in enumerate(data.students, start=1):
        pairs_row = _create_pairs_row(model, preferences, student_num)
        model.pairs.append(pairs_row)

    # projects information
    for project_info in data.projects:
        model.proj_lower_quotas.append(project_info[0])
        model.proj_upper_quotas.append(project_info[1])
        project_lecturers.append(project_info[2])  # lecturer id

    # lecturer information
    for lecturer_info in data.lecturers:
        model.lec_lower_quotas.append(lecturer_info[0])
        model.lec_targets.append(lecturer_info[1])
        model.lec_upper_quotas.append(lecturer_info[2])

    model.proj_lecturers = project_lecturers
    _set_lecturers(model, project_lecturers)

    return model

Clone this wiki locally