Skip to content

Joins

Please make sure you've covered Reference / Basics first.

c.join(left_conversion, right_conversion, condition, how="inner") defines a join conversion, which returns an iterator of (left_element, right_element) tuples.

  • left_conversion defines the left part of a join
  • right_conversion defines the right part of a join
  • condition is any condition defined as a conversion, where c.LEFT and c.RIGHT reference elements of the left and right sequences
  • how is any of "inner" | "left" | "right" | "outer"
from convtools import conversion as c

# Left side of the join: records keyed by an integer "id".
collection_1 = [
    {"id": 1, "name": "Nick"},
    {"id": 2, "name": "Joash"},
    {"id": 3, "name": "Bob"},
]
# Right side of the join: records keyed by a *string* "ID".
collection_2 = [
    {"ID": "3", "age": 17, "country": "GB"},
    {"ID": "2", "age": 21, "country": "US"},
    {"ID": "1", "age": 18, "country": "CA"},
]
input_data = (collection_1, collection_2)

# A pair matches when the ids agree (the right "ID" is cast to int first)
# and the right-hand record has age >= 18.
join_condition = c.and_(
    c.LEFT.item("id") == c.RIGHT.item("ID").as_type(int),
    c.RIGHT.item("age") >= 18,
)

# Left join: every left record is kept; one without a match is paired
# with None (see the expected output for "Bob" below).
joined_pairs = c.join(c.item(0), c.item(1), join_condition, how="left")

# Shape of each output row, built from a (left, right) pair. The right-hand
# lookups fall back to None when there is no matching right record.
flat_row = {
    "id": c.item(0, "id"),
    "name": c.item(0, "name"),
    "age": c.item(1, "age", default=None),
    "country": c.item(1, "country", default=None),
}

conv = joined_pairs.pipe(c.list_comp(flat_row)).gen_converter(debug=True)

expected = [
    {"id": 1, "name": "Nick", "age": 18, "country": "CA"},
    {"id": 2, "name": "Joash", "age": 21, "country": "US"},
    {"id": 3, "name": "Bob", "age": None, "country": None},
]
assert conv(input_data) == expected
def aggregate_i(_none, data_, *, __v=__naive_values__["__v"]):
    """Group the rows of ``data_`` having ``age >= 18`` by ``int(row["ID"])``.

    Args:
        _none: sentinel meaning "no group mapping has been created yet".
        data_: iterable of rows supporting ``row["age"]`` and ``row["ID"]``.
        __v: value returned when no row passes the ``age >= 18`` filter.

    Returns:
        ``__v`` when no row qualified; otherwise a ``defaultdict`` mapping
        each integer id to the list of qualifying rows, with its
        ``default_factory`` reset to ``None`` so that looking up a missing
        key raises ``KeyError`` instead of silently inserting an empty list.
    """
    grouped = _none
    rows = iter(data_)

    # Phase 1: scan for the first qualifying row. The mapping is created
    # lazily, so an input without any qualifying row allocates nothing.
    for row in rows:
        if row["age"] >= 18:
            grouped = defaultdict(list)
            grouped[int(row["ID"])].append(row)
            globals()["__BROKEN_EARLY__"] = True  # DEBUG ONLY
            break

    # Phase 2: the mapping is known to exist here, so the rest of the same
    # iterator is consumed without re-checking the sentinel on every row.
    for row in rows:
        if row["age"] >= 18:
            grouped[int(row["ID"])].append(row)

    if grouped is _none:
        return __v
    # Freeze the mapping: from now on missing keys must not be auto-created.
    grouped.default_factory = None
    return grouped

def join_(left_, right_, _none):
    """Yield ``(left_item, right_item)`` pairs of a left join.

    The right side is first indexed by ``aggregate_i``; each left item is
    then looked up in that index by its ``"id"``. A left item without any
    matching right item is yielded exactly once, paired with ``None``.

    Args:
        left_: iterable of left items supporting ``item["id"]``.
        right_: iterable of right items, passed on to ``aggregate_i``.
        _none: sentinel used both by ``aggregate_i`` and to detect that a
            left item has no matches.
    """
    right_by_key = aggregate_i(_none, right_)
    # The index is all that is needed from here on; drop the reference to
    # the raw right-hand input.
    del right_
    for left_row in left_:
        key = left_row["id"]
        if key in right_by_key:
            matches = iter(right_by_key[key])
        else:
            matches = iter(())
        first_match = next(matches, _none)
        if first_match is _none:
            yield left_row, None
            continue
        yield left_row, first_match
        for other_match in matches:
            yield left_row, other_match

def converter(data_, *, __get_2_or_default=__naive_values__["__get_2_or_default"]):
    """Join ``data_[0]`` with ``data_[1]`` and flatten each pair into a dict.

    Args:
        data_: indexable holding the left collection at index 0 and the
            right collection at index 1.
        __get_2_or_default: helper called as ``(pair, 1, key, None)`` to read
            a field of the right-hand item; presumably it returns the default
            when the lookup fails (its definition is not shown here).

    Returns:
        A list of dicts with the keys ``"id"``, ``"name"``, ``"age"`` and
        ``"country"``, one per pair produced by ``join_``.

    Raises:
        Re-raises any exception after dumping the generated sources when it
        is one of ``__exceptions_to_dump_sources``.
    """
    global __none__
    _none = __none__
    try:
        rows = []
        for pair in join_(data_[0], data_[1], _none):
            left_item = pair[0]
            rows.append(
                {
                    "id": left_item["id"],
                    "name": left_item["name"],
                    "age": __get_2_or_default(pair, 1, "age", None),
                    "country": __get_2_or_default(pair, 1, "country", None),
                }
            )
        return rows
    except __exceptions_to_dump_sources:
        __convtools__code_storage.dump_sources()
        raise