Hanukkah of Data 5784 - Day 7 The Meet Cute

:: programming, python, puzzle

The Task

We’re given the following clues about the ex-boyfriend:

  • Bought a similar item at the same time
  • Both items (Bargain Hunter’s & ex-boyfriend’s) had a color
  • The color of the items was different

Solution

The complete Python solution:
import pandas as pd
import re
from datetime import datetime, timedelta

def solve():
    """Return the phone number of the Bargain Hunter's ex-boyfriend.

    Reads the four ``noahs-*.csv`` files from the current working directory and
    joins them into one row per ordered item.  For every colored item (SKU
    starting with ``COL``) that the Bargain Hunter (phone ``585-838-9161``)
    bought in an order whose ``shipped`` timestamp equals its ``ordered``
    timestamp, it looks for other customers who bought the same kind of item in
    a different color within ten minutes either side of her purchase.

    Returns:
        The phone number of the single matching customer for the first of her
        purchases that has exactly one such customer, or ``None`` when no
        purchase yields a unique match.
    """
    customers   = pd.read_csv('noahs-customers.csv')
    # Parse both timestamps so the ordered == shipped test below compares
    # datetimes with datetimes instead of datetimes with raw CSV text.
    orders      = pd.read_csv('noahs-orders.csv', parse_dates=['ordered', 'shipped'])
    order_items = pd.read_csv('noahs-orders_items.csv')
    products    = pd.read_csv('noahs-products.csv')
    data        = customers.merge(orders).merge(order_items).merge(products)

    is_bargain_hunter = data['phone'] == '585-838-9161'
    has_color         = data['sku'].str.startswith('COL')
    is_in_stock       = data['ordered'] == data['shipped']

    bargain_orders = data[has_color & is_bargain_hunter & is_in_stock][['desc', 'ordered']]

    # Candidates are colored items bought by anybody except the Bargain Hunter:
    # her own second item in the same order must not count as "someone else".
    others = data[has_color & ~is_bargain_hunter]

    # Loop invariants: the color-suffix pattern and the +/- time window.
    color_suffix = re.compile(r' \([a-z]+\)')
    delta        = timedelta(minutes=10)

    # Loop over the Bargain Hunter's orders
    for desc, ordered in bargain_orders.itertuples(index=False, name=None):
        # Strip off the color portion of the description, e.g. "Noah's Poster"
        desc_prefix = color_suffix.sub('', desc)

        # Restrict to w/in 10 minutes of Bargain Hunter's purchase (inclusive)
        similar_time = others['ordered'].between(ordered - delta, ordered + delta)

        # Restrict to the same kind of product in a different color
        similar_item    = others['desc'].str.startswith(desc_prefix)
        different_color = others['desc'] != desc

        # Count people, not rows: one customer with two matching items is
        # still a single candidate.
        custids = others.loc[similar_time & similar_item & different_color, 'customerid'].unique()

        # If we have only one, then we've found our person
        if len(custids) == 1:
            return customers.loc[customers['customerid'] == custids[0], 'phone'].iloc[0]

    # No purchase produced a unique match
    return None

# ---------------------------------------------------------------------------------------------

# Check the computed phone number against the expected one
expected_phone = '838-335-7157'
assert solve() == expected_phone