from collections import defaultdict, Counter

import matplotlib.pyplot as plt

%matplotlib inline
plt.rcParams['figure.figsize'] = (8, 8)


# Sample names used to demonstrate pairing people by their first letter.
girls = ['alice', 'allie', 'bernice', 'brenda', 'clarice', 'cilly']
boys = ['chris', 'christopher', 'arald', 'arnold', 'bob']


# Naive pairing: every boy is compared against every girl, so the number of
# comparisons is len(boys) * len(girls).
[(b, g) for b in boys for g in girls if b[0] == g[0]]

[('chris', 'clarice'),
 ('chris', 'cilly'),
 ('christopher', 'clarice'),
 ('christopher', 'cilly'),
 ('arald', 'alice'),
 ('arald', 'allie'),
 ('arnold', 'alice'),
 ('arnold', 'allie'),
 ('bob', 'bernice'),
 ('bob', 'brenda')]


# Index the girls by initial in a single pass: letterGirls maps a first letter
# to the list of girls whose name starts with it.
letterGirls = {}
for girl in girls:
    letterGirls.setdefault(girl[0], []).append(girl)

# Each boy now only visits the girls that share his initial. A boy whose
# initial has no entry in letterGirls would raise KeyError on this lookup.
[(b, g) for b in boys for g in letterGirls[b[0]]]

[('chris', 'clarice'),
 ('chris', 'cilly'),
 ('christopher', 'clarice'),
 ('christopher', 'cilly'),
 ('arald', 'alice'),
 ('arald', 'allie'),
 ('arnold', 'alice'),
 ('arnold', 'allie'),
 ('bob', 'bernice'),
 ('bob', 'brenda')]


>>> s = [('yellow', 1), ('blue', 2), ('yellow', 3), ('blue', 4), ('red', 1)]
>>> d = defaultdict(list)
>>> for k, v in s:
...     d[k].append(v)
...
>>> sorted(d.items())
[('blue', [2, 4]), ('red', [1]), ('yellow', [1, 3])]

[('blue', [2, 4]), ('red', [1]), ('yellow', [1, 3])]


class Bunch(dict):
    """Dictionary whose items can also be read and written as attributes.

    Accepts exactly the arguments ``dict`` accepts. After construction,
    ``bunch.key`` and ``bunch['key']`` refer to the same stored value.
    """

    def __init__(self, *args, **kwds):
        # Zero-argument super() binds to this class through the implicit
        # __class__ cell rather than looking up the global name ``Bunch`` at
        # call time, so construction keeps working even if that name is
        # later rebound to a different class.
        super().__init__(*args, **kwds)
        # Use the mapping itself as the instance attribute namespace, so
        # attribute access and item access share one storage.
        self.__dict__ = self


>>> T = Bunch
>>> t = T(left=T(left="a", right="b"), right=T(left="c"))
>>> t.left

{'left': 'a', 'right': 'b'}


>>> t.left.right

'b'


>>> t['left']['right']

'b'


>>> "left" in t.right

True


"right" in t.right

False


class Bunch:
    """Plain attribute container: each keyword argument becomes an attribute."""

    def __init__(self, **kwds):
        # vars(self) is the instance's attribute dictionary; merge the
        # keyword arguments straight into it.
        vars(self).update(kwds)


from types import SimpleNamespace

x, y = 32, 64
# SimpleNamespace exposes each keyword argument as an attribute of the result.
point = SimpleNamespace(datum=y, squared=y*y, coord=x)
point

namespace(datum=64, squared=4096, coord=32)


point.datum, point.squared, point.coord

(64, 4096, 32)


[i for i in point]

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-18-70c474fd2112> in <module>
----> 1 [i for i in point]

TypeError: 'types.SimpleNamespace' object is not iterable


def fast_countdown(count):
    """Return the list ``[count - 1, ..., 1, 0]``.

    The values are appended at the end of a list in ascending order and the
    order is flipped once afterwards.
    """
    ascending = []
    for value in range(count):
        ascending.append(value)
    # A single reversal at the end replaces any per-item front insertion.
    return ascending[::-1]

def slow_countdown(count):
    """Return the list ``[count - 1, ..., 1, 0]`` built by front insertion.

    Each new value is inserted at index 0 of a list, which is the costly
    operation this function exists to demonstrate.
    """
    descending = []
    for value in range(count):
        # Inserting at the front has to shift every item already stored.
        descending.insert(0, value)
    return descending

def printer(lst, chunk=10):
    """Print the first and the last ``chunk`` items of ``lst``.

    Items are converted with ``str`` and separated by single spaces; the two
    groups are joined by ``...`` on one output line.
    """
    head = " ".join(str(item) for item in lst[:chunk])
    tail = " ".join(str(item) for item in lst[-chunk:])
    print("{}...{}".format(head, tail))


%timeit nums = fast_countdown(10**5)

5.13 ms ± 118 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


%timeit nums = slow_countdown(10**5)

1.61 s ± 13.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


from collections import deque

def enhanced_slow_countdown(count):
    """Return a deque holding ``count - 1, ..., 1, 0`` built by left appends.

    Same front-insertion strategy as a list-based countdown, but on a
    ``deque``, which supports appending at its left end directly.
    """
    descending = deque()
    for value in range(count):
        descending.appendleft(value)
    return descending


%timeit nums = enhanced_slow_countdown(10**5)

5.19 ms ± 159 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


from random import randrange

max_value = 10000  # random values are drawn from range(max_value)
checks = 1000  # number of random values stored in L
# L may contain duplicates: values are drawn independently.
L = [randrange(max_value) for i in range(checks)]


%timeit [randrange(max_value) in L for _ in range(checks)]

12.7 ms ± 644 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


S = set(L) # convert the list to a set object.

%timeit [randrange(max_value) in S for _ in range(checks)]

439 µs ± 31.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


lists = [[1, 2], [3, 4, 5], [6]]
# Flattens by repeated list concatenation starting from []: every addition
# builds a brand-new list containing everything accumulated so far.
sum(lists, [])

[1, 2, 3, 4, 5, 6]


# Flatten by growing a single result list in place: extend adds the items of
# each sub-list onto the end of res without rebuilding it.
res = []
for lst in lists:
    res.extend(lst)
res

[1, 2, 3, 4, 5, 6]


def string_producer(length):
    """Return a random string of ``length`` lowercase ASCII letters.

    Each character is drawn independently from 'a' through 'z' inclusive.
    """
    first, last = ord('a'), ord('z')
    # randrange excludes its stop value, hence ``last + 1`` so that 'z' can
    # actually be produced.
    return ''.join([chr(randrange(first, last + 1)) for _ in range(length)])


%%timeit 

s = ""
for chunk in string_producer(10**5):
    s += chunk

74.4 ms ± 5.29 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


%%timeit

chunks = []
for chunk in string_producer(10**5):
    chunks.append(chunk)
s = ''.join(chunks)

61.5 ms ± 1.27 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


%timeit s = ''.join(string_producer(10**5))

60.1 ms ± 2.26 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


def perm_isomorphism(M, domain):
    """Translate the targets in ``M`` into the matching items of ``domain``.

    ``domain`` is enumerated so that its i-th item labels position ``i``;
    the result lists, for each entry of ``M`` in order, the label of the
    position that entry points at.
    """
    labels = dict(enumerate(domain))
    return [labels[target] for target in M]

def fix_perm(M, fix):
    """Keep ``M``'s target for positions in ``fix``; map the others to themselves.

    Returns a new list of the same length as ``M``.
    """
    return [target if position in fix else position
            for position, target in enumerate(M)]


def naive_max_perm(M, A=None):
    """Recursively find the positions of ``M`` that can keep their target.

    Parameters:
        M: list of integers; ``M[i]`` is the position that ``i`` wants.
        A: set of positions still allowed to move. Defaults to every
           position of ``M`` on the first call.

    Returns:
        The set of positions left once no remaining position is unwanted,
        i.e. every position in the set is the target of some position in
        the set. Pass it to ``fix_perm`` as ``fix``.
    """
    if A is None:
        # First invocation: every position is a candidate.
        A = set(range(len(M)))

    if len(A) == 1:
        # Recursion base: a single remaining position is returned as is.
        return A

    desired = {M[i] for i in A}   # positions wanted by some candidate
    undesired = A - desired       # candidates that nobody wants
    if not undesired:
        return A
    # Drop the unwanted candidates and try again with the rest.
    return naive_max_perm(M, A - undesired)


I = range(8) # the identity permutation
letters = "abcdefgh"
# Show the permutation with letters in place of position numbers.
perm_isomorphism(I, letters)

['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']


# Not one-to-one: the targets 2 and 5 each appear twice.
M = [2, 2, 0, 5, 3, 5, 7, 4]
perm_isomorphism(M, letters)

['c', 'c', 'a', 'f', 'd', 'f', 'h', 'e']


# Compute the positions that keep their target, then map every other
# position to itself and display the result with letters.
fix = naive_max_perm(M)
max_M = fix_perm(M, fix)
perm_isomorphism(max_M, letters)

['c', 'b', 'a', 'd', 'e', 'f', 'g', 'h']


naive_max_perm(M)

{0, 2, 5}


def max_perm(M):
    """Find the positions of ``M`` that can keep their target, iteratively.

    ``M[i]`` is the position that ``i`` wants. Positions that nobody wants
    are discarded one at a time; discarding a position frees its target,
    which is discarded in turn once nobody else wants it.

    Returns the set of positions that survive.
    """
    survivors = set(range(len(M)))
    wanted = Counter(M)  # target position -> how many positions want it
    # Start from every position that is nobody's target.
    pending = deque(i for i in survivors if wanted[i] == 0)
    while pending:
        dropped = pending.popleft()
        survivors.remove(dropped)
        target = M[dropped]
        # The dropped position no longer claims its target.
        wanted[target] -= 1
        if wanted[target] == 0:
            # Nobody wants that target any more: schedule it for removal.
            pending.append(target)
    return survivors


# Same pipeline as with naive_max_perm, using the iterative max_perm.
fix = max_perm(M)
max_M = fix_perm(M, fix)
perm_isomorphism(max_M, letters)

['c', 'b', 'a', 'd', 'e', 'f', 'g', 'h']


def counting_sort(A, key=None, sort_boundary=None):
    """Return the items of ``A`` as a new list sorted by ``key``.

    Items are first grouped into buckets by their key (keys must be
    hashable). The buckets are then emitted in increasing key order; items
    sharing a key keep their input order, so the sort is stable.

    Parameters:
        A: iterable of items to sort. It is traversed exactly once, so
           one-shot iterators are accepted.
        key: optional function mapping an item to its sort key; defaults to
           the item itself.
        sort_boundary: optional number, intended to lie within [0, 1]. When
           it is truthy and the number of distinct keys is at most
           ``len(A) * sort_boundary``, the distinct keys are ordered with
           ``sorted``. Otherwise every integer from the smallest to the
           largest key is scanned, which requires integer keys and costs
           time proportional to that span: cheap when the keys are dense,
           expensive when they are clustered far apart.

    Returns:
        A new list; an empty input yields an empty list.
    """
    key_of = key if key is not None else (lambda x: x)

    buckets = defaultdict(list)
    size = 0  # counted here so that A does not need to support len()
    for item in A:
        buckets[key_of(item)].append(item)
        size += 1

    if not buckets:
        # min()/max() below would raise on an empty collection.
        return []

    if sort_boundary and len(buckets) <= size * sort_boundary:
        domain = sorted(buckets)
    else:
        domain = range(min(buckets), max(buckets) + 1)

    result = []
    for k in domain:
        # .get() avoids the defaultdict side effect of creating an empty
        # bucket for every key of the scanned range that never occurred.
        result.extend(buckets.get(k, ()))
    return result


# 2000 random values drawn from range(50): many repeats over a small domain.
A = [randrange(50) for i in range(2*10**3)]

# Sanity check against the built-in sort.
assert sorted(A) == counting_sort(A)


# Histogram of A using 10 bins.
n, bins, patches = plt.hist(A, 10, facecolor='green', alpha=0.5)
plt.xlabel('elements'); plt.ylabel('frequencies'); plt.grid(True)
plt.show()


%timeit counting_sort(A)

219 µs ± 10.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


%timeit counting_sort(A, sort_boundary=1)

206 µs ± 8.96 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


# Two clusters of 1000 values each: one within [0, 50) and one within
# [10**4, 10**4 + 50), with no values in the wide gap between them.
B = ([randrange(50) for i in range(10**3)] + 
     [10**4 + randrange(50) for i in range(10**3)])


# Histogram of B using 100 bins.
n, bins, patches = plt.hist(B, 100, facecolor='green', alpha=0.5)
plt.xlabel('elements'); plt.ylabel('frequencies'); plt.grid(True)
plt.show()


assert sorted(B) == counting_sort(B)


%timeit counting_sort(B)

2.01 ms ± 136 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


%timeit counting_sort(B, sort_boundary=1/8)

247 µs ± 20.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

Tips, tricks and gotchas¶

A grouping pattern, avoiding quadratic time¶

The Bunch pattern¶

Python's `list.append` isn't Lisp's `cons`¶

enhance with `deque` objects¶

Hidden squares: concerning `list`s and `set`s¶

concerning `string`s¶

Counting¶

Max permutation¶

Counting Sort¶

Tips, tricks and gotchas¶

A grouping pattern, avoiding quadratic time¶

The Bunch pattern¶

Python's list.append isn't Lisp's cons¶

enhance with deque objects¶

Hidden squares: concerning lists and sets¶

concerning strings¶

Counting¶

Max permutation¶

Counting Sort¶

Python's `list.append` isn't Lisp's `cons`¶

enhance with `deque` objects¶

Hidden squares: concerning `list`s and `set`s¶

concerning `string`s¶