序列的修改、散列和切片

当前位置:

首页 > temp > python入门教程 >

序列的修改、散列和切片

from array import array
import reprlib, math, numbers
from functools import reduce
from operator import xor
from itertools import chain

# Vector_v1
class Vector:
    """Multi-dimensional vector (version 1) backed by an ``array``.

    Supports iteration, ``len()``, indexing, ``repr()``/``str()``,
    ``bytes()``, ``abs()``, truth testing and equality comparison.
    Indexing is delegated to the underlying array, so a slice yields an
    ``array`` rather than a ``Vector``.
    """

    # Array typecode used for storage; __bytes__ writes it as the first byte.
    typecode = 'd'

    def __init__(self, components) -> None:
        # Copy the incoming iterable into an array of ``typecode`` items.
        self._componeents = array(self.typecode, components)

    def __len__(self):
        return len(self._componeents)

    def __getitem__(self, index):
        # Plain delegation to the array: int -> float, slice -> array.
        return self._componeents[index]

    def __iter__(self):
        return iter(self._componeents)

    def __repr__(self):
        text = reprlib.repr(self._componeents)  # e.g. array('d', [1.0, 2.0, ...])
        start = text.find('[')
        # Keep only the bracketed list, dropping "array('d', " and the final ")".
        return f'Vector({text[start:-1]})'

    def __str__(self):
        return str(tuple(self))

    def __bytes__(self):
        header = bytes([ord(self.typecode)])
        return header + bytes(self._componeents)

    def __eq__(self, other):
        return tuple(self) == tuple(other)

    def __abs__(self):
        squares = (x * x for x in self)
        return math.sqrt(sum(squares))

    def __bool__(self):
        return bool(abs(self))

    @classmethod
    def frombytes(cls, octets):
        """Rebuild a Vector from the byte layout produced by ``__bytes__``."""
        typecode = chr(octets[0])
        payload = memoryview(octets[1:])
        return cls(payload.cast(typecode))

reprlib.repr() 返回对象的限长字符串表示, 超出的部分用 ... 省略, 例如 array('d', [0.0, 1.0, 2.0, 3.0, 4.0, ...])

# Test: exercise len() and indexing on a Vector.
v1 = Vector([3, 4, 5])

print(len(v1))  # number of components
print(v1[0], v1[-1])  # first and last component via integer indexes

v7 = Vector(range(7))
print(v7[1:4])  # with Vector_v1 above, the slice is handed straight to the array

3
3.0 5.0
array('d', [1.0, 2.0, 3.0])

切片原理

class Seq:
    """Probe class: indexing returns whatever object was passed to __getitem__."""

    def __getitem__(self, index):
        return index

s = Seq()
print(1, s[1])  # plain integer index
print(2, s[1:4])  # start:stop arrives as a slice object
print(3, s[1:4:2])  # start:stop:step
print(4, s[1:4:2, 9])  # a comma inside [] makes __getitem__ receive a tuple
print(5, s[1:4:2, 7:9])  # the tuple may hold several slices

1 1
2 slice(1, 4, None)
3 slice(1, 4, 2)
4 (slice(1, 4, 2), 9)
5 (slice(1, 4, 2), slice(7, 9, None))

print(slice)  # slice is a class
print(dir(slice))  # its attributes include indices, start, step and stop
# help() writes the documentation itself and returns None, so the outer
# print() adds a trailing "None" line to the output.
print(help(slice.indices))

<class 'slice'>
['__class__', '__delattr__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', 'indices', 'start', 'step', 'stop']
Help on method_descriptor:

indices(...)
    S.indices(len) -> (start, stop, stride)
    
    Assuming a sequence of length len, calculate the start and stop
    indices, and the stride length of the extended slice described by
    S. Out of bounds indices are clipped in a manner consistent with the
    handling of normal slices.

None

S.indices(len) -> (start, stop, stride)

自动将 slice 适配到长度为 len 的对象上

# indices(5) adapts each slice to a sequence of length 5.
print(slice(None, 10, 2).indices(5))  # missing start -> 0, stop 10 clipped to 5
print(slice(-3, None, None).indices(5))  # start -3 -> 2, missing stop -> 5, missing step -> 1

(0, 5, 2)
(2, 5, 1)

print('ABCDE'[:10:2])  # equivalent to the next line
print('ABCDE'[0:5:2])

print('ABCDE'[-3:])  # equivalent to the next line
print('ABCDE'[2:5:1])

ACE
ACE
CDE
CDE

# Vector_v2
class Vector:
    """Multi-dimensional vector (version 2): slicing returns a new Vector."""

    typecode = 'd'

    def __init__(self, components) -> None:
        self._componeents = array(self.typecode, components)

    def __len__(self):
        return len(self._componeents)

    def __iter__(self):
        return iter(self._componeents)

    # ------------------------------- NEW -------------------------------
    def __getitem__(self, index):
        """Return one component for an integer index, or a Vector for a slice.

        Raises:
            TypeError: if ``index`` is neither an integer nor a slice
                (for example a tuple, as in ``v[1, 2]``).
        """
        cls = type(self)
        if isinstance(index, numbers.Integral):
            return self._componeents[index]
        if isinstance(index, slice):
            # Wrap the sliced array so the result has the same type as self.
            return cls(self._componeents[index])
        msg = '{cls.__name__} indices must be integers'
        raise TypeError(msg.format(cls=cls))

    def __repr__(self):
        text = reprlib.repr(self._componeents)  # e.g. array('d', [1.0, 2.0, ...])
        start = text.find('[')
        # Keep only the bracketed list, dropping "array('d', " and the final ")".
        return f'Vector({text[start:-1]})'

    def __str__(self):
        return str(tuple(self))

    def __bytes__(self):
        header = bytes([ord(self.typecode)])
        return header + bytes(self._componeents)

    def __eq__(self, other):
        return tuple(self) == tuple(other)

    def __abs__(self):
        squares = (x * x for x in self)
        return math.sqrt(sum(squares))

    def __bool__(self):
        return bool(abs(self))

    @classmethod
    def frombytes(cls, octets):
        """Rebuild a Vector from the byte layout produced by ``__bytes__``."""
        typecode = chr(octets[0])
        payload = memoryview(octets[1:])
        return cls(payload.cast(typecode))

v7 = Vector(range(7))
print(1, v7[-1])  # integer index -> a single component
print(2, v7[1:4])  # slice -> a new Vector
print(3, v7[-1:])  # a one-element slice is still a Vector
print(4, v7[1, 2])  # raises TypeError: multi-dimensional indexing is not supported

1 6.0
2 (1.0, 2.0, 3.0)
3 (6.0,)



---------------------------------------------------------------------------

TypeError                                 Traceback (most recent call last)

Cell In [28], line 5
      3 print(2, v7[1:4])
      4 print(3, v7[-1:])
----> 5 print(4, v7[1, 2])


Cell In [22], line 19, in Vector.__getitem__(self, index)
     17 else:  # 抛出异常
     18     msg = '{cls.__name__} indices must be integers'
---> 19     raise TypeError(msg.format(cls=cls))


TypeError: Vector indices must be integers

# Vector_v3 动态存取属性

class Vector:
    """Vector (version 3): the first components are readable as x, y, z, t."""

    typecode = 'd'
    # Each letter maps to the component at the same position (x -> 0, ... t -> 3).
    shortcut_numes = 'xyzt'

    def __init__(self, components) -> None:
        self._componeents = array(self.typecode, components)

    def __iter__(self):
        return iter(self._componeents)

    def __getitem__(self, index):
        """Return one component for an integer index, or a Vector for a slice."""
        cls = type(self)
        if isinstance(index, numbers.Integral):
            return self._componeents[index]
        if isinstance(index, slice):
            return cls(self._componeents[index])
        msg = '{cls.__name__} indices must be integers'
        raise TypeError(msg.format(cls=cls))

    # ------------------------------- NEW -------------------------------
    def __getattr__(self, name):
        """Resolve single-letter shortcut names to components.

        Python calls this only after normal attribute lookup has failed.

        Raises:
            AttributeError: if ``name`` is not a shortcut letter, or the
                vector has too few components for that letter.
        """
        cls = type(self)
        # Only single-letter names can be shortcuts; anything else gets -1.
        pos = cls.shortcut_numes.find(name) if len(name) == 1 else -1
        if 0 <= pos < len(self._componeents):
            return self._componeents[pos]

        msg = '{.__name__!r} object has no attribute {!r}'
        raise AttributeError(msg.format(cls, name))

v5 = Vector(range(5))
print(1, v5)
print(2, v5.x)  # no attribute named x exists, so __getattr__ supplies component 0

v5.x = 10  # nothing intercepts the assignment: it stores an instance attribute x
print(3, v5.x)  # did v5[0] change?

print(4, v5)  # no: the components themselves are untouched

1 (0.0, 1.0, 2.0, 3.0, 4.0)
2 0.0
3 10
4 (0.0, 1.0, 2.0, 3.0, 4.0)

解释:

当且仅当对象中没有指定名称的属性时, 才会调用 __getattr__
当执行 v5.x = 10 时, Python 会直接在 v5 实例上创建一个真实的实例属性 x (并不会修改分量数组), 它遮蔽了原本由 __getattr__ 提供的"虚拟属性" x
之后访问 v5.x 便是该属性的值, 而不通过 __getattr__ 获取

# 改进

# Vector_v3 动态存取属性

class Vector:
    """Vector (version 3, improved): shortcut attributes are read-only."""

    typecode = 'd'
    # Each letter maps to the component at the same position (x -> 0, ... t -> 3).
    shortcut_numes = 'xyzt'

    def __init__(self, components) -> None:
        self._componeents = array(self.typecode, components)

    def __getattr__(self, name):
        """Resolve single-letter shortcut names to components."""
        cls = type(self)
        # Only single-letter names can be shortcuts; anything else gets -1.
        pos = cls.shortcut_numes.find(name) if len(name) == 1 else -1
        if 0 <= pos < len(self._componeents):
            return self._componeents[pos]

        msg = '{.__name__!r} object has no attribute {!r}'
        raise AttributeError(msg.format(cls, name))

    # ------------------------------- NEW -------------------------------
    def __setattr__(self, name, value) -> None:
        """Reject assignment to any single lowercase-letter attribute.

        Every other name is stored normally.

        Raises:
            AttributeError: for the shortcut letters (reported as read-only)
                and for any other single lowercase letter.
        """
        cls = type(self)
        if len(name) == 1:
            template = None
            if name in cls.shortcut_numes:
                template = 'readonly attribute {attr_name!r}'
            elif name.islower():
                template = "can't set attributes 'a' to 'z' in {cls_name!r}"

            if template is not None:
                raise AttributeError(
                    template.format(cls_name=cls.__name__, attr_name=name)
                )
        super().__setattr__(name, value)

通过 __setattr__ 方法防止其修改部分属性

v6 = Vector(range(6))
print(1, v6)
print(2, v6.x)  # reading the shortcut attribute still works

v6.x = 10  # raises AttributeError: readonly attribute 'x'

1 <__main__.Vector object at 0x000001BD0AD009A0>
2 0.0



---------------------------------------------------------------------------

AttributeError                            Traceback (most recent call last)

Cell In [35], line 5
      2 print(1, v6)
      3 print(2, v6.x)
----> 5 v6.x = 10


Cell In [34], line 35, in Vector.__setattr__(self, name, value)
     33     if error:
     34         msg = error.format(cls_name=cls.__name__, attr_name=name)
---> 35         raise AttributeError(msg)
     36 super().__setattr__(name, value)


AttributeError: readonly attribute 'x'

散列 & 快速等值测试

__hash__

需要依次计算 hash(v[0]) ^ hash(v[1]) ^ hash(v[2]) ..., 即把各个分量的散列值逐个异或起来

reduce()

__eq__

# Refresher on reduce: fold a two-argument function over an iterable.
print(reduce(lambda a, b: a * b, range(1, 6)))  # 1 * 2 * 3 * 4 * 5

# XOR a run of integers together, written three ways.
print(reduce(lambda a, b: a ^ b, range(233)))
print(reduce(xor, range(233)))  # operator.xor replaces the lambda

# The explicit loop can start at 1 because n is already 0.
n = 0
for i in range(1, 233):
    n ^= i
print(n)

120
232
232
232

# Vector_v3 加入 __hash__

class Vector:
    """Vector that is hashable: ``__hash__`` XORs the component hashes."""

    typecode = 'd'

    def __init__(self, components) -> None:
        self._componeents = array(self.typecode, components)

    def __iter__(self):
        return iter(self._componeents)

    # ------------------------------- OLD -------------------------------
    def __eq__(self, other):
        mine, theirs = tuple(self), tuple(other)
        return mine == theirs

    # ------------------------------- NEW -------------------------------
    def __hash__(self):
        # Fold every component's hash into one value, starting from 0 so an
        # empty vector hashes to 0.
        digest = 0
        for item in self._componeents:
            digest = xor(digest, hash(item))
        return digest

注：

使用 reduce() 最好提供三个参数 reduce(function, iterable, initializer)
通常, 对于 +, |, ^ initializer = 0
通常, 对于 *, & initializer = 1

__eq__ 要和 __hash__ 在一起哦~

# 使用【映射归约】实现 __hash__ (map, reduce)
# Vector_v3 加入 __hash__

class Vector:
    """Hashable vector with element-wise equality.

    ``__hash__`` is a map-reduce: hash every component, then XOR the hashes.
    ``__eq__`` is shown in two equivalent forms. Python binds the last
    definition in the class body, so the one-line variant is the one in use.
    """

    typecode = 'd'

    def __init__(self, components) -> None:
        self._components = array(self.typecode, components)

    def __iter__(self):
        return iter(self._components)

    def __len__(self):
        # Required by both __eq__ variants, which compare lengths first.
        return len(self._components)

    # ------------------------------- NEW -------------------------------
    def __hash__(self):
        hashes = map(hash, self._components)  # hash of each component
        return reduce(xor, hashes, 0)

    # ------------------------------- NEW -------------------------------
    def __eq__(self, other):
        """Variant 1: explicit loop."""
        if len(self) != len(other):  # different lengths can never be equal
            return False

        for a, b in zip(self, other):  # compare the items pairwise
            if a != b:
                return False

        return True

    # ------------------------------- NEW -------------------------------
    # Variant 2: the same test as one expression. The lengths must be EQUAL
    # before the items are compared; zip() alone would stop silently at the
    # shorter operand.
    def __eq__(self, other):
        """Return True when both operands have equal length and equal items."""
        return len(self) == len(other) and all(a == b for a, b in zip(self, other))

all() 只要有一个是 False , 整个都是 False

上面两种 __eq__ 效果相等

# Refresher on zip()

print(1, zip(range(3), 'ABC'))  # zip() returns a zip object, not a list
print(2, list(zip(range(3), 'ABC')))

print(3, list(zip(range(3), 'ABC', [0, 1, 2, 3])))  # surprise: zip stops as soon as one iterable is exhausted

from itertools import zip_longest
print(4, list(zip_longest(range(3), 'ABC', [0, 1, 2, 3], fillvalue=1)))  # runs to the longest iterable; gaps are filled with fillvalue

1 <zip object at 0x000001BD0A82A9C0>
2 [(0, 'A'), (1, 'B'), (2, 'C')]
3 [(0, 'A', 0), (1, 'B', 1), (2, 'C', 2)]
4 [(0, 'A', 0), (1, 'B', 1), (2, 'C', 2), (1, 1, 3)]

zip_longest() 按照最长的 iter 迭代, 空的用 fillvalue 补齐

format 格式化

目标: 得到球面坐标 <r, ɸ1, ɸ2, ɸ3>

# Vector with __format__: Cartesian output by default, hyperspherical
# coordinates for format specs that end in 'h'.

class Vector:
    """Vector with ``format()`` support.

    A format spec ending in ``'h'`` selects hyperspherical coordinates,
    rendered as ``<r, a1, a2, ...>`` (magnitude followed by the angular
    coordinates). Any other spec renders the Cartesian components as
    ``(x, y, ...)``. The rest of the spec is applied to each coordinate.
    """

    typecode = 'd'

    def __init__(self, components) -> None:
        self._components = array(self.typecode, components)

    def __iter__(self):
        return iter(self._components)

    # --- support methods that angle() and __format__() rely on ---
    def __len__(self):
        return len(self._components)

    def __getitem__(self, index):
        """Return one component for an integer index, or a Vector for a slice."""
        cls = type(self)
        if isinstance(index, slice):
            return cls(self._components[index])
        if isinstance(index, numbers.Integral):
            return self._components[index]
        msg = '{cls.__name__} indices must be integers'
        raise TypeError(msg.format(cls=cls))

    def __abs__(self):
        return math.sqrt(sum(x * x for x in self))

    # ------------------------------- NEW -------------------------------
    def angle(self, n):
        """Return the n-th angular coordinate, for ``1 <= n < len(self)``."""
        r = math.sqrt(sum(x * x for x in self[n:]))
        a = math.atan2(r, self[n - 1])
        # For the last angle only, a negative final component is mapped to
        # 2*pi - a instead of a.
        if (n == len(self) - 1) and (self[-1] < 0):
            return math.pi * 2 - a
        return a

    def angles(self):
        """Return a generator over all angular coordinates, computed on demand."""
        return (self.angle(n) for n in range(1, len(self)))

    def __format__(self, fmt_spec=''):
        """Format the vector in Cartesian or, with a trailing 'h', hyperspherical form."""
        if fmt_spec.endswith('h'):  # hyperspherical coordinates
            fmt_spec = fmt_spec[:-1]
            # chain() iterates the magnitude and then the angles as one stream.
            coords = chain([abs(self)], self.angles())
            outer_fmt = '<{}>'
        else:
            coords = self
            outer_fmt = '({})'

        components = (format(c, fmt_spec) for c in coords)
        return outer_fmt.format(', '.join(components))

# Vector Final

class Vector:
    """Immutable-style, hashable, multi-dimensional vector.

    Features gathered from the earlier versions:

    * sequence protocol: ``len()``, iteration, integer indexing, and slicing
      that returns a new ``Vector``;
    * read-only shortcut attributes ``x``, ``y``, ``z``, ``t`` for the first
      four components;
    * ``==`` and ``hash()`` that agree with each other;
    * ``format()`` with Cartesian output by default and hyperspherical output
      for specs ending in ``'h'``;
    * ``bytes()`` / ``Vector.frombytes()`` round trip.
    """

    typecode = 'd'
    # Each letter maps to the component at the same position (x -> 0, ... t -> 3).
    # The attribute name is kept as spelled so existing references keep working.
    shortcut_numes = 'xyzt'

    def __init__(self, components) -> None:
        self._components = array(self.typecode, components)

    def __iter__(self):
        return iter(self._components)

    def __len__(self):
        return len(self._components)

    def __getitem__(self, index):
        """Return one component for an integer index, or a Vector for a slice.

        Raises:
            TypeError: if ``index`` is neither an integer nor a slice.
        """
        cls = type(self)
        if isinstance(index, slice):
            return cls(self._components[index])
        elif isinstance(index, numbers.Integral):
            return self._components[index]
        else:
            msg = '{cls.__name__} indices must be integers'
            raise TypeError(msg.format(cls=cls))

    def __repr__(self):
        components = reprlib.repr(self._components)  # array('d', [1.0, 2.0, ...])
        components = components[components.find('['):-1]  # [1.0, 2.0, ...]
        return 'Vector({})'.format(components)

    def __str__(self):
        return str(tuple(self))

    def __bytes__(self):
        return (bytes([ord(self.typecode)]) + bytes(self._components))

    def __hash__(self):
        hashes = map(hash, self._components)  # hash of each component
        return reduce(xor, hashes, 0)

    def __eq__(self, other):
        # The lengths must be EQUAL before the items are compared; zip()
        # alone would stop silently at the shorter operand.
        return len(self) == len(other) and all(a == b for a, b in zip(self, other))

    def __abs__(self):
        return math.sqrt(sum(x * x for x in self))

    def __bool__(self):
        return bool(abs(self))

    def __getattr__(self, name):
        """Resolve single-letter shortcut names to components.

        Python calls this only after normal attribute lookup has failed.
        """
        cls = type(self)
        if len(name) == 1:
            pos = cls.shortcut_numes.find(name)
            if 0 <= pos < len(self._components):
                return self._components[pos]

        msg = '{.__name__!r} object has no attribute {!r}'
        raise AttributeError(msg.format(cls, name))

    def __setattr__(self, name, value) -> None:
        """Reject assignment to any single lowercase-letter attribute.

        Without this, ``v.x = 10`` would store an instance attribute that
        hides the component exposed by ``__getattr__``.
        """
        cls = type(self)
        if len(name) == 1:
            if name in cls.shortcut_numes:
                error = 'readonly attribute {attr_name!r}'
            elif name.islower():
                error = "can't set attributes 'a' to 'z' in {cls_name!r}"
            else:
                error = ''

            if error:
                msg = error.format(cls_name=cls.__name__, attr_name=name)
                raise AttributeError(msg)
        super().__setattr__(name, value)

    def angle(self, n):
        """Return the n-th angular coordinate, for ``1 <= n < len(self)``."""
        r = math.sqrt(sum(x * x for x in self[n:]))
        a = math.atan2(r, self[n - 1])
        # For the last angle only, a negative final component is mapped to
        # 2*pi - a instead of a.
        if (n == len(self) - 1) and (self[-1] < 0):
            return math.pi * 2 - a
        return a

    def angles(self):
        """Return a generator over all angular coordinates, computed on demand."""
        return (self.angle(n) for n in range(1, len(self)))

    def __format__(self, fmt_spec=''):
        """Format the vector in Cartesian or, with a trailing 'h', hyperspherical form."""
        if fmt_spec.endswith('h'):  # hyperspherical coordinates
            fmt_spec = fmt_spec[:-1]
            # chain() iterates the magnitude and then the angles as one stream.
            coords = chain([abs(self)], self.angles())
            outer_fmt = '<{}>'
        else:
            coords = self
            outer_fmt = '({})'

        components = (format(c, fmt_spec) for c in coords)
        return outer_fmt.format(', '.join(components))

    @classmethod
    def frombytes(cls, octets):
        """Rebuild a Vector from the byte layout produced by ``__bytes__``."""
        typecode = chr(octets[0])
        memv = memoryview(octets[1:]).cast(typecode)
        return cls(memv)

__EOF__

本文作者： Zinc233

本文链接： https://www.cnblogs.com/Zinc233/p/FluentPython_S10.html

栏目列表