diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 5fc7c30..829c2a2 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -17,7 +17,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.10", "3.11", "3.12", "3.13"] + python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] steps: - uses: actions/checkout@v4 @@ -28,12 +28,13 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip setuptools + python -m pip install build python -m pip install twine python -m pip install sphinx if [ -f dev-requirements.txt ]; then pip install -r dev-requirements.txt; fi - name: Run Tests run: | python tests.py - python setup.py sdist + python -m build --sdist twine check dist/* sphinx-build -b html docs dist/docs diff --git a/nameparser/config/__init__.py b/nameparser/config/__init__.py index 7b2baef..13e34dd 100644 --- a/nameparser/config/__init__.py +++ b/nameparser/config/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ The :py:mod:`nameparser.config` module manages the configuration of the nameparser. @@ -11,7 +10,7 @@ >>> from nameparser.config import CONSTANTS >>> CONSTANTS.titles.remove('hon').add('chemistry','dean') # doctest: +ELLIPSIS - SetManager(set([u'msgt', ..., u'adjutant'])) + SetManager({'msgt', ..., 'adjutant'}) You can also adjust the configuration of individual instances by passing ``None`` as the second argument upon instantiation. @@ -21,22 +20,16 @@ >>> from nameparser import HumanName >>> hn = HumanName("Dean Robert Johns", None) >>> hn.C.titles.add('dean') # doctest: +ELLIPSIS - SetManager(set([u'msgt', ..., u'adjutant'])) + SetManager({'msgt', ..., 'adjutant'}) >>> hn.parse_full_name() # need to run this again after config changes **Potential Gotcha**: If you do not pass ``None`` as the second argument, ``hn.C`` will be a reference to the module config, possibly yielding unexpected results. See `Customizing the Parser `_. """ -from __future__ import unicode_literals import sys -try: - # Python 3.3+ - from collections.abc import Set -except ImportError: - from collections import Set +from collections.abc import Set -from nameparser.util import binary_type from nameparser.util import lc from nameparser.config.prefixes import PREFIXES from nameparser.config.capitalization import CAPITALIZATION_EXCEPTIONS @@ -80,9 +73,6 @@ def __contains__(self, value): def __len__(self): return len(self.elements) - def next(self): - return self.__next__() - def __next__(self): if self.count >= len(self.elements): self.count = 0 @@ -102,7 +92,7 @@ def add_with_encoding(self, s, encoding=None): if sys.stdin: stdin_encoding = sys.stdin.encoding encoding = encoding or stdin_encoding or DEFAULT_ENCODING - if type(s) == binary_type: + if isinstance(s, bytes): s = s.decode(encoding) self.elements.add(lc(s)) diff --git a/nameparser/config/capitalization.py b/nameparser/config/capitalization.py index 84dfbef..f0a94dc 100644 --- a/nameparser/config/capitalization.py +++ b/nameparser/config/capitalization.py @@ -1,6 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import unicode_literals - CAPITALIZATION_EXCEPTIONS = ( ('ii', 'II'), ('iii', 'III'), diff --git a/nameparser/config/conjunctions.py b/nameparser/config/conjunctions.py index fb89201..77be4fd 100644 --- a/nameparser/config/conjunctions.py +++ b/nameparser/config/conjunctions.py @@ -1,6 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import unicode_literals - CONJUNCTIONS = set([ '&', 'and', diff --git a/nameparser/config/prefixes.py b/nameparser/config/prefixes.py index 0334f83..9e0e772 100644 --- a/nameparser/config/prefixes.py +++ b/nameparser/config/prefixes.py @@ -1,6 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import unicode_literals - #: Name pieces that appear before a last name. Prefixes join to the piece #: that follows them to make one new piece. They can be chained together, e.g #: "von der" and "de la". Because they only appear in middle or last names, diff --git a/nameparser/config/regexes.py b/nameparser/config/regexes.py index bd4b320..24d16b3 100644 --- a/nameparser/config/regexes.py +++ b/nameparser/config/regexes.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import unicode_literals import re # emoji regex from https://stackoverflow.com/questions/26568722/remove-unicode-emoji-using-re-in-python diff --git a/nameparser/config/suffixes.py b/nameparser/config/suffixes.py index 804f2b5..81400a4 100644 --- a/nameparser/config/suffixes.py +++ b/nameparser/config/suffixes.py @@ -1,6 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import unicode_literals - SUFFIX_NOT_ACRONYMS = set([ 'dr', 'esq', diff --git a/nameparser/config/titles.py b/nameparser/config/titles.py index 04746bc..1467a53 100644 --- a/nameparser/config/titles.py +++ b/nameparser/config/titles.py @@ -1,6 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import unicode_literals - FIRST_NAME_TITLES = set([ 'aunt', 'auntie', diff --git a/nameparser/parser.py b/nameparser/parser.py index a5eb352..a08e744 100644 --- a/nameparser/parser.py +++ b/nameparser/parser.py @@ -1,13 +1,7 @@ -# -*- coding: utf-8 -*- -from __future__ import unicode_literals - -import sys import re from operator import itemgetter from itertools import groupby -from nameparser.util import u -from nameparser.util import text_types, binary_type from nameparser.util import lc from nameparser.util import log from nameparser.config import CONSTANTS @@ -123,10 +117,10 @@ def __eq__(self, other): HumanName instances are equal to other objects whose lower case unicode representation is the same. """ - return (u(self)).lower() == (u(other)).lower() + return str(self).lower() == str(other).lower() def __ne__(self, other): - return not (u(self)).lower() == (u(other)).lower() + return not str(self).lower() == str(other).lower() def __getitem__(self, key): if isinstance(key, slice): @@ -140,9 +134,6 @@ def __setitem__(self, key, value): else: raise KeyError("Not a valid HumanName attribute", key) - def next(self): - return self.__next__() - def __next__(self): if self._count >= len(self._members): self._count = 0 @@ -152,7 +143,7 @@ def __next__(self): self._count = c + 1 return getattr(self, self._members[c]) or next(self) - def __unicode__(self): + def __str__(self): if self.string_format: # string_format = "{title} {first} {middle} {last} {suffix} ({nickname})" _s = self.string_format.format(**self.as_dict()) @@ -164,11 +155,6 @@ def __unicode__(self): def __hash__(self): return hash(str(self)) - def __str__(self): - if sys.version_info[0] >= 3: - return self.__unicode__() - return self.__unicode__().encode(self.encoding) - def __repr__(self): if self.unparsable: _string = "<%(class)s : [ Unparsable ] >" % {'class': self.__class__.__name__, } @@ -182,9 +168,7 @@ def __repr__(self): 'suffix': self.suffix or '', 'nickname': self.nickname or '', } - if sys.version_info[0] >= 3: - return _string - return _string.encode(self.encoding) + return _string def as_dict(self, include_empty=True): """ @@ -361,7 +345,7 @@ def surnames(self): def _set_list(self, attr, value): if isinstance(value, list): val = value - elif isinstance(value, text_types): + elif isinstance(value, (str, bytes)): val = [value] elif value is None: val = [] @@ -481,7 +465,7 @@ def full_name(self): def full_name(self, value): self.original = value self._full_name = value - if isinstance(value, binary_type): + if isinstance(value, bytes): self._full_name = value.decode(self.encoding) self.parse_full_name() @@ -657,7 +641,7 @@ def parse_full_name(self): self.suffix_list += parts[1:] pieces = self.parse_pieces(parts[0].split(' ')) - log.debug("pieces: %s", u(pieces)) + log.debug("pieces: %s", str(pieces)) for i, piece in enumerate(pieces): try: nxt = pieces[i + 1] @@ -686,7 +670,7 @@ def parse_full_name(self): # last [suffix], title first middles[,] suffix [,suffix] # parts[0], parts[1], parts[2:...] - log.debug("post-comma pieces: %s", u(post_comma_pieces)) + log.debug("post-comma pieces: %s", str(post_comma_pieces)) # lastname part may have suffixes in it lastname_pieces = self.parse_pieces(parts[0].split(' '), 1) @@ -747,7 +731,7 @@ def parse_pieces(self, parts, additional_parts_count=0): output = [] for part in parts: - if not isinstance(part, text_types): + if not isinstance(part, (str, bytes)): raise TypeError("Name parts must be strings. " "Got {0}".format(type(part))) output += [x.strip(' ,') for x in part.split(' ')] @@ -981,7 +965,7 @@ def capitalize(self, force=None): 'Shirley MacLaine' """ - name = u(self) + name = str(self) force = self.C.force_mixed_case_capitalization \ if force is None else force diff --git a/nameparser/util.py b/nameparser/util.py index 4ef7458..de8e282 100644 --- a/nameparser/util.py +++ b/nameparser/util.py @@ -2,36 +2,10 @@ # http://code.google.com/p/python-nameparser/issues/detail?id=10 log = logging.getLogger('HumanName') -try: - log.addHandler(logging.NullHandler()) -except AttributeError: - class NullHandler(logging.Handler): - def emit(self, record): - pass - log.addHandler(NullHandler()) +log.addHandler(logging.NullHandler()) log.setLevel(logging.ERROR) -import sys -if sys.version_info[0] < 3: - - text_type = unicode - binary_type = str - - def u(x, encoding=None): - if encoding: - return unicode(x, encoding) - else: - return unicode(x) - -else: - text_type = str - binary_type = bytes - - def u(x, encoding=None): - return text_type(x) - -text_types = (text_type, binary_type) def lc(value): """Lower case and remove any periods to normalize for comparison.""" if not value: diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 2a9acf1..0000000 --- a/setup.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[bdist_wheel] -universal = 1 diff --git a/setup.py b/setup.py index c2451f8..ab1bd10 100755 --- a/setup.py +++ b/setup.py @@ -1,8 +1,5 @@ #!/usr/bin/env python -try: - from setuptools import setup -except ImportError: - from distutils.core import setup +from setuptools import setup import nameparser import os @@ -33,6 +30,7 @@ def read(fname): 'Programming Language :: Python :: 3.11', 'Programming Language :: Python :: 3.12', 'Programming Language :: Python :: 3.13', + 'Programming Language :: Python :: 3.14', 'Development Status :: 5 - Production/Stable', 'Natural Language :: English', "Topic :: Software Development :: Libraries :: Python Modules", diff --git a/tests.py b/tests.py index 2cdd526..900ce40 100644 --- a/tests.py +++ b/tests.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import unicode_literals import unittest """ Run this file to run the tests. @@ -27,7 +25,6 @@ dill = False from nameparser import HumanName -from nameparser.util import u from nameparser.config import Constants, TupleManager log = logging.getLogger('HumanName') @@ -2126,7 +2123,7 @@ def test_force_capitalization(self): def test_capitalize_diacritics(self): hn = HumanName('matthëus schmidt') hn.capitalize() - self.m(u(hn), 'Matthëus Schmidt', hn) + self.m(str(hn), 'Matthëus Schmidt', hn) # http://code.google.com/p/python-nameparser/issues/detail?id=15 def test_downcasing_mac(self): @@ -2161,14 +2158,14 @@ class HumanNameOutputFormatTests(HumanNameTestBase): def test_formatting_init_argument(self): hn = HumanName("Rev John A. Kenneth Doe III (Kenny)", string_format="TEST1") - self.assertEqual(u(hn), "TEST1") + self.assertEqual(str(hn), "TEST1") def test_formatting_constants_attribute(self): from nameparser.config import CONSTANTS _orig = CONSTANTS.string_format CONSTANTS.string_format = "TEST2" hn = HumanName("Rev John A. Kenneth Doe III (Kenny)") - self.assertEqual(u(hn), "TEST2") + self.assertEqual(str(hn), "TEST2") CONSTANTS.string_format = _orig def test_capitalize_name_constants_attribute(self): @@ -2196,77 +2193,77 @@ def test_capitalize_name_and_force_mixed_case_capitalization_constants_attribute def test_quote_nickname_formating(self): hn = HumanName("Rev John A. Kenneth Doe III (Kenny)") hn.string_format = "{title} {first} {middle} {last} {suffix} '{nickname}'" - self.assertEqual(u(hn), "Rev John A. Kenneth Doe III 'Kenny'") + self.assertEqual(str(hn), "Rev John A. Kenneth Doe III 'Kenny'") hn.string_format = "{last}, {title} {first} {middle}, {suffix} '{nickname}'" - self.assertEqual(u(hn), "Doe, Rev John A. Kenneth, III 'Kenny'") + self.assertEqual(str(hn), "Doe, Rev John A. Kenneth, III 'Kenny'") def test_formating_removing_keys_from_format_string(self): hn = HumanName("Rev John A. Kenneth Doe III (Kenny)") hn.string_format = "{title} {first} {middle} {last} {suffix} '{nickname}'" - self.assertEqual(u(hn), "Rev John A. Kenneth Doe III 'Kenny'") + self.assertEqual(str(hn), "Rev John A. Kenneth Doe III 'Kenny'") hn.string_format = "{last}, {title} {first} {middle}, {suffix}" - self.assertEqual(u(hn), "Doe, Rev John A. Kenneth, III") + self.assertEqual(str(hn), "Doe, Rev John A. Kenneth, III") hn.string_format = "{last}, {title} {first} {middle}" - self.assertEqual(u(hn), "Doe, Rev John A. Kenneth") + self.assertEqual(str(hn), "Doe, Rev John A. Kenneth") hn.string_format = "{last}, {first} {middle}" - self.assertEqual(u(hn), "Doe, John A. Kenneth") + self.assertEqual(str(hn), "Doe, John A. Kenneth") hn.string_format = "{last}, {first}" - self.assertEqual(u(hn), "Doe, John") + self.assertEqual(str(hn), "Doe, John") hn.string_format = "{first} {last}" - self.assertEqual(u(hn), "John Doe") + self.assertEqual(str(hn), "John Doe") def test_formating_removing_pieces_from_name_buckets(self): hn = HumanName("Rev John A. Kenneth Doe III (Kenny)") hn.string_format = "{title} {first} {middle} {last} {suffix} '{nickname}'" - self.assertEqual(u(hn), "Rev John A. Kenneth Doe III 'Kenny'") + self.assertEqual(str(hn), "Rev John A. Kenneth Doe III 'Kenny'") hn.string_format = "{title} {first} {middle} {last} {suffix}" - self.assertEqual(u(hn), "Rev John A. Kenneth Doe III") + self.assertEqual(str(hn), "Rev John A. Kenneth Doe III") hn.middle = '' - self.assertEqual(u(hn), "Rev John Doe III") + self.assertEqual(str(hn), "Rev John Doe III") hn.suffix = '' - self.assertEqual(u(hn), "Rev John Doe") + self.assertEqual(str(hn), "Rev John Doe") hn.title = '' - self.assertEqual(u(hn), "John Doe") + self.assertEqual(str(hn), "John Doe") def test_formating_of_nicknames_with_parenthesis(self): hn = HumanName("Rev John A. Kenneth Doe III (Kenny)") hn.string_format = "{title} {first} {middle} {last} {suffix} ({nickname})" - self.assertEqual(u(hn), "Rev John A. Kenneth Doe III (Kenny)") + self.assertEqual(str(hn), "Rev John A. Kenneth Doe III (Kenny)") hn.nickname = '' - self.assertEqual(u(hn), "Rev John A. Kenneth Doe III") + self.assertEqual(str(hn), "Rev John A. Kenneth Doe III") def test_formating_of_nicknames_with_single_quotes(self): hn = HumanName("Rev John A. Kenneth Doe III (Kenny)") hn.string_format = "{title} {first} {middle} {last} {suffix} '{nickname}'" - self.assertEqual(u(hn), "Rev John A. Kenneth Doe III 'Kenny'") + self.assertEqual(str(hn), "Rev John A. Kenneth Doe III 'Kenny'") hn.nickname = '' - self.assertEqual(u(hn), "Rev John A. Kenneth Doe III") + self.assertEqual(str(hn), "Rev John A. Kenneth Doe III") def test_formating_of_nicknames_with_double_quotes(self): hn = HumanName("Rev John A. Kenneth Doe III (Kenny)") hn.string_format = "{title} {first} {middle} {last} {suffix} \"{nickname}\"" - self.assertEqual(u(hn), "Rev John A. Kenneth Doe III \"Kenny\"") + self.assertEqual(str(hn), "Rev John A. Kenneth Doe III \"Kenny\"") hn.nickname = '' - self.assertEqual(u(hn), "Rev John A. Kenneth Doe III") + self.assertEqual(str(hn), "Rev John A. Kenneth Doe III") def test_formating_of_nicknames_in_middle(self): hn = HumanName("Rev John A. Kenneth Doe III (Kenny)") hn.string_format = "{title} {first} ({nickname}) {middle} {last} {suffix}" - self.assertEqual(u(hn), "Rev John (Kenny) A. Kenneth Doe III") + self.assertEqual(str(hn), "Rev John (Kenny) A. Kenneth Doe III") hn.nickname = '' - self.assertEqual(u(hn), "Rev John A. Kenneth Doe III") + self.assertEqual(str(hn), "Rev John A. Kenneth Doe III") def test_remove_emojis(self): hn = HumanName("Sam Smith 😊") self.m(hn.first, "Sam", hn) self.m(hn.last, "Smith", hn) - self.assertEqual(u(hn), "Sam Smith") + self.assertEqual(str(hn), "Sam Smith") def test_keep_non_emojis(self): hn = HumanName("∫≜⩕ Smith 😊") self.m(hn.first, "∫≜⩕", hn) self.m(hn.last, "Smith", hn) - self.assertEqual(u(hn), "∫≜⩕ Smith") + self.assertEqual(str(hn), "∫≜⩕ Smith") def test_keep_emojis(self): from nameparser.config import Constants @@ -2275,7 +2272,7 @@ def test_keep_emojis(self): hn = HumanName("∫≜⩕ Smith😊", constants) self.m(hn.first, "∫≜⩕", hn) self.m(hn.last, "Smith😊", hn) - self.assertEqual(u(hn), "∫≜⩕ Smith😊") + self.assertEqual(str(hn), "∫≜⩕ Smith😊") # test cleanup