Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.10", "3.11", "3.12", "3.13"]
python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]

steps:
- uses: actions/checkout@v4
Expand All @@ -28,12 +28,13 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip setuptools
python -m pip install build
python -m pip install twine
python -m pip install sphinx
if [ -f dev-requirements.txt ]; then pip install -r dev-requirements.txt; fi
- name: Run Tests
run: |
python tests.py
python setup.py sdist
python -m build --sdist
twine check dist/*
sphinx-build -b html docs dist/docs
18 changes: 4 additions & 14 deletions nameparser/config/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
"""
The :py:mod:`nameparser.config` module manages the configuration of the
nameparser.
Expand All @@ -11,7 +10,7 @@

>>> from nameparser.config import CONSTANTS
>>> CONSTANTS.titles.remove('hon').add('chemistry','dean') # doctest: +ELLIPSIS
SetManager(set([u'msgt', ..., u'adjutant']))
SetManager({'msgt', ..., 'adjutant'})

You can also adjust the configuration of individual instances by passing
``None`` as the second argument upon instantiation.
Expand All @@ -21,22 +20,16 @@
>>> from nameparser import HumanName
>>> hn = HumanName("Dean Robert Johns", None)
>>> hn.C.titles.add('dean') # doctest: +ELLIPSIS
SetManager(set([u'msgt', ..., u'adjutant']))
SetManager({'msgt', ..., 'adjutant'})
>>> hn.parse_full_name() # need to run this again after config changes

**Potential Gotcha**: If you do not pass ``None`` as the second argument,
``hn.C`` will be a reference to the module config, possibly yielding
unexpected results. See `Customizing the Parser <customize.html>`_.
"""
from __future__ import unicode_literals
import sys
try:
# Python 3.3+
from collections.abc import Set
except ImportError:
from collections import Set
from collections.abc import Set

from nameparser.util import binary_type
from nameparser.util import lc
from nameparser.config.prefixes import PREFIXES
from nameparser.config.capitalization import CAPITALIZATION_EXCEPTIONS
Expand Down Expand Up @@ -80,9 +73,6 @@ def __contains__(self, value):
def __len__(self):
return len(self.elements)

def next(self):
return self.__next__()

def __next__(self):
if self.count >= len(self.elements):
self.count = 0
Expand All @@ -102,7 +92,7 @@ def add_with_encoding(self, s, encoding=None):
if sys.stdin:
stdin_encoding = sys.stdin.encoding
encoding = encoding or stdin_encoding or DEFAULT_ENCODING
if type(s) == binary_type:
if isinstance(s, bytes):
s = s.decode(encoding)
self.elements.add(lc(s))

Expand Down
3 changes: 0 additions & 3 deletions nameparser/config/capitalization.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

CAPITALIZATION_EXCEPTIONS = (
('ii', 'II'),
('iii', 'III'),
Expand Down
3 changes: 0 additions & 3 deletions nameparser/config/conjunctions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

CONJUNCTIONS = set([
'&',
'and',
Expand Down
3 changes: 0 additions & 3 deletions nameparser/config/prefixes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

#: Name pieces that appear before a last name. Prefixes join to the piece
#: that follows them to make one new piece. They can be chained together, e.g
#: "von der" and "de la". Because they only appear in middle or last names,
Expand Down
2 changes: 0 additions & 2 deletions nameparser/config/regexes.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re

# emoji regex from https://stackoverflow.com/questions/26568722/remove-unicode-emoji-using-re-in-python
Expand Down
3 changes: 0 additions & 3 deletions nameparser/config/suffixes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

SUFFIX_NOT_ACRONYMS = set([
'dr',
'esq',
Expand Down
3 changes: 0 additions & 3 deletions nameparser/config/titles.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

FIRST_NAME_TITLES = set([
'aunt',
'auntie',
Expand Down
36 changes: 10 additions & 26 deletions nameparser/parser.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import sys
import re
from operator import itemgetter
from itertools import groupby

from nameparser.util import u
from nameparser.util import text_types, binary_type
from nameparser.util import lc
from nameparser.util import log
from nameparser.config import CONSTANTS
Expand Down Expand Up @@ -123,10 +117,10 @@ def __eq__(self, other):
HumanName instances are equal to other objects whose
lower case unicode representation is the same.
"""
return (u(self)).lower() == (u(other)).lower()
return str(self).lower() == str(other).lower()

def __ne__(self, other):
return not (u(self)).lower() == (u(other)).lower()
return not str(self).lower() == str(other).lower()

def __getitem__(self, key):
if isinstance(key, slice):
Expand All @@ -140,9 +134,6 @@ def __setitem__(self, key, value):
else:
raise KeyError("Not a valid HumanName attribute", key)

def next(self):
return self.__next__()

def __next__(self):
if self._count >= len(self._members):
self._count = 0
Expand All @@ -152,7 +143,7 @@ def __next__(self):
self._count = c + 1
return getattr(self, self._members[c]) or next(self)

def __unicode__(self):
def __str__(self):
if self.string_format:
# string_format = "{title} {first} {middle} {last} {suffix} ({nickname})"
_s = self.string_format.format(**self.as_dict())
Expand All @@ -164,11 +155,6 @@ def __unicode__(self):
def __hash__(self):
return hash(str(self))

def __str__(self):
if sys.version_info[0] >= 3:
return self.__unicode__()
return self.__unicode__().encode(self.encoding)

def __repr__(self):
if self.unparsable:
_string = "<%(class)s : [ Unparsable ] >" % {'class': self.__class__.__name__, }
Expand All @@ -182,9 +168,7 @@ def __repr__(self):
'suffix': self.suffix or '',
'nickname': self.nickname or '',
}
if sys.version_info[0] >= 3:
return _string
return _string.encode(self.encoding)
return _string

def as_dict(self, include_empty=True):
"""
Expand Down Expand Up @@ -361,7 +345,7 @@ def surnames(self):
def _set_list(self, attr, value):
if isinstance(value, list):
val = value
elif isinstance(value, text_types):
elif isinstance(value, (str, bytes)):
val = [value]
elif value is None:
val = []
Expand Down Expand Up @@ -481,7 +465,7 @@ def full_name(self):
def full_name(self, value):
self.original = value
self._full_name = value
if isinstance(value, binary_type):
if isinstance(value, bytes):
self._full_name = value.decode(self.encoding)
self.parse_full_name()

Expand Down Expand Up @@ -657,7 +641,7 @@ def parse_full_name(self):

self.suffix_list += parts[1:]
pieces = self.parse_pieces(parts[0].split(' '))
log.debug("pieces: %s", u(pieces))
log.debug("pieces: %s", str(pieces))
for i, piece in enumerate(pieces):
try:
nxt = pieces[i + 1]
Expand Down Expand Up @@ -686,7 +670,7 @@ def parse_full_name(self):
# last [suffix], title first middles[,] suffix [,suffix]
# parts[0], parts[1], parts[2:...]

log.debug("post-comma pieces: %s", u(post_comma_pieces))
log.debug("post-comma pieces: %s", str(post_comma_pieces))

# lastname part may have suffixes in it
lastname_pieces = self.parse_pieces(parts[0].split(' '), 1)
Expand Down Expand Up @@ -747,7 +731,7 @@ def parse_pieces(self, parts, additional_parts_count=0):

output = []
for part in parts:
if not isinstance(part, text_types):
if not isinstance(part, (str, bytes)):
raise TypeError("Name parts must be strings. "
"Got {0}".format(type(part)))
output += [x.strip(' ,') for x in part.split(' ')]
Expand Down Expand Up @@ -981,7 +965,7 @@ def capitalize(self, force=None):
'Shirley MacLaine'

"""
name = u(self)
name = str(self)
force = self.C.force_mixed_case_capitalization \
if force is None else force

Expand Down
28 changes: 1 addition & 27 deletions nameparser/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,36 +2,10 @@

# http://code.google.com/p/python-nameparser/issues/detail?id=10
log = logging.getLogger('HumanName')
try:
log.addHandler(logging.NullHandler())
except AttributeError:
class NullHandler(logging.Handler):
def emit(self, record):
pass
log.addHandler(NullHandler())
log.addHandler(logging.NullHandler())
log.setLevel(logging.ERROR)


import sys
if sys.version_info[0] < 3:

text_type = unicode
binary_type = str

def u(x, encoding=None):
if encoding:
return unicode(x, encoding)
else:
return unicode(x)

else:
text_type = str
binary_type = bytes

def u(x, encoding=None):
return text_type(x)

text_types = (text_type, binary_type)
def lc(value):
"""Lower case and remove any periods to normalize for comparison."""
if not value:
Expand Down
2 changes: 0 additions & 2 deletions setup.cfg

This file was deleted.

6 changes: 2 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
#!/usr/bin/env python
try:
from setuptools import setup
except ImportError:
from distutils.core import setup
from setuptools import setup
import nameparser
import os

Expand Down Expand Up @@ -33,6 +30,7 @@ def read(fname):
'Programming Language :: Python :: 3.11',
'Programming Language :: Python :: 3.12',
'Programming Language :: Python :: 3.13',
'Programming Language :: Python :: 3.14',
'Development Status :: 5 - Production/Stable',
'Natural Language :: English',
"Topic :: Software Development :: Libraries :: Python Modules",
Expand Down
Loading
Loading