Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,14 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip setuptools
python -m pip install --upgrade "pip>=24.2" setuptools
python -m pip install build
python -m pip install twine
python -m pip install sphinx
if [ -f dev-requirements.txt ]; then pip install -r dev-requirements.txt; fi
python -m pip install --group dev
- name: Run linter
run: ruff check
- name: Check types
run: mypy
- name: Run Tests
run: |
python tests.py
Expand Down
1 change: 1 addition & 0 deletions .python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.10
4 changes: 2 additions & 2 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
## Commands

```bash
# Install dev dependencies
pip install -r dev-requirements.txt
# Install dev dependencies (requires pip >= 25.1)
pip install --group dev

# Run all tests
python tests.py
Expand Down
6 changes: 3 additions & 3 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ Contributing
Development Environment Setup
--------------------------------

Install dev dependencies:
Install dev dependencies (requires pip >= 25.1 for dependency group support):

pip install -r dev-requirements.txt
pip install --group dev

Running Tests
---------------
Expand Down Expand Up @@ -38,5 +38,5 @@ Unless you add better coverage someplace else, add a few examples of your names
New Releases
------------

$ python setup.py sdist bdist_wheel
$ python -m build
$ twine upload dist/*
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ Installation
If you want to try out the latest code from GitHub you can
install with pip using the command below.

``pip install -e git+git://github.com/derek73/python-nameparser.git#egg=nameparser``
``pip install -e git+https://github.com/derek73/python-nameparser.git``

If you need to handle lists of names, check out
`namesparser <https://github.com/gwu-libraries/namesparser>`_, a
Expand Down
2 changes: 0 additions & 2 deletions dev-requirements.txt

This file was deleted.

18 changes: 9 additions & 9 deletions docs/conf.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# -*- coding: utf-8 -*-
# ruff: noqa: E402
#
# Nameparser documentation build configuration file, created by
# sphinx-quickstart on Fri May 16 01:29:58 2014.
Expand Down Expand Up @@ -49,8 +49,8 @@
master_doc = 'index'

# General information about the project.
project = u'Nameparser'
copyright = u'{:%Y}, Derek Gulbranson'.format(date.today())
project = 'Nameparser'
copyright = '{:%Y}, Derek Gulbranson'.format(date.today())

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
Expand Down Expand Up @@ -223,8 +223,8 @@
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
('index', 'Nameparser.tex', u'Nameparser Documentation',
u'Derek Gulbranson', 'manual'),
('index', 'Nameparser.tex', 'Nameparser Documentation',
'Derek Gulbranson', 'manual'),
]

# The name of an image file (relative to this directory) to place at the top of
Expand Down Expand Up @@ -253,8 +253,8 @@
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
('index', 'nameparser', u'Nameparser Documentation',
[u'Derek Gulbranson'], 1)
('index', 'nameparser', 'Nameparser Documentation',
['Derek Gulbranson'], 1)
]

# If true, show URL addresses after external links.
Expand All @@ -267,8 +267,8 @@
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
('index', 'Nameparser', u'Nameparser Documentation',
u'Derek Gulbranson', 'Nameparser', 'A simple python modules for parsing human names into components.',
('index', 'Nameparser', 'Nameparser Documentation',
'Derek Gulbranson', 'Nameparser', 'A simple python modules for parsing human names into components.',
'Miscellaneous'),
]

Expand Down
8 changes: 3 additions & 5 deletions nameparser/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
VERSION = (1, 1, 3)
__version__ = '.'.join(map(str, VERSION))
from nameparser._version import VERSION as VERSION
from nameparser._version import __version__ as __version__
from nameparser.parser import HumanName as HumanName
Comment thread
derek73 marked this conversation as resolved.
__author__ = "Derek Gulbranson"
__author_email__ = 'derek73@gmail.com'
__license__ = "LGPL"
__url__ = "https://github.com/derek73/python-nameparser"


from nameparser.parser import HumanName
2 changes: 2 additions & 0 deletions nameparser/_version.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
VERSION = (1, 1, 3)
__version__ = '.'.join(map(str, VERSION))
105 changes: 67 additions & 38 deletions nameparser/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,12 @@
``hn.C`` will be a reference to the module config, possibly yielding
unexpected results. See `Customizing the Parser <customize.html>`_.
"""
import re
import sys
from collections.abc import Set
from collections.abc import Iterable, Iterator, Mapping, Set
from typing import Any, TypeVar

from typing_extensions import Self

from nameparser.util import lc
from nameparser.config.prefixes import PREFIXES
Expand All @@ -38,7 +42,7 @@
from nameparser.config.suffixes import SUFFIX_NOT_ACRONYMS
from nameparser.config.titles import TITLES
from nameparser.config.titles import FIRST_NAME_TITLES
from nameparser.config.regexes import REGEXES
from nameparser.config.regexes import EMPTY_REGEX, REGEXES

DEFAULT_ENCODING = 'UTF-8'

Expand All @@ -55,25 +59,25 @@ class SetManager(Set):

'''

def __init__(self, elements):
def __init__(self, elements: Iterable[str]) -> None:
self.elements = set(elements)

def __call__(self):
def __call__(self) -> Set[str]:
return self.elements

def __repr__(self):
def __repr__(self) -> str:
return "SetManager({})".format(self.elements) # used for docs

def __iter__(self):
def __iter__(self) -> Iterator[str]:
return iter(self.elements)

def __contains__(self, value):
def __contains__(self, value: object) -> bool:
return value in self.elements

def __len__(self):
def __len__(self) -> int:
return len(self.elements)

def add_with_encoding(self, s, encoding=None):
def add_with_encoding(self, s: str, encoding: str | None = None) -> None:
"""
Add the lower case and no-period version of the string to the set. Pass an
explicit `encoding` parameter to specify the encoding of binary strings that
Expand All @@ -87,45 +91,59 @@ def add_with_encoding(self, s, encoding=None):
s = s.decode(encoding)
self.elements.add(lc(s))

def add(self, *strings):
def add(self, *strings: str) -> Self:
"""
Add the lower case and no-period version of the string arguments to the set.
Can pass a list of strings. Returns ``self`` for chaining.
"""
[self.add_with_encoding(s) for s in strings]
for s in strings:
self.add_with_encoding(s)

return self

def remove(self, *strings):
def remove(self, *strings: str) -> Self:
"""
Remove the lower case and no-period version of the string arguments from the set.
Returns ``self`` for chaining.
"""
[self.elements.remove(lc(s)) for s in strings if lc(s) in self.elements]
for s in strings:
if (lower := lc(s)) in self.elements:
self.elements.remove(lower)

return self


class TupleManager(dict):
T = TypeVar('T')


class TupleManager(dict[str, T]):
'''
A dictionary with dot.notation access. Subclass of ``dict``. Makes the tuple constants
more friendly.
'''

def __getattr__(self, attr):
def __getattr__(self, attr: str) -> T | None:
return self.get(attr)

__setattr__ = dict.__setitem__
__delattr__ = dict.__delitem__

def __getstate__(self):
def __getstate__(self) -> Mapping[str, T]:
return dict(self)

def __setstate__(self, state):
self.__init__(state)
def __setstate__(self, state: Mapping[str, T]) -> None:
self.update(state)

def __reduce__(self):
def __reduce__(self) -> tuple[type, tuple[()], Mapping[str, T]]:
return (TupleManager, (), self.__getstate__())


class Constants(object):
class RegexTupleManager(TupleManager[re.Pattern[str]]):
def __getattr__(self, attr: str) -> re.Pattern[str]:
return self.get(attr, EMPTY_REGEX)
Comment thread
derek73 marked this conversation as resolved.


class Constants:
"""
An instance of this class holds all of the configuration constants for the parser.

Expand All @@ -149,6 +167,17 @@ class Constants(object):
:py:attr:`regexes` wrapped with :py:class:`TupleManager`.
"""

prefixes: SetManager
suffix_acronyms: SetManager
suffix_not_acronyms: SetManager
titles: SetManager
first_name_titles: SetManager
conjunctions: SetManager
capitalization_exceptions: TupleManager[str]
regexes: RegexTupleManager

_pst: Set[str] | None

string_format = "{title} {first} {middle} {last} {suffix} ({nickname})"
"""
The default string format used for all new `HumanName` instances.
Expand All @@ -168,17 +197,17 @@ class Constants(object):
empty_attribute_default = ''
"""
Default return value for empty attributes.

.. doctest::

>>> from nameparser.config import CONSTANTS
>>> CONSTANTS.empty_attribute_default = None
>>> name = HumanName("John Doe")
>>> name.title
None
>>> name.first
'John'

"""

capitalize_name = False
Expand Down Expand Up @@ -213,38 +242,38 @@ class Constants(object):
"""

def __init__(self,
prefixes=PREFIXES,
suffix_acronyms=SUFFIX_ACRONYMS,
suffix_not_acronyms=SUFFIX_NOT_ACRONYMS,
titles=TITLES,
first_name_titles=FIRST_NAME_TITLES,
conjunctions=CONJUNCTIONS,
capitalization_exceptions=CAPITALIZATION_EXCEPTIONS,
regexes=REGEXES
):
prefixes: Iterable[str] = PREFIXES,
suffix_acronyms: Iterable[str] = SUFFIX_ACRONYMS,
suffix_not_acronyms: Iterable[str] = SUFFIX_NOT_ACRONYMS,
titles: Iterable[str] = TITLES,
first_name_titles: Iterable[str] = FIRST_NAME_TITLES,
conjunctions: Iterable[str] = CONJUNCTIONS,
capitalization_exceptions: TupleManager[str] | Iterable[tuple[str, str]] = CAPITALIZATION_EXCEPTIONS,
regexes: RegexTupleManager | TupleManager[re.Pattern[str]] | Iterable[tuple[str, re.Pattern[str]]] = REGEXES
) -> None:
self.prefixes = SetManager(prefixes)
self.suffix_acronyms = SetManager(suffix_acronyms)
self.suffix_not_acronyms = SetManager(suffix_not_acronyms)
self.titles = SetManager(titles)
self.first_name_titles = SetManager(first_name_titles)
self.conjunctions = SetManager(conjunctions)
self.capitalization_exceptions = TupleManager(capitalization_exceptions)
self.regexes = TupleManager(regexes)
self.regexes = RegexTupleManager(regexes)
self._pst = None

@property
def suffixes_prefixes_titles(self):
def suffixes_prefixes_titles(self) -> Set[str]:
if not self._pst:
self._pst = self.prefixes | self.suffix_acronyms | self.suffix_not_acronyms | self.titles
return self._pst

def __repr__(self):
def __repr__(self) -> str:
return "<Constants() instance>"

def __setstate__(self, state):
self.__init__(state)
def __setstate__(self, state: Mapping[str, Any]) -> None:
Constants.__init__(self, state)

def __getstate__(self):
def __getstate__(self) -> Mapping[str, Any]:
attrs = [x for x in dir(self) if not x.startswith('_')]
return dict([(a, getattr(self, a)) for a in attrs])

Expand Down
2 changes: 1 addition & 1 deletion nameparser/config/capitalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@
)
"""
Any pieces that are not capitalized by capitalizing the first letter.
"""
"""
2 changes: 1 addition & 1 deletion nameparser/config/conjunctions.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@
Pieces that should join to their neighboring pieces, e.g. "and", "y" and "&".
"of" and "the" are also included to facilitate joining multiple titles,
e.g. "President of the United States".
"""
"""
2 changes: 2 additions & 0 deletions nameparser/config/regexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
'[\u2600-\u26FF\u2700-\u27BF])+',
re.UNICODE)

EMPTY_REGEX = re.compile('')

REGEXES = set([
("spaces", re.compile(r"\s+", re.U)),
("word", re.compile(r"(\w|\.)+", re.U)),
Expand Down
Loading