515 lines
22 KiB
Python
515 lines
22 KiB
Python
from enum import Enum, auto
|
|
from abc import ABC, abstractmethod
|
|
from datetime import date
|
|
from xml.etree.ElementTree import Element
|
|
from defusedxml.ElementTree import fromstring
|
|
|
|
|
|
class DocType(Enum):
    """Document series that can appear in the index.

    The numeric values are significant: ``docID`` zero-pads the document
    number only for members whose value is below 5 (RFC, STD, BCP, FYI).
    Members are also looked up by name from the three-letter prefix of a
    document identifier.
    """

    RFC = 1
    STD = 2
    BCP = 3
    FYI = 4
    NIC = 5
    IEN = 6
    RTR = 7

    def docID(self, num: int) -> str:
        """Return the identifier of document number *num* in this series.

        RFC, STD, BCP and FYI numbers are right-aligned to four characters
        with leading zeros (``RFC0001``); NIC, IEN and RTR numbers are
        appended unchanged (``IEN12``).
        """
        if self.value >= 5:  # NIC, IEN, RTR: no padding
            return f"{self.name}{num}"
        # RFC, STD, BCP, FYI: pad to at least four characters
        return self.name + str(num).rjust(4, "0")
|
|
|
|
|
|
class Status(Enum):
    """Status labels a document can carry.

    ``IndexParser`` resolves members by name after replacing the spaces in
    the status text with underscores, so renaming a member changes which
    status strings can be parsed.
    """

    # Values come from ``auto()`` and therefore follow declaration order.
    INTERNET_STANDARD = auto()
    DRAFT_STANDARD = auto()
    PROPOSED_STANDARD = auto()
    UNKNOWN = auto()
    BEST_CURRENT_PRACTICE = auto()
    FOR_YOUR_INFORMATION = auto()
    EXPERIMENTAL = auto()
    HISTORIC = auto()
    INFORMATIONAL = auto()
|
|
|
|
|
|
class FileFormat(Enum):
    """File formats in which a document is available.

    ``IndexParser`` resolves members by name from the text of each
    ``file-format`` element, so member names must match that text exactly.
    """

    # Values come from ``auto()`` and therefore follow declaration order.
    ASCII = auto()
    PS = auto()
    PDF = auto()
    TGZ = auto()
    HTML = auto()
    XML = auto()
    TEXT = auto()
|
|
|
|
|
|
class Stream(Enum):
    """Publication stream of an RFC.

    ``IndexParser`` resolves members by name from the text of the ``stream``
    element, which is why ``Editorial`` and ``Legacy`` keep their mixed-case
    spelling: normalising the names would break that lookup.
    """

    # Values come from ``auto()`` and therefore follow declaration order.
    IETF = auto()
    IAB = auto()
    IRTF = auto()
    INDEPENDENT = auto()
    Editorial = auto()
    Legacy = auto()
|
|
|
|
|
|
class Month(Enum):
    """Calendar months keyed by their English name.

    ``IndexParser`` looks a member up by the text of the ``month`` element
    and passes its value to ``datetime.date`` as the month number.
    """

    January = 1
    February = 2
    March = 3
    April = 4
    May = 5
    June = 6
    July = 7
    August = 8
    September = 9
    October = 10
    November = 11
    December = 12
|
|
|
|
|
|
class Author:
    """An author credited on a document.

    Only ``name`` is required; the remaining attributes default to the empty
    string.
    """

    def __init__(self,
                 name: str,
                 title: str = "",
                 organization: str = "",
                 org_abbrev: str = ""):
        """Store the author's name, title, organization and its abbreviation."""
        self.name: str = name
        self.title: str = title
        self.organization: str = organization
        self.org_abbrev: str = org_abbrev
|
|
|
|
|
|
class Document(ABC):
    """Abstract base class for an entry in the document index.

    A document is identified by its series (``type``) and ``number`` and may
    carry a ``title`` and a list of documents it "is also" known as.
    """

    def __init__(self,
                 type: DocType,
                 number: int,
                 title: str = "",
                 is_also: list['Document'] | None = None):
        """Initialise the fields shared by all document kinds.

        Args:
            type: Series the document belongs to.
            number: Number of the document within its series.
            title: Document title; empty when unknown.
            is_also: Documents this one is also known as.  The list is stored
                as given (not copied); when omitted, a new empty list is
                created for this instance.
        """
        self.type: DocType = type
        self.number: int = number
        self.title: str = title
        # A literal ``[]`` default would be created once and shared by every
        # instance constructed without ``is_also``.
        self.is_also: list['Document'] = [] if is_also is None else is_also

    def docID(self) -> str:
        """Return the document identifier, as formatted by ``self.type``."""
        return self.type.docID(self.number)

    @abstractmethod
    def update(self, **kwargs) -> 'Document':
        """Overwrite attributes from keyword arguments and return ``self``."""
        pass

    @abstractmethod
    def get_references(self) -> list[tuple[str, 'Document']]:
        """Return ``(relation label, document)`` pairs for this document."""
        pass
|
|
|
|
|
|
class RFC(Document):
    """An RFC together with its index metadata and cross-references."""

    # Attributes that ``update`` may overwrite; these are exactly the keyword
    # parameters of ``__init__`` other than ``number``.
    _UPDATABLE_FIELDS = (
        "title", "authors", "pub_date", "current_status", "pub_status",
        "format", "page_count", "keywords", "abstract", "draft", "notes",
        "obsoletes", "obsoleted_by", "updates", "updated_by", "is_also",
        "see_also", "stream", "area", "wg_acronym", "errata_url", "doi",
    )

    def __init__(self,
                 number: int,
                 title: str = "",
                 authors: list[Author] | None = None,
                 pub_date: date | None = None,
                 current_status: Status = Status.UNKNOWN,
                 pub_status: Status = Status.UNKNOWN,
                 format: list[FileFormat] | None = None,
                 page_count: int | None = None,
                 keywords: list[str] | None = None,
                 abstract: list[str] | None = None,
                 draft: str = "",
                 notes: str = "",
                 obsoletes: list[Document] | None = None,
                 obsoleted_by: list[Document] | None = None,
                 updates: list[Document] | None = None,
                 updated_by: list[Document] | None = None,
                 is_also: list[Document] | None = None,
                 see_also: list[Document] | None = None,
                 stream: Stream | None = None,
                 area: str = "",
                 wg_acronym: str = "",
                 errata_url: str = "",
                 doi: str = ""):
        """Create an RFC; every field except ``number`` is optional.

        List arguments are stored as given (not copied).  A list argument
        that is omitted or ``None`` becomes a new empty list owned by this
        instance, so instances never share a default list.

        Args:
            number: RFC number.
            title: Title; empty when unknown.
            authors: Authors of the RFC.
            pub_date: Publication date, or ``None`` when unknown.
            current_status: Current status.
            pub_status: Status at publication.
            format: File formats the RFC is available in.
            page_count: Number of pages, or ``None`` when unknown.
            keywords: Keywords attached to the RFC.
            abstract: Abstract, one string per paragraph.
            draft: Draft field of the index entry.
            notes: Notes field of the index entry.
            obsoletes: Documents this RFC obsoletes.
            obsoleted_by: Documents that obsolete this RFC.
            updates: Documents this RFC updates.
            updated_by: Documents that update this RFC.
            is_also: Documents this RFC is also known as.
            see_also: Related documents.
            stream: Publication stream, or ``None`` when unknown.
            area: Area field of the index entry.
            wg_acronym: Working-group acronym field of the index entry.
            errata_url: Errata URL field of the index entry.
            doi: DOI field of the index entry.
        """
        # Normalise ``is_also`` here rather than relying on the base class so
        # that a real list always reaches ``Document.__init__``.
        super().__init__(DocType.RFC, number, title,
                         [] if is_also is None else is_also)
        self.authors: list[Author] = [] if authors is None else authors
        self.pub_date: date | None = pub_date
        self.format: list[FileFormat] = [] if format is None else format
        self.page_count: int | None = page_count
        self.keywords: list[str] = [] if keywords is None else keywords
        self.abstract: list[str] = [] if abstract is None else abstract
        self.draft: str = draft
        self.notes: str = notes
        self.obsoletes: list[Document] = [] if obsoletes is None else obsoletes
        self.obsoleted_by: list[Document] = (
            [] if obsoleted_by is None else obsoleted_by)
        self.updates: list[Document] = [] if updates is None else updates
        self.updated_by: list[Document] = (
            [] if updated_by is None else updated_by)
        self.see_also: list[Document] = [] if see_also is None else see_also
        self.current_status: Status = current_status
        self.pub_status: Status = pub_status
        self.stream: Stream | None = stream
        self.area: str = area
        self.wg_acronym: str = wg_acronym
        self.errata_url: str = errata_url
        self.doi: str = doi

    def update(self, **kwargs) -> Document:
        """Overwrite the attributes named in *kwargs* and return ``self``.

        Only the names in ``_UPDATABLE_FIELDS`` are applied; any other
        keyword is ignored.  Values are assigned as given.
        """
        for field in self._UPDATABLE_FIELDS:
            if field in kwargs:
                setattr(self, field, kwargs[field])
        return self

    def get_references(self) -> list[tuple[str, Document]]:
        """Return every cross-reference as a ``(relation, document)`` pair.

        Pairs are grouped by relation in the order: "obsoletes",
        "obsoleted by", "updates", "updated by", "is also", "see also".
        """
        groups = (
            ("obsoletes", self.obsoletes),
            ("obsoleted by", self.obsoleted_by),
            ("updates", self.updates),
            ("updated by", self.updated_by),
            ("is also", self.is_also),
            ("see also", self.see_also),
        )
        return [(label, doc) for label, docs in groups for doc in docs]
|
|
|
|
|
|
class NotIssued(Document):
    """Placeholder for an RFC number that the index lists as not issued.

    It belongs to the RFC series but carries no metadata and no references.
    """

    def __init__(self, number: int):
        """Register *number* in the RFC series with no title."""
        super().__init__(DocType.RFC, number)

    def update(self, **kwargs) -> Document:
        """Ignore all keyword arguments and return ``self`` unchanged."""
        return self

    def get_references(self) -> list[tuple[str, Document]]:
        """Return an empty list: a not-issued entry references nothing."""
        return []
|
|
|
|
|
|
class STD(Document):
    """A document in the STD series, which is an alias for other documents."""

    def __init__(self,
                 number: int,
                 title: str = "",
                 is_also: list[Document] | None = None):
        """Create an STD entry.

        Args:
            number: STD number.
            title: Title; empty when unknown.
            is_also: Documents this STD is also known as.  Stored as given;
                when omitted, a new empty list is created for this instance
                (a literal ``[]`` default would be shared by all instances).
        """
        super().__init__(DocType.STD, number, title,
                         [] if is_also is None else is_also)

    def update(self, **kwargs) -> Document:
        """Overwrite ``title`` and/or ``is_also`` if given; return ``self``."""
        for field in ("title", "is_also"):
            if field in kwargs:
                setattr(self, field, kwargs[field])
        return self

    def get_references(self) -> list[tuple[str, Document]]:
        """Return an ``("is also", document)`` pair for each alias."""
        return [("is also", doc) for doc in self.is_also]
|
|
|
|
|
|
class BCP(Document):
    """A document in the BCP series, which is an alias for other documents."""

    def __init__(self,
                 number: int,
                 title: str = "",
                 is_also: list[Document] | None = None):
        """Create a BCP entry.

        Args:
            number: BCP number.
            title: Title; empty when unknown.
            is_also: Documents this BCP is also known as.  Stored as given;
                when omitted, a new empty list is created for this instance
                (a literal ``[]`` default would be shared by all instances).
        """
        super().__init__(DocType.BCP, number, title,
                         [] if is_also is None else is_also)

    def update(self, **kwargs) -> Document:
        """Overwrite ``title`` and/or ``is_also`` if given; return ``self``."""
        for field in ("title", "is_also"):
            if field in kwargs:
                setattr(self, field, kwargs[field])
        return self

    def get_references(self) -> list[tuple[str, Document]]:
        """Return an ``("is also", document)`` pair for each alias."""
        return [("is also", doc) for doc in self.is_also]
|
|
|
|
|
|
class FYI(Document):
    """A document in the FYI series, which is an alias for other documents."""

    def __init__(self,
                 number: int,
                 title: str = "",
                 is_also: list[Document] | None = None):
        """Create an FYI entry.

        Args:
            number: FYI number.
            title: Title; empty when unknown.
            is_also: Documents this FYI is also known as.  Stored as given;
                when omitted, a new empty list is created for this instance
                (a literal ``[]`` default would be shared by all instances).
        """
        super().__init__(DocType.FYI, number, title,
                         [] if is_also is None else is_also)

    def update(self, **kwargs) -> Document:
        """Overwrite ``title`` and/or ``is_also`` if given; return ``self``."""
        for field in ("title", "is_also"):
            if field in kwargs:
                setattr(self, field, kwargs[field])
        return self

    def get_references(self) -> list[tuple[str, Document]]:
        """Return an ``("is also", document)`` pair for each alias."""
        return [("is also", doc) for doc in self.is_also]
|
|
|
|
|
|
class NIC(Document):
    """A document in the NIC series, known only by its number.

    It carries no title and no references of its own.
    """

    def __init__(self, number: int):
        """Register *number* in the NIC series."""
        super().__init__(DocType.NIC, number)

    def update(self, **kwargs) -> Document:
        """Ignore all keyword arguments and return ``self`` unchanged."""
        return self

    def get_references(self) -> list[tuple[str, Document]]:
        """Return an empty list: NIC entries reference nothing."""
        return []
|
|
|
|
|
|
class IEN(Document):
    """A document in the IEN series, known only by its number.

    It carries no title and no references of its own.
    """

    def __init__(self, number: int):
        """Register *number* in the IEN series."""
        super().__init__(DocType.IEN, number)

    def update(self, **kwargs) -> Document:
        """Ignore all keyword arguments and return ``self`` unchanged."""
        return self

    def get_references(self) -> list[tuple[str, Document]]:
        """Return an empty list: IEN entries reference nothing."""
        return []
|
|
|
|
|
|
class RTR(Document):
    """A document in the RTR series, known only by its number.

    It carries no title and no references of its own.
    """

    def __init__(self, number: int):
        """Register *number* in the RTR series."""
        super().__init__(DocType.RTR, number)

    def update(self, **kwargs) -> Document:
        """Ignore all keyword arguments and return ``self`` unchanged."""
        return self

    def get_references(self) -> list[tuple[str, Document]]:
        """Return an empty list: RTR entries reference nothing."""
        return []
|
|
|
|
|
|
class IndexParser:
    """Parse an RFC-index XML document into per-series document tables.

    After construction ``index`` maps every ``DocType`` to a dictionary of
    ``document number -> Document``.  A document that is only mentioned in a
    cross-reference gets a placeholder object; when its own entry is parsed
    later the placeholder is updated in place, so references collected
    earlier keep pointing at the same object.
    """

    # Concrete class instantiated for each document series.
    _DOCUMENT_CLASSES = {
        DocType.RFC: RFC,
        DocType.STD: STD,
        DocType.BCP: BCP,
        DocType.FYI: FYI,
        DocType.NIC: NIC,
        DocType.IEN: IEN,
        DocType.RTR: RTR,
    }

    # Sub-series entry elements (local tag name -> series).  They all share
    # the same shape: doc-id, optional title, optional is-also list.
    _SUBSERIES_ENTRIES = {
        "std-entry": DocType.STD,
        "bcp-entry": DocType.BCP,
        "fyi-entry": DocType.FYI,
    }

    def __init__(self,
                 xml: str,
                 namespace: str = "http://www.rfc-editor.org/rfc-index"):
        """Parse *xml* and populate ``self.index``.

        Args:
            xml: Text of the index XML document.
            namespace: XML namespace URI of the index elements.

        Raises:
            KeyError: If a document prefix, month, status, file format or
                stream found in the XML has no matching enum member.
            ValueError: If a number in the XML is not a valid integer.
        """
        # Clark-notation prefix ("{uri}") shared by every element lookup.
        self._ns: str = f"{{{namespace}}}"
        self.index: dict[DocType, dict[int, Document]] = {
            doc_type: {} for doc_type in DocType
        }

        rfc_tag = self._tag("rfc-entry")
        not_issued_tag = self._tag("rfc-not-issued-entry")
        subseries_tags = {
            self._tag(local): doc_type
            for local, doc_type in self._SUBSERIES_ENTRIES.items()
        }

        root: Element = fromstring(xml)
        for child in root:
            if child.tag == rfc_tag:
                self._parse_rfc_entry(child)
            elif child.tag == not_issued_tag:
                self._parse_not_issued_entry(child)
            else:
                doc_type = subseries_tags.get(child.tag)
                if doc_type is not None:
                    self._parse_subseries_entry(child, doc_type)
                # Any other top-level element is ignored.

    def _tag(self, local: str) -> str:
        """Return *local* qualified with the index namespace."""
        return f"{self._ns}{local}"

    def _children(self, entry: Element, container: str, item: str) -> list[Element]:
        """Return the *item* children of *entry*'s *container* child, if any."""
        holder = entry.find(self._tag(container))
        if holder is None:
            return []
        return holder.findall(self._tag(item))

    def _get_reflist(self, container: Element | None) -> list[Document]:
        """Resolve the ``doc-id`` children of *container* to documents.

        A referenced document that is not in the index yet is added as a
        placeholder of the matching class.  Returns an empty list when
        *container* is ``None``.
        """
        reflist: list[Document] = []
        if container is None:
            return reflist
        for ref in container.findall(self._tag("doc-id")):
            # A doc-id is a three-letter series prefix followed by the number.
            ref_type: DocType = DocType[ref.text[:3]]
            ref_num: int = int(ref.text[3:])
            series = self.index[ref_type]
            if ref_num not in series:
                series[ref_num] = self._DOCUMENT_CLASSES[ref_type](ref_num)
            reflist.append(series[ref_num])
        return reflist

    def _parse_date(self, date_el: Element) -> date:
        """Build a date from year/month/day children; the day defaults to 1."""
        year = int(date_el.findtext(self._tag("year")))
        month = Month[date_el.findtext(self._tag("month"))].value
        day = int(date_el.findtext(self._tag("day"), "1"))
        return date(year, month, day)

    @staticmethod
    def _parse_status(text: str) -> Status:
        """Map status text such as "PROPOSED STANDARD" to a ``Status``."""
        return Status[text.replace(" ", "_")]

    def _parse_rfc_entry(self, entry: Element) -> None:
        """Parse one ``rfc-entry`` and create or update the indexed RFC."""
        tag = self._tag
        number = int(entry.findtext(tag("doc-id"))[3:])
        title = entry.findtext(tag("title"))
        authors = [
            Author(author.findtext(tag("name")),
                   author.findtext(tag("title"), ""),
                   author.findtext(tag("organization"), ""),
                   author.findtext(tag("org-abbrev"), ""))
            for author in entry.findall(tag("author"))
        ]
        pub_date = self._parse_date(entry.find(tag("date")))
        file_formats = [
            FileFormat[item.text]
            for item in self._children(entry, "format", "file-format")
        ]
        # A missing page-count is parsed as -1 and then mapped to None.
        page_count: int | None = int(entry.findtext(tag("page-count"), "-1"))
        if page_count < 0:
            page_count = None
        keywords = [kw.text for kw in self._children(entry, "keywords", "kw")]
        abstract = [p.text for p in self._children(entry, "abstract", "p")]
        draft = entry.findtext(tag("draft"), "")
        notes = entry.findtext(tag("notes"), "")
        obsoletes = self._get_reflist(entry.find(tag("obsoletes")))
        obsoleted_by = self._get_reflist(entry.find(tag("obsoleted-by")))
        updates = self._get_reflist(entry.find(tag("updates")))
        updated_by = self._get_reflist(entry.find(tag("updated-by")))
        is_also = self._get_reflist(entry.find(tag("is-also")))
        see_also = self._get_reflist(entry.find(tag("see-also")))
        current_status = self._parse_status(entry.findtext(tag("current-status")))
        pub_status = self._parse_status(entry.findtext(tag("publication-status")))
        stream_el = entry.find(tag("stream"))
        stream = None if stream_el is None else Stream[stream_el.text]

        fields = {
            "title": title,
            "authors": authors,
            "pub_date": pub_date,
            "current_status": current_status,
            "pub_status": pub_status,
            "format": file_formats,
            "page_count": page_count,
            "keywords": keywords,
            "abstract": abstract,
            "draft": draft,
            "notes": notes,
            "obsoletes": obsoletes,
            "obsoleted_by": obsoleted_by,
            "updates": updates,
            "updated_by": updated_by,
            "is_also": is_also,
            "see_also": see_also,
            "stream": stream,
            "area": entry.findtext(tag("area"), ""),
            # Unlike its siblings, this element name uses an underscore.
            "wg_acronym": entry.findtext(tag("wg_acronym"), ""),
            "errata_url": entry.findtext(tag("errata-url"), ""),
            "doi": entry.findtext(tag("doi"), ""),
        }

        rfcs = self.index[DocType.RFC]
        if number in rfcs:
            # Update in place so earlier references stay valid.
            rfcs[number].update(**fields)
        else:
            rfcs[number] = RFC(number, **fields)

    def _parse_not_issued_entry(self, entry: Element) -> None:
        """Record a ``rfc-not-issued-entry`` unless the number is already known."""
        number = int(entry.findtext(self._tag("doc-id"))[3:])
        rfcs = self.index[DocType.RFC]
        if number not in rfcs:
            rfcs[number] = NotIssued(number)

    def _parse_subseries_entry(self, entry: Element, doc_type: DocType) -> None:
        """Parse one STD/BCP/FYI entry and create or update the document.

        A missing ``title`` element yields an empty title for every
        sub-series, matching the ``str`` type of ``Document.title``.
        """
        number = int(entry.findtext(self._tag("doc-id"))[3:])
        title = entry.findtext(self._tag("title"), "")
        is_also = self._get_reflist(entry.find(self._tag("is-also")))
        series = self.index[doc_type]
        if number in series:
            series[number].update(title=title, is_also=is_also)
        else:
            series[number] = self._DOCUMENT_CLASSES[doc_type](number, title, is_also)

    def get_index(self) -> dict[DocType, dict[int, Document]]:
        """Return the parsed index (the live ``self.index`` mapping, not a copy)."""
        return self.index
|