"""Data model and parser for the RFC Editor index XML (rfc-index)."""

from abc import ABC, abstractmethod
from datetime import date
from enum import Enum, auto
from xml.etree.ElementTree import Element

from defusedxml.ElementTree import fromstring


def _list_or_new(value: list | None) -> list:
    """Return *value* unchanged, or a fresh empty list when it is None.

    Used in constructors so that instances never share a default list.
    """
    return [] if value is None else value


class DocType(Enum):
    """Document series that can be referenced from the index."""

    RFC = 1
    STD = 2
    BCP = 3
    FYI = 4
    NIC = 5
    IEN = 6
    RTR = 7

    def docID(self, num: int) -> str:
        """Return the identifier of document *num* in this series.

        RFC, STD, BCP and FYI (values below 5) left-pad the number with
        zeros to four characters (e.g. ``RFC0042``); NIC, IEN and RTR append
        the number as is.
        """
        if self.value < 5:  # RFC, STD, BCP, FYI
            return f"{self.name}{str(num).rjust(4, '0')}"
        return f"{self.name}{num}"  # NIC, IEN, RTR


class Status(Enum):
    """Document status.

    The parser looks members up by the status text of the index with spaces
    replaced by underscores.
    """

    INTERNET_STANDARD = auto()
    DRAFT_STANDARD = auto()
    PROPOSED_STANDARD = auto()
    UNKNOWN = auto()
    BEST_CURRENT_PRACTICE = auto()
    FOR_YOUR_INFORMATION = auto()
    EXPERIMENTAL = auto()
    HISTORIC = auto()
    INFORMATIONAL = auto()


class FileFormat(Enum):
    """File formats; members are looked up by the ``file-format`` text."""

    ASCII = auto()
    PS = auto()
    PDF = auto()
    TGZ = auto()
    HTML = auto()
    XML = auto()
    TEXT = auto()


class Stream(Enum):
    """Document streams; members are looked up by the ``stream`` text."""

    IETF = auto()
    IAB = auto()
    IRTF = auto()
    INDEPENDENT = auto()
    Editorial = auto()
    Legacy = auto()


class Month(Enum):
    """Month names mapped to their calendar number (January = 1)."""

    January = 1
    February = 2
    March = 3
    April = 4
    May = 5
    June = 6
    July = 7
    August = 8
    September = 9
    October = 10
    November = 11
    December = 12


class Author:
    """An author entry of an RFC."""

    def __init__(self, name: str, title: str = "", organization: str = "",
                 org_abbrev: str = ""):
        self.name: str = name
        self.title: str = title
        self.organization: str = organization
        self.org_abbrev: str = org_abbrev


class Document(ABC):
    """Common base of every document kept in the index.

    Args:
        type: Series the document belongs to.
        number: Number of the document within its series.
        title: Document title.
        is_also: Documents this one is also known as.  When omitted, a new
            empty list is created for this instance; a list that is passed
            in is stored as is (not copied).
    """

    def __init__(self, type: DocType, number: int, title: str = "",
                 is_also: list['Document'] | None = None):
        self.type: DocType = type
        self.number: int = number
        self.title: str = title
        self.is_also: list['Document'] = _list_or_new(is_also)

    def docID(self) -> str:
        """Return the identifier of this document, e.g. ``RFC0042``."""
        return self.type.docID(self.number)

    def _apply_updates(self, allowed: tuple[str, ...],
                       changes: dict[str, object]) -> 'Document':
        """Set every attribute named in *allowed* that occurs in *changes*.

        Keys of *changes* that are not listed in *allowed* are ignored.
        Returns ``self``.
        """
        for field in allowed:
            if field in changes:
                setattr(self, field, changes[field])
        return self

    @abstractmethod
    def update(self, **kwargs) -> 'Document':
        """Overwrite the attributes given as keyword arguments; return self."""

    @abstractmethod
    def get_references(self) -> list[tuple[str, 'Document']]:
        """Return ``(relation, document)`` pairs for all referenced documents."""


class RFC(Document):
    """A published RFC together with its index metadata.

    Every list parameter defaults to ``None``, meaning "a new empty list
    owned by this instance".  Lists that are passed in are stored as is.
    """

    # Attributes that update() is allowed to overwrite.
    _UPDATABLE: tuple[str, ...] = (
        "title", "authors", "pub_date", "current_status", "pub_status",
        "format", "page_count", "keywords", "abstract", "draft", "notes",
        "obsoletes", "obsoleted_by", "updates", "updated_by", "is_also",
        "see_also", "stream", "area", "wg_acronym", "errata_url", "doi",
    )

    def __init__(self, number: int, title: str = "",
                 authors: list[Author] | None = None,
                 pub_date: date | None = None,
                 current_status: Status = Status.UNKNOWN,
                 pub_status: Status = Status.UNKNOWN,
                 format: list[FileFormat] | None = None,
                 page_count: int | None = None,
                 keywords: list[str] | None = None,
                 abstract: list[str] | None = None,
                 draft: str = "", notes: str = "",
                 obsoletes: list[Document] | None = None,
                 obsoleted_by: list[Document] | None = None,
                 updates: list[Document] | None = None,
                 updated_by: list[Document] | None = None,
                 is_also: list[Document] | None = None,
                 see_also: list[Document] | None = None,
                 stream: Stream | None = None, area: str = "",
                 wg_acronym: str = "", errata_url: str = "", doi: str = ""):
        super().__init__(DocType.RFC, number, title, is_also)
        self.authors: list[Author] = _list_or_new(authors)
        self.pub_date: date | None = pub_date
        self.format: list[FileFormat] = _list_or_new(format)
        self.page_count: int | None = page_count
        self.keywords: list[str] = _list_or_new(keywords)
        self.abstract: list[str] = _list_or_new(abstract)
        self.draft: str = draft
        self.notes: str = notes
        self.obsoletes: list[Document] = _list_or_new(obsoletes)
        self.obsoleted_by: list[Document] = _list_or_new(obsoleted_by)
        self.updates: list[Document] = _list_or_new(updates)
        self.updated_by: list[Document] = _list_or_new(updated_by)
        self.see_also: list[Document] = _list_or_new(see_also)
        self.current_status: Status = current_status
        self.pub_status: Status = pub_status
        self.stream: Stream | None = stream
        self.area: str = area
        self.wg_acronym: str = wg_acronym
        self.errata_url: str = errata_url
        self.doi: str = doi

    def update(self, **kwargs) -> Document:
        """Overwrite the given attributes and return self.

        Accepts the same names as the constructor except ``number``;
        any other keyword is ignored.
        """
        return self._apply_updates(self._UPDATABLE, kwargs)

    def get_references(self) -> list[tuple[str, Document]]:
        """Return ``(relation, document)`` pairs for every related document.

        Pairs are grouped by relation in the order: obsoletes, obsoleted by,
        updates, updated by, is also, see also.
        """
        relations = (
            ("obsoletes", self.obsoletes),
            ("obsoleted by", self.obsoleted_by),
            ("updates", self.updates),
            ("updated by", self.updated_by),
            ("is also", self.is_also),
            ("see also", self.see_also),
        )
        return [(label, doc) for label, docs in relations for doc in docs]


class _BareDocument(Document):
    """Base for documents that carry nothing beyond series and number."""

    _doc_type: DocType  # set by each concrete subclass

    def __init__(self, number: int):
        super().__init__(self._doc_type, number)

    def update(self, **kwargs) -> Document:
        """Ignore all keyword arguments and return self."""
        return self

    def get_references(self) -> list[tuple[str, Document]]:
        """Return an empty list; these documents hold no references."""
        return []


class _SeriesDocument(Document):
    """Base for sub-series documents that have a title and ``is_also`` list."""

    _doc_type: DocType  # set by each concrete subclass

    def __init__(self, number: int, title: str = "",
                 is_also: list[Document] | None = None):
        super().__init__(self._doc_type, number, title, is_also)

    def update(self, **kwargs) -> Document:
        """Overwrite ``title`` and/or ``is_also`` if given; return self."""
        return self._apply_updates(("title", "is_also"), kwargs)

    def get_references(self) -> list[tuple[str, Document]]:
        """Return an ``("is also", document)`` pair per aliased document."""
        return [("is also", doc) for doc in self.is_also]


class NotIssued(_BareDocument):
    """An RFC number that was never issued (stored in the RFC series)."""

    _doc_type = DocType.RFC


class STD(_SeriesDocument):
    """An STD sub-series document."""

    _doc_type = DocType.STD


class BCP(_SeriesDocument):
    """A BCP sub-series document."""

    _doc_type = DocType.BCP


class FYI(_SeriesDocument):
    """An FYI sub-series document."""

    _doc_type = DocType.FYI


class NIC(_BareDocument):
    """A NIC document; only ever known by its number."""

    _doc_type = DocType.NIC


class IEN(_BareDocument):
    """An IEN document; only ever known by its number."""

    _doc_type = DocType.IEN


class RTR(_BareDocument):
    """An RTR document; only ever known by its number."""

    _doc_type = DocType.RTR


class IndexParser:
    """Build an in-memory index of documents from rfc-index XML.

    The whole document is parsed in the constructor.  The result is available
    as ``self.index`` (or via ``get_index()``): a dict keyed by ``DocType``
    whose values map document numbers to ``Document`` objects.

    Documents that are referenced before (or without) having an entry of
    their own are inserted as placeholders built from the number alone; a
    later entry for the same number fills the placeholder in via ``update``,
    so earlier references keep pointing at the same object.

    Args:
        xml: The index as an XML string; parsed with defusedxml's
            ``fromstring``.
        namespace: XML namespace of the index elements.

    Raises:
        KeyError: A series prefix, month, status, file format or stream is
            not a member of the corresponding enum.
        ValueError: A document number, year, day or page count is not numeric.
        AttributeError, TypeError: An element the parser reads
            unconditionally (e.g. ``doc-id``, ``date``, ``current-status``)
            is missing or empty.
    """

    # Class used to create a placeholder for a referenced document.
    _PLACEHOLDER_CLASSES: dict[DocType, type[Document]] = {
        DocType.RFC: RFC,
        DocType.STD: STD,
        DocType.BCP: BCP,
        DocType.FYI: FYI,
        DocType.NIC: NIC,
        DocType.IEN: IEN,
        DocType.RTR: RTR,
    }

    # Entry element name -> (series, class) for the title/is-also entries.
    _SERIES_ENTRIES: dict[str, tuple[DocType, type[Document]]] = {
        "std-entry": (DocType.STD, STD),
        "bcp-entry": (DocType.BCP, BCP),
        "fyi-entry": (DocType.FYI, FYI),
    }

    def __init__(self, xml: str,
                 namespace: str = "http://www.rfc-editor.org/rfc-index"):
        self._namespace: str = namespace
        self.index: dict[DocType, dict[int, Document]] = {
            doc_type: {} for doc_type in DocType
        }

        rfc_tag = self._tag("rfc-entry")
        not_issued_tag = self._tag("rfc-not-issued-entry")
        series_tags = {
            self._tag(name): target
            for name, target in self._SERIES_ENTRIES.items()
        }

        for child in fromstring(xml):
            if child.tag == rfc_tag:
                self._parse_rfc_entry(child)
            elif child.tag == not_issued_tag:
                self._parse_not_issued_entry(child)
            elif child.tag in series_tags:
                doc_type, doc_class = series_tags[child.tag]
                self._parse_series_entry(child, doc_type, doc_class)
            # Any other element is ignored.

    def _tag(self, name: str) -> str:
        """Return *name* qualified with the index namespace (``{ns}name``)."""
        return f"{{{self._namespace}}}{name}"

    def _entry_number(self, entry: Element) -> int:
        """Return the numeric part of the entry's ``doc-id`` (prefix is 3 chars)."""
        doc_id: str = entry.findtext(self._tag("doc-id"))
        return int(doc_id[3:])

    def _child_texts(self, entry: Element, container: str,
                     item: str) -> list[str]:
        """Return the text of every *item* inside *container*, or [] if absent."""
        holder = entry.find(self._tag(container))
        if holder is None:
            return []
        return [node.text for node in holder.findall(self._tag(item))]

    def _resolve_refs(self, container: Element | None) -> list[Document]:
        """Return the documents named by the ``doc-id`` children of *container*.

        Documents not yet in the index are inserted as placeholders.  Returns
        an empty list when *container* is None.
        """
        if container is None:
            return []
        resolved: list[Document] = []
        for ref in container.findall(self._tag("doc-id")):
            ref_type: DocType = DocType[ref.text[:3]]
            ref_num: int = int(ref.text[3:])
            bucket = self.index[ref_type]
            if ref_num not in bucket:
                bucket[ref_num] = self._PLACEHOLDER_CLASSES[ref_type](ref_num)
            resolved.append(bucket[ref_num])
        return resolved

    def _refs_of(self, entry: Element, name: str) -> list[Document]:
        """Resolve the references listed under child element *name* of *entry*."""
        return self._resolve_refs(entry.find(self._tag(name)))

    def _parse_rfc_entry(self, entry: Element) -> None:
        """Read one ``rfc-entry`` and create or fill in the matching RFC."""
        tag = self._tag
        number = self._entry_number(entry)
        fields: dict[str, object] = {}

        # A missing title becomes "" so the attribute is always a str.
        fields["title"] = entry.findtext(tag("title"), "")
        fields["authors"] = [
            Author(
                author.findtext(tag("name")),
                author.findtext(tag("title"), ""),
                author.findtext(tag("organization"), ""),
                author.findtext(tag("org-abbrev"), ""),
            )
            for author in entry.findall(tag("author"))
        ]

        date_el = entry.find(tag("date"))
        fields["pub_date"] = date(
            int(date_el.findtext(tag("year"))),
            Month[date_el.findtext(tag("month"))].value,
            int(date_el.findtext(tag("day"), "1")),  # no day given -> the 1st
        )

        fields["format"] = [
            FileFormat[text]
            for text in self._child_texts(entry, "format", "file-format")
        ]

        # -1 stands in for a missing page count; negatives are stored as None.
        page_count: int | None = int(entry.findtext(tag("page-count"), "-1"))
        if page_count < 0:
            page_count = None
        fields["page_count"] = page_count

        fields["keywords"] = self._child_texts(entry, "keywords", "kw")
        fields["abstract"] = self._child_texts(entry, "abstract", "p")
        fields["draft"] = entry.findtext(tag("draft"), "")
        fields["notes"] = entry.findtext(tag("notes"), "")

        # Resolved in this fixed order: placeholders are inserted into the
        # index as they are first seen, which determines dict ordering.
        fields["obsoletes"] = self._refs_of(entry, "obsoletes")
        fields["obsoleted_by"] = self._refs_of(entry, "obsoleted-by")
        fields["updates"] = self._refs_of(entry, "updates")
        fields["updated_by"] = self._refs_of(entry, "updated-by")
        fields["is_also"] = self._refs_of(entry, "is-also")
        fields["see_also"] = self._refs_of(entry, "see-also")

        fields["current_status"] = Status[
            entry.findtext(tag("current-status")).replace(" ", "_")
        ]
        fields["pub_status"] = Status[
            entry.findtext(tag("publication-status")).replace(" ", "_")
        ]

        stream_el = entry.find(tag("stream"))
        fields["stream"] = (
            Stream[stream_el.text] if stream_el is not None else None
        )
        fields["area"] = entry.findtext(tag("area"), "")
        fields["wg_acronym"] = entry.findtext(tag("wg_acronym"), "")
        fields["errata_url"] = entry.findtext(tag("errata-url"), "")
        fields["doi"] = entry.findtext(tag("doi"), "")

        rfcs = self.index[DocType.RFC]
        if number in rfcs:
            # Fill in the existing object so earlier references stay valid.
            rfcs[number].update(**fields)
        else:
            rfcs[number] = RFC(number, **fields)

    def _parse_not_issued_entry(self, entry: Element) -> None:
        """Record a never-issued RFC number unless that number is already indexed."""
        number = self._entry_number(entry)
        rfcs = self.index[DocType.RFC]
        if number not in rfcs:
            rfcs[number] = NotIssued(number)

    def _parse_series_entry(self, entry: Element, doc_type: DocType,
                            doc_class: type[Document]) -> None:
        """Read one std/bcp/fyi entry and create or fill in its document."""
        number = self._entry_number(entry)
        # A missing title becomes "" so the attribute is always a str.
        title: str = entry.findtext(self._tag("title"), "")
        is_also = self._refs_of(entry, "is-also")

        bucket = self.index[doc_type]
        if number in bucket:
            bucket[number].update(title=title, is_also=is_also)
        else:
            bucket[number] = doc_class(number, title, is_also)

    def get_index(self) -> dict[DocType, dict[int, Document]]:
        """Return the parsed index (the same dict object as ``self.index``)."""
        return self.index