Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/generate-files.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:

steps:
- name: Check out GEDCOM
uses: actions/checkout@v5
uses: actions/checkout@v6

- name: Get the branch name
id: extract_branch
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/propagate-main-to-v7.1.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:

steps:
- name: Check out GEDCOM
uses: actions/checkout@v5
uses: actions/checkout@v6

- name: Set git config
env:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/validate-yaml.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:

steps:
- name: Checkout GEDCOM
uses: actions/checkout@v5
uses: actions/checkout@v6

- name: Validate YAML
run: yamllint .
1 change: 0 additions & 1 deletion build/hyperlink-code.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ def anchorify(m):
return full

doc = re.sub(r'<code>(g7:[^<]*)</code></h', r'<code class="uri">\1</code></h', doc)
doc = re.sub(r'<code>(g7.1:[^<]*)</code></h', r'<code class="uri">\1</code></h', doc)

chunks = re.split(r'(<pre[^>]*ged(?:struct|com)[^>]*>.*?</pre>)', doc, flags=re.DOTALL)

Expand Down
6 changes: 1 addition & 5 deletions build/hyperlink.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,6 @@ def slugify(bit):
si = bit.rfind('`g7:')+4
ei = bit.find('`', si)
slug = bit[si:ei].replace('#','-')
elif '`g7.1:' in bit:
si = bit.rfind('`g7.1:')+6
ei = bit.find('`', si)
slug = bit[si:ei].replace('#','-')
elif '`' in bit:
bit = re.search('`[A-Z0-9_`.]+`', bit)
slug = bit.group(0).replace('`','').replace('.','-')
Expand Down Expand Up @@ -91,7 +87,7 @@ def abnf(m):
slug = table_tags[m.group(1)]
return linkify(m.group(0), slug)
return m.group(0)
uried = re.sub(r'(?<![\[.`])`g7(?:\.1)?:[-A-Z0-9a-z`._#]+`', repl, line)
uried = re.sub(r'(?<![\[.`])`g7:[-A-Z0-9a-z`._#]+`', repl, line)
if istable: return uried
tagged = re.sub(r'(?<![\[.`])`[A-Z0-9`._#]+`', repl, uried)
abnfed = re.sub(r'(?<![\[.`])`([A-Za-z0-9]+)`', abnf, tagged)
Expand Down
100 changes: 31 additions & 69 deletions build/uri-def.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,9 @@ def find_data_types(txt, g7):
for section in re.finditer(r'^#+ *([^\n]*)\n+((?:[^\n]|\n+[^\n#])*[^\n]*URI for[^\n]*data types? is(?:[^\n]|\n+[^\n#])*)', txt, re.M):
for dt, uri in re.findall(r'URI[^\n]*`([^\n`]*)` data type[^\n]*`([^`\n:]*:[^\n`]*)`', section.group(0)):
dturi[dt] = uri
if uri.startswith('g7:') or uri.startswith('g7.1:'):
slug = uri[uri.find(':')+1:]
if '#' in uri: uri = uri[:uri.find('#')]
if slug not in g7:
g7[slug] = ('data type', [section.group(2).strip()])
if uri.startswith('g7:'):
if uri[3:] not in g7:
g7[uri[3:]] = ('data type', [section.group(2).strip()])
return dturi

def find_cat_tables(txt, g7, tagsets):
Expand Down Expand Up @@ -110,19 +108,18 @@ def find_cat_tables(txt, g7, tagsets):
raise Exception("unexpected enumeration URI prefix "+repr(pfx))
if pfx not in cats:
cats[pfx] = meaning
if pfx.startswith('g7:') or pfx.startswith('g7.1:'):
slug = pfx[pfx.find(':')+1:]
if slug in g7:
raise Exception(pfx+' defined as an enumeration and a '+g7[slug][0])
if pfx.startswith('g7:'):
if pfx[3:] in g7:
raise Exception(pfx+' defined as an enumeration and a '+g7[pfx[3:]][0])
if label:
g7[slug] = (yamltype, meaning, None, label)
g7[pfx[3:]] = (yamltype, meaning, None, label)
else:
g7[slug] = (yamltype, meaning)
g7[pfx[3:]] = (yamltype, meaning)
return enums, calendars

def find_calendars(txt, g7):
"""Looks for sections defining a `g7:cal-` URI"""
for bit in re.finditer(r'#+ `[^`]*`[^\n]*\n+((?:\n+(?!#)|[^\n])*is `g7(?:\.1)?:(cal-[^`]*)`(?:\n+(?!#)|[^\n#])*)', txt):
for bit in re.finditer(r'#+ `[^`]*`[^\n]*\n+((?:\n+(?!#)|[^\n])*is `g7:(cal-[^`]*)`(?:\n+(?!#)|[^\n#])*)', txt):
m = re.search('The epoch markers? ([`_A-Z0-9, and]+) (is|are) permitted', bit.group(1))
marker = [] if not m else re.findall(r'[A-Z0-9_]+', m[1])
m = re.match(r'^The ([A-Z][A-Za-z]* )+calendar', bit.group(1))
Expand Down Expand Up @@ -226,27 +223,24 @@ def find_descriptions(txt, g7, ssp):
for name,uri,desc in re.findall(r'#+ `[^`]*`[^\n]*\(([^)]*)\)[^\n]*`([^:`\n]*:[^`\n]*)`[^\n]*\n+((?:\n+(?!#)|[^\n])*)', txt):
if uri not in ssp:
raise Exception('Found section for '+uri+' but no gedstruct')
if uri.startswith('g7:') or uri.startswith('g7.1:'):
slug = uri[uri.find(':')+1:]
g7.setdefault(slug,('structure',[],ssp[uri],name.strip()))[1].extend((
if uri.startswith('g7:'):
g7.setdefault(uri[3:],('structure',[],ssp[uri],name.strip()))[1].extend((
name.strip(),
desc.strip()
))
for other in re.findall(r'[Aa] type of `(\S*)`', desc):
m = re.search('^#+ +`'+other+r'`[^\n`]*\n((?:[^\n]+|\n+(?!#))*)', txt, re.M)
if m:
g7[uri[uri.find(':')+1:]][1].append(m.group(1).strip())
g7[uri[3:]][1].append(m.group(1).strip())

# error check that gedstruct and sections align
for uri in ssp:
if uri.startswith('g7:') and uri[3:] not in g7:
raise Exception('Found gedstruct for '+uri+' but no section')
if uri.startswith('g7.1:') and uri[5:] not in g7:
raise Exception('Found gedstruct for '+uri+' but no section')

# gedstruct sections
for uri, desc in re.findall(r'#+ *`[^`]*` *:=[^\n]*\n+`+[^\n]*\n+n [^\n]*\} *(\S+:\S+) *(?:\n [^\n]*)*\n`+[^\n]*\n+((?:[^\n]|\n(?!#))*)', txt):
g7[uri[uri.find(':')+1:]][1].append(desc.strip())
g7[uri[3:]][1].append(desc.strip())

tagsets = {}
# tag tables
Expand All @@ -257,11 +251,7 @@ def find_descriptions(txt, g7, ssp):
if header.startswith('Indi'): pfx = 'INDI-'
for tag, name, desc in re.findall(r'`([A-Z_0-9]+)` *\| *([^|\n]*?) *\| *([^|\n]*[^ |\n]) *', table.group(2)):
if '<br' in name:
if '`g7:' in name:
tag = name[name.find('`g7:')+4:name.rfind('`')]
elif '`g7.1' in name:
tag = name[name.find('`g7.1')+6:name.rfind('`')]
else: assert False, "name without URI: "+repr(name)
tag = name[name.find('`g7:')+4:name.rfind('`')]
name = name[:name.find('<br')]
if tag not in g7: tag = pfx+tag
if tag not in g7:
Expand All @@ -282,7 +272,7 @@ def find_enum_by_link(txt, enums, tagsets):
# 'g7:FAM-FACT',
# )) ## do not do for enumset-EVEN
enum_prefix = {k[k.find('enum-')+5:] for e in enums.values() for k in e }
for sect in re.finditer(r'# *`(g7(?:\.1)?:enumset-[^`]*)`[\s\S]*?\n#', txt):
for sect in re.finditer(r'# *`(g7:enumset-[^`]*)`[\s\S]*?\n#', txt):
if '[Events]' in sect.group(0):
key = sect.group(1).replace('`','').replace('.','-')
for k in tagsets:
Expand All @@ -291,7 +281,7 @@ def find_enum_by_link(txt, enums, tagsets):
for tag in tagsets[k]:
if tag.startswith('INDI-') and tag[5:] in enum_prefix: tag = 'enum-'+tag[5:]
if tag.startswith('FAM-') and tag[4:] in enum_prefix: tag = 'enum-'+tag[4:]
tag = addpfx(tag)
tag = 'g7:'+tag
if tag in enums[key]: continue
enums[key].append(tag)
if '[Attributes]' in sect.group(0):
Expand All @@ -302,22 +292,22 @@ def find_enum_by_link(txt, enums, tagsets):
for tag in tagsets[k]:
if tag.startswith('INDI-') and tag[5:] in enum_prefix: tag = 'enum-'+tag[5:]
if tag.startswith('FAM-') and tag[4:] in enum_prefix: tag = 'enum-'+tag[4:]
tag = addpfx(tag)
tag = 'g7:'+tag
if tag in enums[key]: continue
enums[key].append(tag)
# enums.setdefault(key, []).extend(_ for _ in ['g7:'+_2.replace('INDI-','enum-').replace('FAM-','enum-') for _2 in tagsets[k]] if _ not in enums.get(key,[]))

def find_enumsets(txt):
res = {}
for sect in re.finditer(r'# *[^\n]*?`(g7(?:\.1)?:[^`]*)`([\s\S]*?)\n#', txt):
if re.search(f'from set `g7(?:\.1)?:enumset-', sect.group(2)):
for sect in re.finditer(r'# *[^\n]*?`(g7:[^`]*)`([\s\S]*?)\n#', txt):
if 'from set `g7:enumset-' in sect.group(2):
key = sect.group(1)
val = re.search(r'from set `(g7(?:\.1)?:enumset-[^`]*)`', sect.group(2)).group(1)
val = re.search(r'from set `(g7:enumset-[^`]*)`', sect.group(2)).group(1)
res[key] = val
return res

def tidy_markdown(md, indent, width=79):
"""
r"""
The markdown files in the specification directory use the following Markdown dialect:

Part of GFM:
Expand Down Expand Up @@ -351,7 +341,7 @@ def tidy_markdown(md, indent, width=79):
import mdformat
out = mdformat.text(md, extensions={"gfm"}, options={"number":True, "wrap":width})

return out.rstrip().replace('\n','\n'+' '*indent).replace('\[','[').replace('\]',']')
return out.rstrip().replace('\n','\n'+' '*indent).replace(r'\[','[').replace(r'\]',']')

def yaml_str_helper(pfx, md, width=79):
txt = tidy_markdown(md, len(pfx), width)
Expand All @@ -362,16 +352,10 @@ def yaml_str_helper(pfx, md, width=79):
return pfx + txt

def expand_prefix(txt, prefixes):
global prerelease
for key in sorted(prefixes.keys(), key=lambda x:-len(x)):
k = key+':'
if txt.startswith(k):
uri = prefixes[key] + txt[len(k):]
if 'https://gedcom.io/terms/v7.1/' in uri:
prerelease = True
return uri
if 'https://gedcom.io/terms/v7.1/' in txt:
prerelease = True
return prefixes[key] + txt[len(k):]
return txt

if __name__ == '__main__':
Expand All @@ -381,24 +365,14 @@ def expand_prefix(txt, prefixes):
txt = get_text(specs)

prefixes = get_prefixes(txt)
prefix_of = {} # generally {tag: 'g7'} or {"record-REPO":"g7.1"} but sometimes {"month-":"g7"} for a set of values
for [pfx,slug] in re.findall('('+'|'.join(prefixes)+r'):([^\s`<>]+)', txt):
assert prefix_of.get(slug,pfx) == pfx, f"Multiple prefixes for {slug}: {prefix_of[slug]} and {pfx}"
prefix_of[slug] = pfx
def addpfx(tag):
if tag in prefix_of: return prefix_of[tag]+':'+tag
if '-' in tag:
lead = tag[:tag.find('-')+1]
if lead in prefix_of: return prefix_of[lead]+':'+tag
assert False, 'no prefix for '+tag+' in '+str(prefix_of)
dtypes = find_data_types(txt, g7)
rules = parse_rules(txt)
ssp = parse_gedstruct(txt, rules, dtypes)
tagsets = find_descriptions(txt, g7, ssp)
enums, calendars = find_cat_tables(txt, g7, tagsets)
find_enum_by_link(txt, enums, tagsets)
for k in enums:
g7[k[k.find(':')+1:]] = ('enumeration set',[])
g7[k[3:]] = ('enumeration set',[])
enumsets = find_enumsets(txt)
find_calendars(txt, g7)
dtypes_inv = {expand_prefix(v,prefixes):k for k,v in dtypes.items()}
Expand All @@ -411,19 +385,17 @@ def addpfx(tag):

for tag in g7:
print('outputting', tag, '...', end=' ')
prerelease = False
maybe = join(dirname(specs[0]),'terms',tag)
if exists(maybe):
copyfile(maybe, join(dest,tag))
print('by copying', maybe, '...', end=' ')
continue
thispath = join(dest,tag.replace('#','-'))
with open(thispath, 'w') as fh:
with open(join(dest,tag.replace('#','-')), 'w') as fh:
fh.write('%YAML 1.2\n---\n')
print('lang: en-US', file=fh)
print('\ntype:',g7[tag][0], file=fh)

uri = expand_prefix(addpfx(tag),prefixes)
uri = expand_prefix('g7:'+tag,prefixes)
print('\nuri:', uri, file=fh)

if g7[tag][0] in ('structure', 'enumeration', 'calendar', 'month'):
Expand Down Expand Up @@ -452,7 +424,7 @@ def addpfx(tag):
print('\npayload:', payload, file=fh)
payload_lookup.append([uri, payload if payload != 'null' else ''])
if d['pay'] and 'Enum' in d['pay']:
setname = expand_prefix(enumsets[addpfx(tag)],prefixes)
setname = expand_prefix(enumsets['g7:'+tag],prefixes)
print('\nenumeration set: "'+setname+'"', file=fh)
enum_lookup.append([uri,setname])
# print('\nenumeration values:', file=fh)
Expand All @@ -478,7 +450,7 @@ def addpfx(tag):
struct_lookup.append(['',ptag,uri])
elif g7[tag][0] == 'calendar':
print('\nmonths:', file=fh)
for k in calendars[addpfx(tag)]:
for k in calendars['g7:'+tag]:
print(' - "'+expand_prefix(k, prefixes)+'"', file=fh)
if len(g7[tag][2]) == 0:
print('\nepochs: []', file=fh)
Expand All @@ -488,11 +460,11 @@ def addpfx(tag):
print(' -', epoch, file=fh)
elif g7[tag][0] == 'month':
print('\ncalendars:', file=fh)
for k in calendars[addpfx(tag)]:
for k in calendars['g7:'+tag]:
print(' - "'+expand_prefix(k, prefixes)+'"', file=fh)
elif g7[tag][0] == 'enumeration set':
print('\nenumeration values:', file=fh)
for k in enums[addpfx(tag)]:
for k in enums['g7:'+tag]:
valname = expand_prefix(k, prefixes)
print(' - "'+valname+'"', file=fh)
enumset_lookup.append([uri, valname])
Expand All @@ -501,20 +473,11 @@ def addpfx(tag):
# handle use in enumerations (which can include any tag type)
is_used_by = False
for tag2 in sorted(enums):
if (addpfx(tag)) in enums[tag2]:
if ('g7:'+tag) in enums[tag2]:
if not is_used_by:
print('\nvalue of:', file=fh)
is_used_by = True
print(' - "'+expand_prefix(tag2,prefixes)+'"', file=fh)

if prerelease:
print('\nprerelease: true', file=fh)

# manually check for v7.1 subsuming v7.0
if '/v7.1/' in uri:
res = run(['git','show','main:'+thispath], capture_output=True)
if not res.returncode:
print('\nsubsumes:', uri.replace('/v7.1/','/v7/'), file=fh)

print('\ncontact: "https://gedcom.io/community/"', file=fh)
fh.write('...\n')
Expand Down Expand Up @@ -542,4 +505,3 @@ def addpfx(tag):
for row in data:
print('\t'.join(row), file=f)
print('done')

1 change: 0 additions & 1 deletion specification/gedcom-0-introduction.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,6 @@ is shorthand for a URI beginning with the corresponding URI prefix
| Short Prefix | URI Prefix |
|:-------------|:------------------------------------|
| `g7` | `https://gedcom.io/terms/v7/` |
| `g7.1` | `https://gedcom.io/terms/v7.1/` |
| `xsd` | `http://www.w3.org/2001/XMLSchema#` |
| `dcat` | `http://www.w3.org/ns/dcat#` |

Expand Down
6 changes: 4 additions & 2 deletions specification/gedcom-1-hierarchical-container-format.md
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ Extensions cannot change existing meanings, cardinalities, or calendars.
A **tagged extension structure** is a structure whose tag matches production `extTag`. Tagged extension structures may appear as records or substructures of any other structure. Their meaning is defined by their tag, as is discussed more fully in the section [Extension Tags].

Any substructure of a tagged extension structure that uses a tag matching `stdTag` is an **extension-defined substructure**.
Substructures of an extension-defined substructure that uses a tag matching `stdTag` are also extension-defined substructures, but this specification deprecates using a `stdTag` with a definition that does not match any standard type with that tag.
Substructures of an extension-defined substructure that use a tag matching `stdTag` are also extension-defined substructures.
The meaning and use of each extension-defined substructure is defined by the tagged extension structure it occurs within, not by its tag alone nor by this specification.

:::example
Expand All @@ -343,7 +343,9 @@ deprecated.
- Even though both `DATE`s appear to have `g7:type-DATE` payloads, we can't know that is the intended data type without consulting the defining specifications of `_LOC` and `_POP`, respectively. The first might be a `g7:type-DATE#period` and the second a `g7:type-DATE#exact`, for example.
:::

If an extension-defined substructure has a tag that is also used by one or more standard structures, its meaning and payload type should match at least one of those standard structure types.
Extension-defined substructures should match the structure type, payload, and substructure collection of at least one
standard type with the same tag, though they can add more substructures to the substructure collection.
This specification deprecates using a `stdTag` with a definition that does not match any standard type with that tag.

:::example
An extension-defined substructure with tag "`DATE`" should provide a date or date period relevant to its superstructure, as do all `DATE`-tagged structures in this specification. Extensions should not use "`DATE`" to tag a structure describing anything else (even something that might reasonably be abbreviated "date", such as someone an individual dated).
Expand Down
12 changes: 4 additions & 8 deletions specification/gedcom-3-structures-1-organization.md
Original file line number Diff line number Diff line change
Expand Up @@ -345,8 +345,7 @@ A `MULTIMEDIA_RECORD` may contain a pointer to a `SOURCE_RECORD` and vice versa.
#### `REPOSITORY_RECORD` :=

```gedstruct
n @XREF:REPO@ REPO {1:1} g7.1:record-REPO
+1 RESN <List:Enum> {0:1} g7:RESN
n @XREF:REPO@ REPO {1:1} g7:record-REPO
+1 NAME <Text> {1:1} g7:NAME
+1 <<ADDRESS_STRUCTURE>> {0:1}
+1 PHON <Special> {0:M} g7:PHON
Expand All @@ -371,8 +370,7 @@ Until such time, it is recommended that the repository record store current cont
#### `SHARED_NOTE_RECORD` :=

```gedstruct
n @XREF:SNOTE@ SNOTE <Text> {1:1} g7.1:record-SNOTE
+1 RESN <List:Enum> {0:1} g7:RESN
n @XREF:SNOTE@ SNOTE <Text> {1:1} g7:record-SNOTE
+1 MIME <MediaType> {0:1} g7:MIME
+1 LANG <Language> {0:1} g7:LANG
+1 TRAN <Text> {0:M} g7:NOTE-TRAN
Expand Down Expand Up @@ -417,8 +415,7 @@ A `SHARED_NOTE_RECORD` may contain a pointer to a `SOURCE_RECORD` and vice versa
#### `SOURCE_RECORD` :=

```gedstruct
n @XREF:SOUR@ SOUR {1:1} g7.1:record-SOUR
+1 RESN <List:Enum> {0:1} g7:RESN
n @XREF:SOUR@ SOUR {1:1} g7:record-SOUR
+1 DATA {0:1} g7:DATA
+2 EVEN <List:Enum> {0:M} g7:DATA-EVEN
+3 DATE <DatePeriod> {0:1} g7:DATA-EVEN-DATE
Expand Down Expand Up @@ -456,8 +453,7 @@ A `SOURCE_RECORD` may contain a pointer to a `MULTIMEDIA_RECORD` and vice versa.
#### `SUBMITTER_RECORD` :=

```gedstruct
n @XREF:SUBM@ SUBM {1:1} g7.1:record-SUBM
+1 RESN <List:Enum> {0:1} g7:RESN
n @XREF:SUBM@ SUBM {1:1} g7:record-SUBM
+1 NAME <Text> {1:1} g7:NAME
+1 <<ADDRESS_STRUCTURE>> {0:1}
+1 PHON <Special> {0:M} g7:PHON
Expand Down
Loading