diff mbox series

[v3,1/2] Port shared code information from the wiki

Message ID 20210810173854.1326070-2-siddhesh@sourceware.org
State New
Headers show
Series Source attribution cleanups | expand

Commit Message

Siddhesh Poyarekar Aug. 10, 2021, 5:38 p.m. UTC
Since the shared code now has special status with respect to
copyrights, port the list of shared files into a more structured format
in the source tree and add a python function that parses it and returns
a dictionary with the information.

I need this to exclude these files from the Contributed-by changes and
I reckon it would be useful to know these files for future tooling.
---
 SHARED-FILES                 | 206 +++++++++++++++++++++++++++++++++++
 scripts/glibc_shared_code.py |  70 ++++++++++++
 2 files changed, 276 insertions(+)
 create mode 100644 SHARED-FILES
 create mode 100644 scripts/glibc_shared_code.py
diff mbox series

Patch

diff --git a/SHARED-FILES b/SHARED-FILES
new file mode 100644
index 0000000000..d1c4fc4eeb
--- /dev/null
+++ b/SHARED-FILES
@@ -0,0 +1,206 @@ 
+# Files shared with other projects.  Pass a file path to the
+# get_glibc_shared_code() function in the python library
+# scripts/glibc_shared_code.py to get a dict object with this information.  See
+# the library sources for more information.
+
+# The headers on most of these files indicate that glibc is the canonical
+# source for these files, although in many cases there seem to be useful
+# changes in the gnulib versions that could be merged back in. Not all gnulib
+# files contain such a header and it is not always consistent in its format, so
+# it would be useful to make sure that all gnulib files that are using glibc as
+# upstream have a greppable header.
+#
+# These files are quite hard to find without a header to grep for and each file
+# has to be compared manually so this list is likely incomplete or may contain
+# errors.
+gnulib:
+  argp/argp-ba.c
+  argp/argp-ba.c
+  argp/argp-eexst.c
+  argp/argp-fmtstream.c
+  argp/argp-fmtstream.h
+  argp/argp-fs-xinl.c
+  argp/argp-help.c
+  argp/argp-namefrob.h
+  argp/argp-parse.c
+  argp/argp-pv.c
+  argp/argp-pvh.c
+  argp/argp-xinl.c
+  argp/argp.h
+  crypt/md5.c
+  crypt/md5.h
+  dirent/alphasort.c
+  dirent/scandir.c
+  locale/programs/3level.h
+  # Merged from gnulib 2014-6-23
+  malloc/obstack.c
+  # Merged from gnulib 2014-6-23
+  malloc/obstack.h
+  # Merged from gnulib 2014-07-10
+  misc/error.c
+  misc/error.h
+  misc/getpass.c
+  misc/mkdtemp.c
+  posix/fnmatch_loop.c
+  # Intended to be the same. Gnulib copy contains glibc changes.
+  posix/getopt.c
+  # Intended to be the same. Gnulib copy contains glibc changes.
+  posix/getopt1.c
+  # Intended to be the same. Gnulib copy contains glibc changes.
+  posix/getopt_int.h
+  posix/glob.c
+  posix/regcomp.c
+  posix/regex.c
+  posix/regex.h
+  posix/regex_internal.c
+  posix/regex_internal.h
+  posix/regexec.c
+  posix/spawn.c
+  posix/spawn_faction_addclose.c
+  posix/spawn_faction_adddup2.c
+  posix/spawn_faction_addopen.c
+  posix/spawn_faction_destroy.c
+  posix/spawn_faction_init.c
+  posix/spawn_int.h
+  posix/spawnattr_destroy.c
+  posix/spawnattr_getdefault.c
+  posix/spawnattr_getflags.c
+  posix/spawnattr_getpgroup.c
+  posix/spawnattr_getschedparam.c
+  posix/spawnattr_getschedpolicy.c
+  posix/spawnattr_getsigmask.c
+  posix/spawnattr_init.c
+  posix/spawnattr_setdefault.c
+  posix/spawnattr_setflags.c
+  posix/spawnattr_setpgroup.c
+  posix/spawnattr_setschedparam.c
+  posix/spawnattr_setschedpolicy.c
+  posix/spawnattr_setsigmask.c
+  posix/spawnp.c
+  stdlib/atoll.c
+  stdlib/getsubopt.c
+  stdlib/setenv.c
+  stdlib/strtoll.c
+  stdlib/strtoul.c
+  # Merged from gnulib 2014-6-26, needs merge back
+  string/memchr.c
+  string/memcmp.c
+  string/memmem.c
+  string/mempcpy.c
+  string/memrchr.c
+  string/rawmemchr.c
+  string/stpcpy.c
+  string/stpncpy.c
+  string/str-two-way.h
+  string/strcasestr.c
+  string/strcspn.c
+  string/strdup.c
+  string/strndup.c
+  string/strpbrk.c
+  string/strsignal.c
+  string/strstr.c
+  string/strtok_r.c
+  string/strverscmp.c
+  sysdeps/generic/pty-private.h
+  sysdeps/generic/siglist.h
+  sysdeps/posix/euidaccess.c
+  sysdeps/posix/gai_strerror.c
+  sysdeps/posix/getcwd.c
+  sysdeps/posix/pwrite.c
+  sysdeps/posix/spawni.c
+  # Merged from gnulib 2014-6-23
+  sysdeps/posix/tempname.c
+  # Merged from gnulib 2014-6-27
+  time/mktime.c
+  time/strptime.c
+  time/timegm.c
+
+# The last merge was 2014-12-11 and merged gettext 0.19.3 into glibc with a
+# patch submitted to the gettext mailing list for changes that could be merged
+# back.
+#
+# This commit was omitted from the merge as it does not appear to be compatible
+# with how glibc expects things to work:
+#
+# commit 279b57fc367251666f00e8e2b599b83703451afb
+# Author: Bruno Haible <bruno@clisp.org>
+# Date:   Fri Jun 14 12:03:49 2002 +0000
+#
+#     Make absolute pathnames inside $LANGUAGE work.
+gettext:
+  intl/bindtextdom.c
+  intl/dcgettext.c
+  intl/dcigettext.c
+  intl/dcngettext.c
+  intl/dgettext.c
+  intl/dngettext.c
+  intl/explodename.c
+  intl/finddomain.c
+  intl/gettext.c
+  intl/gettextP.h
+  intl/gmo.h
+  intl/hash-string.c
+  intl/hash-string.h
+  intl/l10nflist.c
+  intl/loadinfo.h
+  intl/loadmsgcat.c
+  intl/locale.alias
+  intl/localealias.c
+  intl/ngettext.c
+  intl/plural-exp.c
+  intl/plural-exp.h
+  intl/plural.y
+  intl/textdomain.c
+
+# The following files are shared with the upstream Unicode project and must be
+# updated regularly to stay in sync with the upstream unicode releases.
+#
+# Merged from Unicode 13.0.0 release.
+unicode:
+  localedata/unicode-gen/UnicodeData.txt
+  localedata/unicode-gen/unicode-license.txt
+  localedata/unicode-gen/DerivedCoreProperties.txt
+  localedata/unicode-gen/EastAsianWidth.txt
+  localedata/unicode-gen/PropList.txt
+
+# The following files are shared with the upstream tzcode project and must be
+# updated regularly to stay in sync with the upstream releases.
+#
+# Update from tzcode 2017b.
+# Latest is 2018g:
+#   https://mm.icann.org/pipermail/tz-announce/2018-October/000052.html
+tzcode:
+  timezone/private.h
+  timezone/tzfile.h
+  timezone/zdump.c
+  timezone/zic.c
+  timezone/tzselect.ksh
+
+# The following files are shared with the upstream tzdata project but are not
+# synchronized regularly. The data files themselves are used only for testing
+# purposes and their data is never used to generate any output. We synchronize
+# them only to stay on top of newer data that might help with testing.
+#
+# Currently synced to 2009i.  Latest is 2018g.
+# https://mm.icann.org/pipermail/tz-announce/2018-October/000052.html
+tzdata:
+  timezone/africa
+  timezone/antarctica
+  timezone/asia
+  timezone/australasia
+  timezone/europe
+  timezone/northamerica
+  timezone/southamerica
+  timezone/pacificnew
+  timezone/etcetera
+  timezone/factory
+  timezone/backward
+  timezone/systemv
+  timezone/solar87
+  timezone/solar88
+  timezone/solar89
+  timezone/iso3166.tab
+  timezone/zone.tab
+  timezone/leapseconds
+  # This is yearistype.sh in the parent project
+  timezone/yearistype
diff --git a/scripts/glibc_shared_code.py b/scripts/glibc_shared_code.py
new file mode 100644
index 0000000000..873a26117f
--- /dev/null
+++ b/scripts/glibc_shared_code.py
@@ -0,0 +1,70 @@ 
+#!/usr/bin/python
+# Copyright (C) 2021 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <https://www.gnu.org/licenses/>.
+
+def get_glibc_shared_code(path):
+    """ Get glibc shared code information from a file
+
+    Project names are on their own line ending with a colon and the shared
+    files of a project follow on their own lines; a repeated project name
+    extends its list.  Surrounding whitespace, blank lines and lines whose
+    first non-whitespace character is # are ignored.  A file listed before
+    any project name raises ValueError.
+
+    Args:
+        path: The path to file containing shared code information.
+
+    Returns:
+        A dictionary with project names as key and lists of files as values.
+    """
+    projects, files = {}, None
+    with open(path, 'r') as f:
+        for line in f:
+            line = line.strip()
+            if not line or line.startswith('#'):
+                continue
+            if line.endswith(':'):
+                files = projects.setdefault(line[:-1], [])
+            elif files is None:
+                raise ValueError('%s: no project for %s' % (path, line))
+            else:
+                files.append(line)
+    return projects
+
+# Function testing.
+import sys
+from os import EX_NOINPUT
+from os.path import exists
+from pprint import *
+
+if __name__ == '__main__':
+    if len(sys.argv) != 2:
+        print('Usage: %s <file name>' % sys.argv[0])
+        print('Run this script from the base glibc source directory')
+        sys.exit(EX_NOINPUT)
+
+    print('Testing get_glibc_shared_code with %s:\n' % sys.argv[1])
+    r = get_glibc_shared_code(sys.argv[1])
+    # Paths are checked relative to the current directory, hence the usage note.
+    missing = [f for files in r.values() for f in files if not exists(f)]
+    for f in missing:
+        print('%s does not exist' % f)
+
+    # Exit non-zero so that a stale file list is detectable by the caller.
+    if missing:
+        sys.exit(1)
+    pprint(r)