diff options
-rw-r--r-- | doc/Makefile.am | 36 | ||||
-rw-r--r-- | doc/Makefile.in | 43 | ||||
-rw-r--r-- | doc/basemove.fig | 97 | ||||
-rw-r--r-- | doc/bss.fig | 74 | ||||
-rw-r--r-- | doc/dso1.fig | 81 | ||||
-rw-r--r-- | doc/dso2.fig | 81 | ||||
-rw-r--r-- | doc/dso3.fig | 110 | ||||
-rw-r--r-- | doc/gap.fig | 87 | ||||
-rw-r--r-- | doc/glossary.conf | 3 | ||||
-rw-r--r-- | doc/lineno.sty | 1521 | ||||
-rw-r--r-- | doc/linuxtag.sty | 67 | ||||
-rw-r--r-- | doc/newseg.fig | 84 | ||||
-rw-r--r-- | doc/prelink.pdf | bin | 0 -> 539486 bytes | |||
-rw-r--r-- | doc/prelink.tex | 3643 | ||||
-rw-r--r-- | doc/prelinklisting.sty | 41 | ||||
-rw-r--r-- | src/exec.c | 20 | ||||
-rw-r--r-- | src/main.c | 2 | ||||
-rw-r--r-- | src/prelink.c | 2 |
18 files changed, 5985 insertions, 7 deletions
diff --git a/doc/Makefile.am b/doc/Makefile.am index e863fc8..45f4757 100644 --- a/doc/Makefile.am +++ b/doc/Makefile.am @@ -4,3 +4,39 @@ AUTOMAKE_OPTIONS = 1.4 gnu man_MANS = prelink.8 execstack.8 EXTRA_DIST = prelink.8 execstack.8 prelink.conf +PDFLATEXINPUTS = TEXINPUTS_pdflatex=.:$(srcdir):`kpsewhich --progname=pdflatex --show-path=cnf.pdflatex` +LATEXINPUTS = TEXINPUTS_latex=.:$(srcdir):`kpsewhich --progname=latex --show-path=cnf.latex` + +noinst_DATA = prelink.pdf +pics = gap bss basemove newseg dso1 dso2 dso3 + +prelink.pdf: prelink.tex $(pics:=.fig) linuxtag.sty \ + prelinklisting.sty lineno.sty glossary.conf + $(MAKE) $(pics:=.pdf) + $(PDFLATEXINPUTS) pdflatex $< + makeindex -s $(srcdir)/glossary.conf -o prelink.gls prelink.glo + if grep -q Rerun prelink.log; then $(PDFLATEXINPUTS) pdflatex $<; fi + thumbpdf $@ && $(PDFLATEXINPUTS) pdflatex $< + mv $@ prelinkin.pdf + pdfopt prelinkin.pdf $@ + rm prelinkin.pdf + +prelink.ps: prelink.dvi + dvips $< -o $@ + +prelink.dvi: prelink.tex $(pics:=.eps) + $(LATEXINPUTS) latex $< + if grep -q Rerun prelink.log; then $(LATEXINPUTS) latex $<; fi + +SUFFIXES = .fig .eps .pdf + +.fig.eps: # convert one xfig drawing to EPS (first prerequisite only) + fig2dev -L eps $< $@ + +.fig.pdf: # convert one xfig drawing to PDF (first prerequisite only) + fig2dev -L pdf $< $@ + +CLEANFILES = $(pics:=.eps) $(pics:=.pdf) prelink.aux prelink.dvi \ + prelink.log prelink.mtc prelink.mtc1 prelink.pdf prelink.ps \ + prelink.glo prelink.idx prelink.out prelink.tpt prelink.ilg \ + prelink.gls prelink.toc diff --git a/doc/Makefile.in b/doc/Makefile.in index a6da1fa..678002b 100644 --- a/doc/Makefile.in +++ b/doc/Makefile.in @@ -99,6 +99,19 @@ AUTOMAKE_OPTIONS = 1.4 gnu man_MANS = prelink.8 execstack.8 EXTRA_DIST = prelink.8 execstack.8 prelink.conf +PDFLATEXINPUTS = TEXINPUTS_pdflatex=.:$(srcdir):`kpsewhich --progname=pdflatex --show-path=cnf.pdflatex` +LATEXINPUTS = TEXINPUTS_latex=.:$(srcdir):`kpsewhich --progname=latex --show-path=cnf.latex` + +noinst_DATA = prelink.pdf +pics = gap bss basemove newseg dso1 dso2 dso3 + +SUFFIXES = .fig .eps .pdf +
+CLEANFILES = $(pics:=.eps) $(pics:=.pdf) prelink.aux prelink.dvi \ + prelink.log prelink.mtc prelink.mtc1 prelink.pdf prelink.ps \ + prelink.glo prelink.idx prelink.out prelink.tpt prelink.ilg \ + prelink.gls prelink.toc + subdir = doc mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs CONFIG_HEADER = $(top_builddir)/config.h @@ -107,10 +120,13 @@ DIST_SOURCES = NROFF = nroff MANS = $(man_MANS) +DATA = $(noinst_DATA) + DIST_COMMON = Makefile.am Makefile.in all: all-am .SUFFIXES: +.SUFFIXES: .fig .eps .pdf $(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) cd $(top_srcdir) && \ $(AUTOMAKE) --gnu doc/Makefile @@ -200,7 +216,7 @@ distdir: $(DISTFILES) done check-am: all-am check: check-am -all-am: Makefile $(MANS) +all-am: Makefile $(MANS) $(DATA) installdirs: $(mkinstalldirs) $(DESTDIR)$(man8dir) @@ -222,6 +238,7 @@ install-strip: mostlyclean-generic: clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -rm -f Makefile $(CONFIG_CLEAN_FILES) @@ -277,6 +294,30 @@ uninstall-man: uninstall-man8 mostlyclean-libtool uninstall uninstall-am uninstall-info-am \ uninstall-man uninstall-man8 + +prelink.pdf: prelink.tex $(pics:=.fig) linuxtag.sty \ + prelinklisting.sty lineno.sty glossary.conf + $(MAKE) $(pics:=.pdf) + $(PDFLATEXINPUTS) pdflatex $< + makeindex -s $(srcdir)/glossary.conf -o prelink.gls prelink.glo + if grep -q Rerun prelink.log; then $(PDFLATEXINPUTS) pdflatex $<; fi + thumbpdf $@ && $(PDFLATEXINPUTS) pdflatex $< + mv $@ prelinkin.pdf + pdfopt prelinkin.pdf $@ + rm prelinkin.pdf + +prelink.ps: prelink.dvi + dvips $< -o $@ + +prelink.dvi: prelink.tex $(pics:=.eps) + $(LATEXINPUTS) latex $< + if grep -q Rerun prelink.log; then $(LATEXINPUTS) latex $<; fi + +.fig.eps: # convert one xfig drawing to EPS (first prerequisite only) + fig2dev -L eps $< $@ + +.fig.pdf: # convert one xfig drawing to PDF (first prerequisite only) + fig2dev -L pdf $< $@ # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT: diff --git a/doc/basemove.fig b/doc/basemove.fig new file mode 100644 index 0000000..6fb1c02 --- /dev/null +++ b/doc/basemove.fig @@ -0,0 +1,97 @@ +#FIG 3.2 +Landscape +Center +Inches +Letter +100.00 +Single +-2 +1200 2 +6 5925 2775 10275 3525 +2 2 0 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 6000 2850 7200 2850 7200 3450 6000 3450 6000 2850 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 10200 2850 8025 2850 8025 3450 10200 3450 10200 2850 +4 0 0 50 0 0 12 0.0000 4 180 1125 6000 3225 .hash, .dynsym\001 +4 0 0 50 0 0 12 0.0000 4 180 1890 8100 3225 .gnu.version ... .eh_frame\001 +-6 +6 5925 1125 10275 1875 +2 2 0 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 6000 1200 7200 1200 7200 1800 6000 1800 6000 1200 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 10200 1200 8025 1200 8025 1800 10200 1800 10200 1200 +4 0 0 50 0 0 12 0.0000 4 180 1125 6000 1575 .hash, .dynsym\001 +4 0 0 50 0 0 12 0.0000 4 180 1890 8100 1575 .gnu.version ... .eh_frame\001 +-6 +6 525 2775 2400 3525 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 600 2850 2325 2850 2325 3450 600 3450 600 2850 +4 0 0 50 0 0 12 0.0000 4 180 1605 675 3225 .interp, .note.ABI-tag\001 +-6 +6 4200 1125 6075 1875 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 4275 1200 6000 1200 6000 1800 4275 1800 4275 1200 +4 0 0 50 0 0 12 0.0000 4 180 1605 4350 1575 .interp, .note.ABI-tag\001 +-6 +2 2 0 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 2325 2850 3300 2850 3300 3450 2325 3450 2325 2850 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 4125 2850 3300 2850 3300 3450 4125 3450 4125 2850 +2 2 0 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 4125 2850 5100 2850 5100 3450 4125 3450 4125 2850 +2 2 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 450 2700 10200 2700 10200 3600 450 3600 450 2700 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 10800 2850 11925 2850 11925 3450 10800 3450 10800 2850 +2 1 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 2 + 11925 2700 11925 3600 +2 2 0 2 0 7 50 0 44 0.000 0 0 -1 0 0 5 + 11925 2850 12525 2850 12525 3450 11925 3450 11925 2850 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 10800 1200 
11925 1200 11925 1800 10800 1800 10800 1200 +2 2 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 10800 975 12525 975 12525 1950 10800 1950 10800 975 +2 1 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 2 + 11925 975 11925 1950 +2 2 0 2 0 7 50 0 44 0.000 0 0 -1 0 0 5 + 11925 1200 12525 1200 12525 1800 11925 1800 11925 1200 +2 2 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 10800 2700 12525 2700 12525 3600 10800 3600 10800 2700 +2 2 0 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 7200 1200 8025 1200 8025 1800 7200 1800 7200 1200 +2 2 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 450 2700 10200 2700 10200 3600 450 3600 450 2700 +2 2 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 4125 975 10200 975 10200 1950 4125 1950 4125 975 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 5025 1950 1350 2700 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 6600 1950 6600 2700 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 11 + 0 0 1.00 60.00 120.00 + 9000 1950 9000 2025 9000 2100 9000 2175 9000 2250 9000 2325 + 9000 2400 9000 2475 9000 2550 9000 2625 9000 2700 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 12 + 0 0 1.00 60.00 120.00 + 11400 1950 11400 2025 11400 2100 11400 2175 11400 2250 11400 2325 + 11400 2400 11400 2475 11400 2550 11400 2625 11400 2700 11400 2625 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 11 + 0 0 1.00 60.00 120.00 + 12225 1950 12225 2025 12225 2100 12225 2175 12225 2250 12225 2325 + 12225 2400 12225 2475 12225 2550 12225 2625 12225 2700 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 8025 1950 3300 2700 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 7200 1950 2325 2700 +4 0 0 50 0 0 12 0.0000 4 180 780 3300 3225 .gnu.liblist\001 +4 0 0 50 0 0 12 0.0000 4 180 930 4125 3225 .gnu.conflict\001 +4 0 0 50 0 0 12 0.0000 4 180 885 10950 3225 .data ... .got\001 +4 0 0 50 0 0 12 0.0000 4 135 315 12000 3225 .bss\001 +4 0 0 50 0 0 12 0.0000 4 180 885 10950 1575 .data ... 
.got\001 +4 0 0 50 0 0 12 0.0000 4 135 315 12000 1575 .bss\001 +4 0 0 50 0 0 12 0.0000 4 180 525 7275 1575 .dynstr\001 +4 0 0 50 0 0 12 0.0000 4 180 525 2475 3225 .dynstr\001 diff --git a/doc/bss.fig b/doc/bss.fig new file mode 100644 index 0000000..2c9325f --- /dev/null +++ b/doc/bss.fig @@ -0,0 +1,74 @@ +#FIG 3.2 +Landscape +Center +Inches +Letter +100.00 +Single +-2 +1200 2 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 600 1200 2175 1200 2175 1800 600 1800 600 1200 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 3225 1200 2175 1200 2175 1800 3225 1800 3225 1200 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 1425 1950 1425 2700 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 600 2850 2175 2850 2175 3450 600 3450 600 2850 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 3000 2850 2175 2850 2175 3450 3000 3450 3000 2850 +2 2 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 450 2700 5400 2700 5400 3600 450 3600 450 2700 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 5400 2850 3225 2850 3225 3450 5400 3450 5400 2850 +2 2 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 450 975 5400 975 5400 1950 450 1950 450 975 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 5400 1200 3225 1200 3225 1800 5400 1800 5400 1200 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 6000 1200 7125 1200 7125 1800 6000 1800 6000 1200 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 6000 2850 7125 2850 7125 3450 6000 3450 6000 2850 +2 2 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 6000 975 7725 975 7725 1950 6000 1950 6000 975 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 6525 1950 6525 2700 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 7350 1950 7350 2700 +2 1 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 2 + 7125 975 7125 1950 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 8925 2850 7725 2850 7725 3450 8925 3450 8925 2850 +2 2 0 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 8925 2850 10050 2850 10050 3450 8925 3450 8925 2850 +2 2 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 6000 2700 10050 2700 10050 3600 6000 3600 6000 2700 +2 1 0 1 0 7 50 0 -1 4.000 
0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 2175 1950 7725 2700 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 3225 1950 8925 2700 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 11 + 0 0 1.00 60.00 120.00 + 3975 1950 3975 2025 3975 2100 3975 2175 3975 2250 3975 2325 + 3975 2400 3975 2475 3975 2550 3975 2625 3975 2700 +2 2 0 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 7125 2850 7725 2850 7725 3450 7125 3450 7125 2850 +2 2 0 2 0 7 50 0 44 0.000 0 0 -1 0 0 5 + 7125 1200 7725 1200 7725 1800 7125 1800 7125 1200 +4 0 0 50 0 0 12 0.0000 4 180 1335 750 1575 .interp ... .dynsym\001 +4 0 0 50 0 0 12 0.0000 4 180 525 2325 1575 .dynstr\001 +4 0 0 50 0 0 12 0.0000 4 180 1335 750 3225 .interp ... .dynsym\001 +4 0 0 50 0 0 12 0.0000 4 180 780 2175 3225 .gnu.liblist\001 +4 0 0 50 0 0 12 0.0000 4 180 1890 3300 3225 .gnu.version ... .eh_frame\001 +4 0 0 50 0 0 12 0.0000 4 180 1890 3300 1575 .gnu.version ... .eh_frame\001 +4 0 0 50 0 0 12 0.0000 4 180 885 6150 1575 .data ... .got\001 +4 0 0 50 0 0 12 0.0000 4 180 885 6150 3225 .data ... .got\001 +4 0 0 50 0 0 12 0.0000 4 135 315 7200 1575 .bss\001 +4 0 0 50 0 0 12 0.0000 4 135 315 7275 3225 .bss\001 +4 0 0 50 0 0 12 0.0000 4 180 525 7950 3225 .dynstr\001 +4 0 0 50 0 0 12 0.0000 4 180 930 9000 3225 .gnu.conflict\001 diff --git a/doc/dso1.fig b/doc/dso1.fig new file mode 100644 index 0000000..0bd75f3 --- /dev/null +++ b/doc/dso1.fig @@ -0,0 +1,81 @@ +#FIG 3.2 +Landscape +Center +Inches +Letter +100.00 +Single +-2 +1200 2 +6 6375 2625 8250 3675 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 6450 2850 7575 2850 7575 3450 6450 3450 6450 2850 +2 1 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 2 + 7575 2700 7575 3600 +2 2 0 2 0 7 50 0 44 0.000 0 0 -1 0 0 5 + 7575 2850 8175 2850 8175 3450 7575 3450 7575 2850 +2 2 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 6450 2700 8175 2700 8175 3600 6450 3600 6450 2700 +4 0 0 50 0 0 12 0.0000 4 180 885 6600 3225 .data ... 
.got\001 +4 0 0 50 0 0 12 0.0000 4 135 315 7650 3225 .bss\001 +-6 +2 2 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 450 975 4950 975 4950 1950 450 1950 450 975 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 600 1200 2100 1200 2100 1800 600 1800 600 1200 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 4950 1200 2775 1200 2775 1800 4950 1800 4950 1200 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 600 2850 2100 2850 2100 3450 600 3450 600 2850 +2 2 0 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 2100 1200 2775 1200 2775 1800 2100 1800 2100 1200 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 2100 2850 3075 2850 3075 3450 2100 3450 2100 2850 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 5250 2850 3075 2850 3075 3450 5250 3450 5250 2850 +2 2 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 450 2700 5250 2700 5250 3600 450 3600 450 2700 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 6150 1200 7275 1200 7275 1800 6150 1800 6150 1200 +2 2 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 6150 975 7875 975 7875 1950 6150 1950 6150 975 +2 1 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 2 + 7275 975 7275 1950 +2 2 0 2 0 7 50 0 44 0.000 0 0 -1 0 0 5 + 7275 1200 7875 1200 7875 1800 7275 1800 7275 1200 +2 1 4 1 0 7 50 0 -1 4.000 0 0 -1 0 0 1 + 4800 675 +2 1 4 1 0 7 50 0 -1 4.000 0 0 -1 0 0 2 + 4800 750 4800 3825 +2 1 5 1 0 7 50 0 -1 4.000 0 0 -1 0 0 2 + 6000 675 6000 3750 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 1350 1950 1350 2700 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 2100 1950 2100 2700 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 2775 1950 3075 2700 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 0 0 1 + 3375 1950 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 3375 1950 3675 2700 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 7050 1950 7350 2700 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 7575 1950 7875 2700 +4 0 0 50 0 0 12 0.0000 4 180 1155 675 1575 .hash ... 
.dynstr\001 +4 0 0 50 0 0 12 0.0000 4 180 555 2175 1575 .rel.dyn\001 +4 0 0 50 0 0 12 0.0000 4 180 1440 2850 1575 .text ... ro_seg_end\001 +4 0 0 50 0 0 12 0.0000 4 180 1155 675 3225 .hash ... .dynstr\001 +4 0 0 50 0 0 12 0.0000 4 180 555 2175 3225 .rel.dyn\001 +4 0 0 50 0 0 12 0.0000 4 180 1440 3225 3225 .text ... ro_seg_end\001 +4 0 0 50 0 0 12 0.0000 4 180 885 6300 1575 .data ... .got\001 +4 0 0 50 0 0 12 0.0000 4 135 315 7350 1575 .bss\001 +4 0 0 50 0 0 12 0.0000 4 180 1095 3675 2475 page boundary\001 +4 0 0 50 0 0 12 0.0000 4 180 1095 6075 2475 page boundary\001 diff --git a/doc/dso2.fig b/doc/dso2.fig new file mode 100644 index 0000000..73d0a14 --- /dev/null +++ b/doc/dso2.fig @@ -0,0 +1,81 @@ +#FIG 3.2 +Landscape +Center +Inches +Letter +100.00 +Single +-2 +1200 2 +6 6225 2625 8100 3675 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 6300 2850 7425 2850 7425 3450 6300 3450 6300 2850 +2 1 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 2 + 7425 2700 7425 3600 +2 2 0 2 0 7 50 0 44 0.000 0 0 -1 0 0 5 + 7425 2850 8025 2850 8025 3450 7425 3450 7425 2850 +2 2 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 6300 2700 8025 2700 8025 3600 6300 3600 6300 2700 +4 0 0 50 0 0 12 0.0000 4 180 885 6450 3225 .data ... 
.got\001 +4 0 0 50 0 0 12 0.0000 4 135 315 7500 3225 .bss\001 +-6 +2 2 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 450 975 4950 975 4950 1950 450 1950 450 975 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 600 1200 2100 1200 2100 1800 600 1800 600 1200 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 4950 1200 2775 1200 2775 1800 4950 1800 4950 1200 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 600 2850 2100 2850 2100 3450 600 3450 600 2850 +2 2 0 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 2100 1200 2775 1200 2775 1800 2100 1800 2100 1200 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 2100 2850 3075 2850 3075 3450 2100 3450 2100 2850 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 5250 2850 3075 2850 3075 3450 5250 3450 5250 2850 +2 2 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 450 2700 5250 2700 5250 3600 450 3600 450 2700 +2 1 4 1 0 7 50 0 -1 4.000 0 0 -1 0 0 1 + 4800 675 +2 1 4 1 0 7 50 0 -1 4.000 0 0 -1 0 0 2 + 4800 750 4800 3825 +2 1 5 1 0 7 50 0 -1 4.000 0 0 -1 0 0 2 + 6000 675 6000 3750 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 1350 1950 1350 2700 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 2100 1950 2100 2700 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 2775 1950 3075 2700 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 0 0 1 + 3375 1950 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 3375 1950 3675 2700 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 6000 1200 7125 1200 7125 1800 6000 1800 6000 1200 +2 2 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 6000 975 7725 975 7725 1950 6000 1950 6000 975 +2 1 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 2 + 7125 975 7125 1950 +2 2 0 2 0 7 50 0 44 0.000 0 0 -1 0 0 5 + 7125 1200 7725 1200 7725 1800 7125 1800 7125 1200 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 6900 1950 7200 2700 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 7425 1950 7725 2700 +4 0 0 50 0 0 12 0.0000 4 180 1155 675 1575 .hash ... 
.dynstr\001 +4 0 0 50 0 0 12 0.0000 4 180 555 2175 1575 .rel.dyn\001 +4 0 0 50 0 0 12 0.0000 4 180 1440 2850 1575 .text ... ro_seg_end\001 +4 0 0 50 0 0 12 0.0000 4 180 1155 675 3225 .hash ... .dynstr\001 +4 0 0 50 0 0 12 0.0000 4 180 555 2175 3225 .rel.dyn\001 +4 0 0 50 0 0 12 0.0000 4 180 1440 3225 3225 .text ... ro_seg_end\001 +4 0 0 50 0 0 12 0.0000 4 180 1095 3675 2475 page boundary\001 +4 0 0 50 0 0 12 0.0000 4 180 885 6150 1575 .data ... .got\001 +4 0 0 50 0 0 12 0.0000 4 135 315 7200 1575 .bss\001 +4 0 0 50 0 0 12 0.0000 4 180 1095 6075 2475 page boundary\001 diff --git a/doc/dso3.fig b/doc/dso3.fig new file mode 100644 index 0000000..ffdb9de --- /dev/null +++ b/doc/dso3.fig @@ -0,0 +1,110 @@ +#FIG 3.2 +Landscape +Center +Inches +Letter +100.00 +Single +-2 +1200 2 +6 375 4125 6900 5175 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 600 4350 2100 4350 2100 4950 600 4950 600 4350 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 2100 4350 3075 4350 3075 4950 2100 4950 2100 4350 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 4950 4350 3075 4350 3075 4950 4950 4950 4950 4350 +2 2 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 450 4200 4950 4200 4950 5100 450 5100 450 4200 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 5100 4350 6225 4350 6225 4950 5100 4950 5100 4350 +2 2 0 2 0 7 50 0 44 0.000 0 0 -1 0 0 5 + 6225 4350 6825 4350 6825 4950 6225 4950 6225 4350 +2 2 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 5100 4200 6825 4200 6825 5100 5100 5100 5100 4200 +2 1 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 2 + 6225 4200 6225 5100 +4 0 0 50 0 0 12 0.0000 4 180 1155 675 4725 .hash ... .dynstr\001 +4 0 0 50 0 0 12 0.0000 4 180 555 2175 4725 .rel.dyn\001 +4 0 0 50 0 0 12 0.0000 4 180 1440 3150 4725 .text ... ro_seg_end\001 +4 0 0 50 0 0 12 0.0000 4 180 885 5250 4724 .data ... 
.got\001 +4 0 0 50 0 0 12 0.0000 4 135 315 6300 4724 .bss\001 +-6 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 600 1200 2100 1200 2100 1800 600 1800 600 1200 +2 2 0 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 2100 1200 2775 1200 2775 1800 2100 1800 2100 1200 +2 1 4 1 0 7 50 0 -1 4.000 0 0 -1 0 0 1 + 4800 675 +2 1 5 1 0 7 50 0 -1 4.000 0 0 -1 0 0 2 + 6000 675 6000 5250 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 1350 1950 1350 2700 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 2100 1950 2100 2700 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 2775 1950 3075 2700 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 0 0 1 + 3375 1950 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 3375 1950 4500 2700 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 5400 1950 6600 2700 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 6300 1950 7500 2700 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 4650 1200 2775 1200 2775 1800 4650 1800 4650 1200 +2 2 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 450 975 4650 975 4650 1950 450 1950 450 975 +2 1 4 1 0 7 50 0 -1 4.000 0 0 -1 0 0 2 + 4800 750 4800 5250 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 4800 1200 5925 1200 5925 1800 4800 1800 4800 1200 +2 2 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 4800 975 6525 975 6525 1950 4800 1950 4800 975 +2 1 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 2 + 5925 975 5925 1950 +2 2 0 2 0 7 50 0 44 0.000 0 0 -1 0 0 5 + 5925 1200 6525 1200 6525 1800 5925 1800 5925 1200 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 600 2850 2100 2850 2100 3450 600 3450 600 2850 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 2100 2850 3075 2850 3075 3450 2100 3450 2100 2850 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 5850 2850 3975 2850 3975 3450 5850 3450 5850 2850 +2 2 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 450 2700 5850 2700 5850 3600 450 3600 450 2700 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 6000 2850 7125 2850 7125 3450 6000 3450 6000 2850 +2 2 0 2 0 7 50 0 44 0.000 0 0 -1 0 0 5 + 
7125 2850 7725 2850 7725 3450 7125 3450 7125 2850 +2 2 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 6000 2700 7725 2700 7725 3600 6000 3600 6000 2700 +2 1 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 2 + 7125 2700 7125 3600 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 5475 5400 5475 5175 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 5475 5400 4950 5250 +4 0 0 50 0 0 12 0.0000 4 180 1155 675 1575 .hash ... .dynstr\001 +4 0 0 50 0 0 12 0.0000 4 180 555 2175 1575 .rel.dyn\001 +4 0 0 50 0 0 12 0.0000 4 180 1440 2850 1575 .text ... ro_seg_end\001 +4 0 0 50 0 0 12 0.0000 4 180 1095 3675 2475 page boundary\001 +4 0 0 50 0 0 12 0.0000 4 180 1095 6075 2475 page boundary\001 +4 0 0 50 0 0 12 0.0000 4 180 885 4950 1575 .data ... .got\001 +4 0 0 50 0 0 12 0.0000 4 135 315 6000 1575 .bss\001 +4 0 0 50 0 0 12 0.0000 4 180 1155 675 3225 .hash ... .dynstr\001 +4 0 0 50 0 0 12 0.0000 4 180 555 2175 3225 .rel.dyn\001 +4 0 0 50 0 0 12 0.0000 4 180 1440 4050 3225 .text ... ro_seg_end\001 +4 0 0 50 0 0 12 0.0000 4 180 885 6150 3224 .data ... 
.got\001 +4 0 0 50 0 0 12 0.0000 4 135 315 7200 3224 .bss\001 +4 0 0 50 0 0 12 0.0000 4 180 3450 3675 5550 This page needs to be mapped from 2 sources\001 +4 0 0 50 0 0 12 0.0000 4 135 645 1125 4050 And not:\001 diff --git a/doc/gap.fig b/doc/gap.fig new file mode 100644 index 0000000..ced67c9 --- /dev/null +++ b/doc/gap.fig @@ -0,0 +1,87 @@ +#FIG 3.2 +Landscape +Center +Inches +Letter +100.00 +Single +-2 +1200 2 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 600 1200 2175 1200 2175 1800 600 1800 600 1200 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 4425 1200 3225 1200 3225 1800 4425 1800 4425 1200 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 3225 1200 2175 1200 2175 1800 3225 1800 3225 1200 +2 2 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 450 975 9225 975 9225 1950 450 1950 450 975 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 9225 1200 7425 1200 7425 1800 9225 1800 9225 1200 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 1425 1950 1425 2700 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 2175 1950 4425 2700 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 3225 1950 5700 2700 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 3750 1950 3750 2700 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 11 + 0 0 1.00 60.00 120.00 + 8250 1950 8250 2025 8250 2100 8250 2175 8250 2250 8250 2325 + 8250 2400 8250 2475 8250 2550 8250 2625 8250 2700 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 600 2850 2175 2850 2175 3450 600 3450 600 2850 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 3000 2850 2175 2850 2175 3450 3000 3450 3000 2850 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 4425 2850 3225 2850 3225 3450 4425 3450 4425 2850 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 5700 2850 4425 2850 4425 3450 5700 3450 5700 2850 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 5700 2850 6900 2850 6900 3450 5700 3450 5700 2850 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 7425 2850 9225 2850 9225 3450 7425 3450 7425 2850 +2 2 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 450 2700 9225 
2700 9225 3600 450 3600 450 2700 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 9825 1200 10950 1200 10950 1800 9825 1800 9825 1200 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 9825 2850 10950 2850 10950 3450 9825 3450 9825 2850 +2 2 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 9825 975 11550 975 11550 1950 9825 1950 9825 975 +2 2 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 9825 2700 11550 2700 11550 3600 9825 3600 9825 2700 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 10350 1950 10350 2700 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 11175 1950 11175 2700 +2 1 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 2 + 10950 975 10950 1950 +2 1 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 13 + 10950 2700 10950 2775 10950 2850 10950 2925 10950 3000 10950 3075 + 10950 3150 10950 3225 10950 3300 10950 3375 10950 3450 10950 3525 + 10950 3600 +2 2 0 2 0 7 50 0 44 0.000 0 0 -1 0 0 5 + 10950 1200 11550 1200 11550 1800 10950 1800 10950 1200 +2 2 0 2 0 7 50 0 44 0.000 0 0 -1 0 0 5 + 10950 2850 11550 2850 11550 3450 10950 3450 10950 2850 +4 0 0 50 0 0 12 0.0000 4 180 1335 750 1575 .interp ... .dynsym\001 +4 0 0 50 0 0 12 0.0000 4 180 525 2325 1575 .dynstr\001 +4 0 0 50 0 0 12 0.0000 4 180 1005 3300 1575 .gnu.version*\001 +4 0 0 50 0 0 12 0.0000 4 180 1530 7575 1575 .rel.dyn ... .eh_frame\001 +4 0 0 50 0 0 12 0.0000 4 180 1335 750 3225 .interp ... .dynsym\001 +4 0 0 50 0 0 12 0.0000 4 180 780 2175 3225 .gnu.liblist\001 +4 0 0 50 0 0 12 0.0000 4 180 1005 3300 3225 .gnu.version*\001 +4 0 0 50 0 0 12 0.0000 4 180 525 4575 3225 .dynstr\001 +4 0 0 50 0 0 12 0.0000 4 180 930 5775 3225 .gnu.conflict\001 +4 0 0 50 0 0 12 0.0000 4 180 1530 7575 3225 .rel.dyn ... .eh_frame\001 +4 0 0 50 0 0 12 0.0000 4 180 885 9975 1575 .data ... .got\001 +4 0 0 50 0 0 12 0.0000 4 180 885 9975 3225 .data ... 
.got\001 +4 0 0 50 0 0 12 0.0000 4 135 315 11025 1575 .bss\001 +4 0 0 50 0 0 12 0.0000 4 135 315 11100 3225 .bss\001 diff --git a/doc/glossary.conf b/doc/glossary.conf new file mode 100644 index 0000000..477b529 --- /dev/null +++ b/doc/glossary.conf @@ -0,0 +1,3 @@ +keyword "\\glossaryentry" +preamble "\n \\begin{theglossary}\n" +postamble "\n\n \\end{theglossary}\n" diff --git a/doc/lineno.sty b/doc/lineno.sty new file mode 100644 index 0000000..0805bcc --- /dev/null +++ b/doc/lineno.sty @@ -0,0 +1,1521 @@ +\iffalse; awk '/S[H]ELL/' lineno.sty|sh;exit;\fi +%%% To pretty-print this file, feed it to a unix shell! +%%% +%%% $Id: lineno.sty,v 3.12 2003/01/14 21:11:53 stephan Exp $ +%%% +%%% Copyright 1995--2001 Stephan I. B"ottcher <stephan@nevis.columbia.edu> +%%% +%%% This program can be redistributed and/or modified under the terms +%%% of the LaTeX Project Public License Distributed from CTAN +%%% archives in directory macros/latex/base/lppl.txt; either +%%% version 1 of the License, or any later version. +%%% +% \documentclass[a4paper,12pt]{article}%D +% \usepackage{lineno}%D +% +% \title{ +% \texttt{\itshape +% lineno.sty \ v3.09 2003/01/14 +% }\\\ \\ +% A \LaTeX\ package to attach +% \\ Line numbers to paragraphs +% }\author{ +% Stephan I. B\"ottcher +% }\date{ +% boettcher@physik.uni-kiel.de +%% \\ stephan@nevis.columbia.edu +%% \\ Stephan.Boettcher@cern.ch +% \\} +% +% +% \def~{\verb~} +% \catcode`\<\catcode`\~ +% \def<#1>{$\langle${\itshape#1}\/$\rangle$} +% \catcode`\|\catcode`\~ +% \def|#1{{\ttfamily\string#1}} +% \newenvironment{code} +% {\par\runninglinenumbers +% \modulolinenumbers[1] +% \linenumbersep.3em +% \footnotesize +% \def\linenumberfont +% {\normalfont\tiny\itshape}} +% {} +% +% \begin{document}%D +%% \DocInput{lineno.doc}%D +% \pagewiselinenumbers +% \maketitle +% \tableofcontents +% \sloppy +% +% +% +% \section{ +% Introduction +% } +% This package provides line numbers on paragraphs. 
+% After \TeX\ has broken a paragraph into lines there will +% be line numbers attached to them, with the possibility to +% make references through the \LaTeX\ ~\ref~, ~\pageref~ +% cross reference mechanism. This includes four issues: +% \begin{itemize} +% \item attach a line number on each line, +% \item create references to a line number, +% \item control line numbering mode, +% \item count the lines and print the numbers. +% \end{itemize} +% The first two points are implemented through patches to +% the output routine. The third by redefining ~\par~, ~\@par~ +% and ~\@@par~. The counting is easy, as long as you want +% the line numbers run through the text. If they shall +% start over at the top of each page, the aux-file as well +% as \TeX s memory have to carry a load for each counted line. +% +% I wrote this package for my wife Petra, who needs it for +% transcriptions of interviews. This allows her to +% precisely refer to passages in the text. It works well +% together with ~\marginpar~s, but not too well with displaymath. +% ~\footnote~s are a problem, especially when they +% are split, but we may get there. +% +% lineno.sty works +% surprisingly well with other packages, for +% example, ~wrapfig.sty~. So please try if it +% works with whatever you need, and if it does, +% please tell me, and if it does not, tell me as +% well, so I can try to fix it. +% +% This style option is written for \LaTeXe, later than November 1994, +% since we need the ~\protected@write~ macro. + +\NeedsTeXFormat{LaTeX2e}[1994/11/04] +\ProvidesPackage{lineno} + [2003/01/14 line numbers on paragraphs v3.09] + +%% v1.00 1995/03/31 SIB: first release for Petras interview transcriptions +%% v1.01 1995/10/28 SIB: added ~pagewise~ mode +%% v1.02 1995/11/15 SIB: added ~modulo~ option +%% v1.03 1995/12/05 SIB: pagewise: try to reduce the hash-size requirements +%% v2.00 1995/12/06 SIB: ..
it works, new user interface +%% v2.01 1996/09/17 SIB: put into CVS +%% v2.02 1997/03/17 SIB: add: \@reinserts, for footnotes +%% v2.04 1998/03/09 SIB: add: linenomath environment +%% v2.05 1998/04/26 SIB: add: prevgraf test +%% v2.06 1999/03/02 SIB: LPPL added +%% v3.00 1999/06/11 SiB: include the extension in the main file +%% v3.01 1999/08/28 SiB: \@reinserts -> \holdinginserts +%% v3.02 2000/03/10 SiB: \@LN@output +%% v3.03 2000/07/01 SiB: \@LN@ExtraLabelItems, hyperref +%% v3.04 2000/12/17 SiB: longtable compatibility. +%% v3.05 2001/01/02 SiB: [fleqn] detection. +%% v3.05a 2001/01/04 SiB: [fleqn] detection reverted for eqnarray. +%% v3.06 2001/01/17 SiB: [twocolumn] mode support. +%% v3.07 2001/07/30 SiB: [hyperref] option obsoleted. +%% v3.08 2001/08/02 SiB: linenomath wrapping for \[ \] +%% v3.08a 2001/08/04 SiB: linenomath wrapping for \[ \] fixed +%% v3.08b 2002/01/27 SiB: enquotation typo fix +%% v3.09 2003/01/14 SIB: hyperref detection fix +%% +%% Acknowledgements: +%% v3.06: Donald Arseneau, pointed to mparhack.sty. +%% v3.07+: Frank Mittelbach, points out inconsistencies in the +%% user interface. +% +% \section{ +% Put the line numbers to the lines +% } +% The line numbers have to be attached by the output +% routine. We simply set the ~\interlinepenalty~ to -100000. +% The output routine will be called after each line in the +% paragraph, except the last, where we trigger by ~\par~. +% The ~\linenopenalty~ is small enough to compensate a bunch of +% penalties (e.g., with ~\samepage~). +% +% (New v3.04) Longtable uses +% ~\penalty~-30000. The lineno penalty range was +% shrunk to $-188000 \dots -32000$. (/New v3.04) + +\newcount\linenopenalty\linenopenalty=-100000 +\mathchardef\linenopenaltypar=32000 + +% So let's make a hook to ~\output~, the direct way. The \LaTeX\ +% macro ~\@reinserts~ puts the footnotes back on the page. +% +% (New v3.01) ~\@reinserts~ badly +% screws up split footnotes.
The bottom part is +% still on the recent contributions list, and the +% top part will be put back there after the bottom +% part. Thus, since lineno.sty does not play well +% with ~\inserts~ anyway, we can safely experiment +% with ~\holdinginserts~, without making things +% much worse. +% +% Or that's what I thought, but: Just activating +% ~\holdinginserts~ while doing the ~\par~ will +% not do the trick: The ~\output~ routine may be +% called for a real page break before all line +% numbers are done, and how can we get control +% over ~\holdinginserts~ at that point? +% +% Let's try this: When the ~\output~ routine is +% run with ~\holdinginserts=3~ for a real page +% break, then we reset ~\holdinginserts~ and +% restart ~\output~. +% +% Then, again, how do we keep the remaining +% ~\inserts~ while doing further line numbers? +% +% If we find ~\holdinginserts~=-3 we activate it again +% after doing ~\output~. (/New v3.01) +% +% (New v3.02) To work with +% multicol.sty, the original output routine is now +% called indirectly, instead of being replaced. +% When multicol.sty changes ~\output~, it is a +% toks register, not the real thing. (/New v3.02) + +\let\@LN@output\output +\newtoks\output +\output=\expandafter{\the\@LN@output} +\@LN@output={% + \LineNoTest + \if@tempswa + \LineNoHoldInsertsTest + \if@tempswa + \if@twocolumn\let\@makecol\@LN@makecol\fi + \the\output + \ifnum\holdinginserts=-3 + \global\holdinginserts 3 + \fi + \else + \global\holdinginserts-3 + \unvbox\@cclv + \ifnum\outputpenalty=10000\else + \penalty\outputpenalty + \fi + \fi + \else + \MakeLineNo + \fi + } + +% The float mechanism inserts ~\interlinepenalty~s during +% ~\output~. So carefully reset it before going on. Else +% we get doubled line numbers on every float placed in +% horizontal mode, e.g., from ~\linelabel~. +% +% Sorry, neither a ~\linelabel~ nor a ~\marginpar~ should +% insert a penalty, else the following line number +% could go to the next page.
Nor should any other +% float. So let us suppress the ~\interlinepenalty~ +% altogether with the ~\@nobreak~ switch. +% +% Since (ltspace.dtx, v1.2p)[1996/07/26], the ~\@nobreaktrue~ does +% its job globally. We need to do it locally here. + +\def\LineNoTest{% + \let\@@par\@@@par + \ifnum\interlinepenalty<-\linenopenaltypar + \advance\interlinepenalty-\linenopenalty + \my@nobreaktrue + \fi + \@tempswatrue + \ifnum\outputpenalty>-\linenopenaltypar\else + \ifnum\outputpenalty>-188000\relax + \@tempswafalse + \fi + \fi + } + +\def\my@nobreaktrue{\let\if@nobreak\iftrue} + +\def\LineNoHoldInsertsTest{% + \ifnum\holdinginserts=3\relax + \@tempswafalse + \fi + } + +% We have to return all the page to the current page, and +% add a box with the line number, without adding +% breakpoints, glue or space. The depth of our line number +% should be equal to the previous depth of the page, in +% case the page breaks here, and the box has to be moved up +% by that depth. +% +% The ~\interlinepenalty~ comes after the ~\vadjust~ from a +% ~\linelabel~, so we increment the line number \emph{after} +% printing it. The macro ~\makeLineNumber~ produces the +% text of the line number, see section \ref{appearance}. +% +% Finally we put in the natural ~\interlinepenalty~, except +% after the last line. + +\def\MakeLineNo{\@tempdima\dp\@cclv \unvbox\@cclv + \sbox\@tempboxa{\hbox to\z@{\makeLineNumber}}% + \stepcounter{linenumber}% + \dp\@tempboxa=\@tempdima\ht\@tempboxa=\z@ + \nointerlineskip\kern-\@tempdima\box\@tempboxa + \ifnum\outputpenalty=-\linenopenaltypar\else + \@tempcnta\outputpenalty + \advance\@tempcnta -\linenopenalty + \penalty\@tempcnta + \fi + } + +% +% +% \section{ +% Control line numbering +% } +% The line numbering is controlled via ~\par~. \LaTeX\ +% saved the \TeX-primitive ~\par~ in ~\@@par~. We push it +% one level further out, and redefine ~\@@par~ to insert +% the ~\interlinepenalty~ needed to trigger the +% line numbering. 
And we need to allow pagebreaks after a +% paragraph. +% +% New (2.05beta): the prevgraf test. A paragraph that ends with a +% displayed equation, a ~\noindent\par~ or ~wrapfig.sty~ produce empty +% paragraphs. These should not get a spurious line number via +% ~\linenopenaltypar~. + +\let\@@@par\@@par +\newcount\linenoprevgraf + +\def\linenumberpar{\ifvmode\@@@par\else\ifinner\@@@par\else + \advance\interlinepenalty \linenopenalty + \linenoprevgraf\prevgraf + \global\holdinginserts3% + \@@@par + \ifnum\prevgraf>\linenoprevgraf + \penalty-\linenopenaltypar + \fi + \kern\z@ + \global\holdinginserts0% + \advance\interlinepenalty -\linenopenalty + \fi\fi + } + +% The basic commands to enable and disable line numbers. +% ~\@par~ and ~\par~ are only touched, when they are ~\let~ +% to ~\@@@par~/~\linenumberpar~. The line number may be +% reset to 1 with the star-form, or set by an optional +% argument ~[~<number>~]~. + +\def\linenumbers{\let\@@par\linenumberpar + \ifx\@par\@@@par\let\@par\linenumberpar\fi + \ifx\par\@@@par\let\par\linenumberpar\fi + \@ifnextchar[{\resetlinenumber}%] + {\@ifstar{\resetlinenumber}{}}% + } + +\def\nolinenumbers{\let\@@par\@@@par + \ifx\@par\linenumberpar\let\@par\@@@par\fi + \ifx\par\linenumberpar\let\par\@@@par\fi + } + +% What happens with a display math? Since ~\par~ is not executed, +% when breaking the lines before a display, they will not get +% line numbers. Sorry, but I do not dare to change +% ~\interlinepenalty~ globally, nor do I want to redefine +% the display math environments here. +% \begin{displaymath} +% display \ math +% \end{displaymath} +% See the subsection below, for a wrapper environment to make +% it work. But that requires wrapping each and every display +% in your LaTeX source. +% +% The next two commands are provided to turn on line +% numbering in a specific mode. 
Please note the difference: +% for pagewise numbering, ~\linenumbers~ comes first to +% inhibit it from seeing optional arguments, since +% re-/presetting the counter is useless. + +\def\pagewiselinenumbers{\linenumbers\setpagewiselinenumbers} +\def\runninglinenumbers{\setrunninglinenumbers\linenumbers} + +% Finally, it is a \LaTeX\ style, so we provide for the use +% of environments, including the suppression of the +% following paragraph's indentation. + +%%% TO DO: add \par to \linenumbers, if called from an environment. +%%% To DO: add an \@endpe hack if \linenumbers are turned on +%%% in horizontal mode. {\par\parskip\z@\noindent} or +%%% something. + +\@namedef{linenumbers*}{\par\linenumbers*} +\@namedef{runninglinenumbers*}{\par\runninglinenumbers*} + +\def\endlinenumbers{\par\@endpetrue} +\let\endrunninglinenumbers\endlinenumbers +\let\endpagewiselinenumbers\endlinenumbers +\expandafter\let\csname endlinenumbers*\endcsname\endlinenumbers +\expandafter\let\csname endrunninglinenumbers*\endcsname\endlinenumbers +\let\endnolinenumbers\endlinenumbers + +% +% \subsection{ +% Display math +% } +% +% Now we tackle the problem to get display math working. +% There are different options. +% \begin{enumerate}\item[ +% 1.] Precede every display math with a ~\par~. +% Not too good. +% \item[ +% 2.] Change ~\interlinepenalty~ and associates globally. +% Unstable. +% \item[ +% 3.] Wrap each display math with a ~{linenomath}~ +% environment. +% \end{enumerate} +% We'll go for option 3. See if it works: +% \begin{linenomath} +% \begin{equation} +% display \ math +% \end{equation} +% \end{linenomath} +% The star form ~{linenomath*}~ should also number the lines +% of the display itself, +% \begin{linenomath*} +% \begin{eqnarray} +% multi && line \\ +% display && math \\ +% & +% \begin{array}{c} +% with \\ +% array +% \end{array} +% & +% \end{eqnarray} +% \end{linenomath*} +% including multline displays. 
+% +% First, here are two macros to turn +% on linenumbering on paragraphs preceding displays, with +% numbering the lines of the display itself, or without. +% The ~\ifx..~ tests if line numbering is turned on. It +% does not harm to add these wrappers in sections that are +% not numbered. Nor does it harm to wrap a display +% twice, e.g., in case you have some ~{equation}~s wrapped +% explicitly, and later you redefine ~\equation~ to do it +% automatically. + +\newcommand\linenomathNonumbers{% + \ifx\@@par\@@@par\else + \ifnum\interlinepenalty>-\linenopenaltypar + \global\holdinginserts3% + \advance\interlinepenalty \linenopenalty + \advance\predisplaypenalty \linenopenalty + \fi + \fi + \ignorespaces + } + +\newcommand\linenomathWithnumbers{% + \ifx\@@par\@@@par\else + \ifnum\interlinepenalty>-\linenopenaltypar + \global\holdinginserts3% + \advance\interlinepenalty \linenopenalty + \advance\predisplaypenalty \linenopenalty + \advance\postdisplaypenalty \linenopenalty + \advance\interdisplaylinepenalty \linenopenalty + \fi + \fi + \ignorespaces + } + +% The ~{linenomath}~ environment has two forms, with and +% without a star. The following two macros define the +% environment, where the starred/non-starred form does/doesn't number the +% lines of the display or vice versa. + +\newcommand\linenumberdisplaymath{% + \def\linenomath{\linenomathWithnumbers}% + \@namedef{linenomath*}{\linenomathNonumbers}% + } + +\newcommand\nolinenumberdisplaymath{% + \def\linenomath{\linenomathNonumbers}% + \@namedef{linenomath*}{\linenomathWithnumbers}% + } + +\def\endlinenomath{% + \global\holdinginserts0 + \@ignoretrue +} +\expandafter\let\csname endlinenomath*\endcsname\endlinenomath + +% The default is not to number the lines of a display. But +% the package option ~mathlines~ may be used to switch +% that behavior. 
+ +\nolinenumberdisplaymath + +% +% +% \section{ +% Line number references +% } +% The only way to get a label to a line number in a +% paragraph is to ask the output routine to mark it. +% +% We use the marginpar mechanism to hook to ~\output~ for a +% second time. Marginpars are floats with number $-1$, we +% fake marginpars with No $-2$. Originally, every negative +% numbered float was considered to be a marginpar. +% +% The float box number ~\@currbox~ is used to transfer the +% label name in a macro called ~\@LNL@~<box-number>. +% +% A ~\newlabel~ is written to the aux-file. The reference +% is to ~\theLineNumber~, \emph{not} ~\thelinenumber~. +% This allows to hook in, as done below for pagewise line +% numbering. +% +% (New v3.03) The ~\@LN@ExtraLabelItems~ are added for a hook +% to keep packages like ~{hyperref}~ happy. (/New v3.03) + +\let\@LN@addmarginpar\@addmarginpar +\def\@addmarginpar{% + \ifnum\count\@currbox>-2\relax + \expandafter\@LN@addmarginpar + \else + \@cons\@freelist\@currbox + \protected@write\@auxout{}{% + \string\newlabel + {\csname @LNL@\the\@currbox\endcsname}% + {{\theLineNumber}{\thepage}\@LN@ExtraLabelItems}}% + \fi} + +\let\@LN@ExtraLabelItems\@empty + +% \subsection{ +% The linelabel command +% } +% To refer to a place in line ~\ref{~<foo>~}~ at page +% ~\pageref{~<foo>~}~ you place a ~\linelabel{~<foo>~}~ at +% that place. +% +% \linelabel{demo} +% \marginpar{\tiny\raggedright +% See if it works: This paragraph +% starts on page \pageref{demo}, line +% \ref{demo}. +% }% +% If you use this command outside a ~\linenumbers~ +% paragraph, you will get references to some bogus +% line numbers, sorry. But we don't disable the command, +% because only the ~\par~ at the end of a paragraph may +% decide whether to print line numbers on this paragraph +% or not. A ~\linelabel~ may legally appear earlier than +% ~\linenumbers~. 
+% +% ~\linelabel~, via a fake float number $-2$, puts a +% ~\penalty~ into a ~\vadjust~, which triggers the +% pagebuilder after putting the current line to the main +% vertical list. A ~\write~ is placed on the main vertical +% list, which prints a reference to the current value of +% ~\thelinenumber~ and ~\thepage~ at the time of the +% ~\shipout~. +% +% A ~\linelabel~ is allowed only in outer horizontal mode. +% In outer vertical mode we start a paragraph, and ignore +% trailing spaces (by fooling ~\@esphack~). +% +% The argument of ~\linelabel~ is put into a macro with a +% name derived from the number of the allocated float box. +% Much of the rest is dummy float setup. + +\def\linelabel#1{% + \ifvmode + \ifinner \else + \leavevmode \@bsphack \@savsk\p@ + \fi + \else + \@bsphack + \fi + \ifhmode + \ifinner + \@parmoderr + \else + \@floatpenalty -\@Mii + \@next\@currbox\@freelist + {\global\count\@currbox-2% + \expandafter\gdef\csname @LNL@\the\@currbox\endcsname{#1}}% + {\@floatpenalty\z@ \@fltovf \def\@currbox{\@tempboxa}}% + \begingroup + \setbox\@currbox \color@vbox \vbox \bgroup \end@float + \endgroup + \@ignorefalse \@esphack + \fi + \else + \@parmoderr + \fi + } + +% \modulolinenumbers[3] +% \section{ +% The appearance of the line numbers +% }\label{appearance} +% The line numbers are set as ~\tiny\sffamily\arabic{linenumber}~, +% $10pt$ left of the text. With options to place it +% right of the text, or . . . +% +% . . . here are the hooks: + +\def\makeLineNumberLeft{\hss\linenumberfont\LineNumber\hskip\linenumbersep} + +\def\makeLineNumberRight{\linenumberfont\hskip\linenumbersep\hskip\columnwidth + \hbox to\linenumberwidth{\hss\LineNumber}\hss} + +\def\linenumberfont{\normalfont\tiny\sffamily} + +\newdimen\linenumbersep +\newdimen\linenumberwidth + +\linenumberwidth=10pt +\linenumbersep=10pt + +% Margin switching requires ~pagewise~ numbering mode, but +% choosing the left or right margin for the numbers always +% works. 
+ +\def\switchlinenumbers{\@ifstar + {\let\makeLineNumberOdd\makeLineNumberRight + \let\makeLineNumberEven\makeLineNumberLeft}% + {\let\makeLineNumberOdd\makeLineNumberLeft + \let\makeLineNumberEven\makeLineNumberRight}% + } + +\def\setmakelinenumbers#1{\@ifstar + {\let\makeLineNumberRunning#1% + \let\makeLineNumberOdd#1% + \let\makeLineNumberEven#1}% + {\ifx\c@linenumber\c@runninglinenumber + \let\makeLineNumberRunning#1% + \else + \let\makeLineNumberOdd#1% + \let\makeLineNumberEven#1% + \fi}% + } + +\def\leftlinenumbers{\setmakelinenumbers\makeLineNumberLeft} +\def\rightlinenumbers{\setmakelinenumbers\makeLineNumberRight} + +\leftlinenumbers* + +% ~\LineNumber~ is a hook which is used for the modulo stuff. +% It is the command to use for the line number, when you +% customize ~\makeLineNumber~. Use ~\thelinenumber~ to +% change the outfit of the digits. +% +% +% We will implement two modes of operation: +% \begin{itemize} +% \item numbers ~running~ through (parts of) the text +% \item ~pagewise~ numbers starting over with one on top of +% each page. +% \end{itemize} +% Both modes have their own count register, but only one is +% allocated as a \LaTeX\ counter, with the attached +% facilities serving both. + +\newcounter{linenumber} +\newcount\c@pagewiselinenumber +\let\c@runninglinenumber\c@linenumber + +% Only the running mode counter may be reset, or preset, +% for individual paragraphs. The pagewise counter must +% give a unique anonymous number for each line. + +\newcommand\resetlinenumber[1][1]{\c@runninglinenumber#1} + +% \subsection{ +% Running line numbers +% } +% Running mode is easy, ~\LineNumber~ and ~\theLineNumber~ +% produce ~\thelinenumber~, which defaults to +% ~\arabic{linenumber}~, using the ~\c@runninglinenumber~ +% counter. This is the default mode of operation. 
+ +\def\makeRunningLineNumber{\makeLineNumberRunning} + +\def\setrunninglinenumbers{% + \def\theLineNumber{\thelinenumber}% + \let\c@linenumber\c@runninglinenumber + \let\makeLineNumber\makeRunningLineNumber + } + +\setrunninglinenumbers\resetlinenumber + +% +% +% \subsection{ +% Pagewise line numbers +% } +% Difficult, if you think about it. The number has to be +% printed when there is no means to know on which page it +% will end up, except through the aux-file. My solution +% is really expensive, but quite robust. +% +% With version ~v2.00~ the hashsize requirements are +% reduced, because we do not need one controlsequence for +% each line any more. But this costs some computation time +% to find out on which page we are. +% +% ~\makeLineNumber~ gets a hook to log the line and page +% number to the aux-file. Another hook tries to find out +% what the page offset is, and subtracts it from the counter +% ~\c@linenumber~. Additionally, the switch +% ~\ifoddNumberedPage~ is set true for odd numbered pages, +% false otherwise. + +\def\setpagewiselinenumbers{% + \let\theLineNumber\thePagewiseLineNumber + \let\c@linenumber\c@pagewiselinenumber + \let\makeLineNumber\makePagewiseLineNumber + } + +\def\makePagewiseLineNumber{\logtheLineNumber\getLineNumber + \ifoddNumberedPage + \makeLineNumberOdd + \else + \makeLineNumberEven + \fi + } + +% Each numbered line gives a line to the aux file +% \begin{verse} +% ~\@LN{~<line>~}{~<page>~}~ +% \end{verse} +% very similar to the ~\newlabel~ business, except that we need +% an arabic representation of the page number, not what +% there might else be in ~\thepage~. + +\def\logtheLineNumber{\protected@write\@auxout{}{% + \string\@LN{\the\c@linenumber}{\noexpand\the\c@page}}} + +% From the aux-file we get one macro ~\LN@P~<page> for each +% page with line numbers on it. This macro calls four other +% macros with one argument each. 
These macros are +% dynamically defined to do tests and actions, to find out +% on which page the current line number is located. +% +% We need sort of a pointer to the first page with line +% numbers, initialized to point to nothing: + +\def\LastNumberedPage{first} +\def\LN@Pfirst{\nextLN\relax} + +% The four dynamic macros are initialized to reproduce +% themselves in an ~\xdef~ + +\let\lastLN\relax % compare to last line on this page +\let\firstLN\relax % compare to first line on this page +\let\pageLN\relax % get the page number, compute the linenumber +\let\nextLN\relax % move to the next page + +% During the end-document run through the aux-files, we +% disable ~\@LN~. I may put in a check here later, to give +% a rerun recommendation. + +\AtEndDocument{\let\@LN\@gobbletwo} + +% Now, this is the tricky part. First of all, the whole +% definition of ~\@LN~ is grouped, to avoid accumulation +% on the save stack. Somehow ~\csname~<cs>~\endcsname~ pushes +% an entry, which stays after an ~\xdef~ to that <cs>. +% +% If ~\LN@P~<page> is undefined, initialize it with the +% current page and line number, with the +% \emph{pointer-to-the-next-page} pointing to nothing. And +% the macro for the previous page will be redefined to point +% to the current one. +% +% If the macro for the current page already exists, just +% redefine the \emph{last-line-number} entry. +% +% Finally, save the current page number, to get the pointer to the +% following page later. 
+ +\def\@LN#1#2{{\expandafter\@@LN + \csname LN@P#2C\@LN@column\expandafter\endcsname + \csname LN@PO#2\endcsname + {#1}{#2}}} + +\def\@@LN#1#2#3#4{\ifx#1\relax + \ifx#2\relax\gdef#2{#3}\fi + \expandafter\@@@LN\csname LN@P\LastNumberedPage\endcsname#1 + \xdef#1{\lastLN{#3}\firstLN{#3}\pageLN{#4}{\@LN@column}{#2}\nextLN\relax}% + \else + \def\lastLN##1{\noexpand\lastLN{#3}}% + \xdef#1{#1}% + \fi + \xdef\LastNumberedPage{#4C\@LN@column}} + +% The previous page macro gets its pointer to the +% current one, replacing the ~\relax~ with the cs-token +% ~\LN@P~<page>. + +\def\@@@LN#1#2{{\def\nextLN##1{\noexpand\nextLN\noexpand#2}% + \xdef#1{#1}}} + +% Now, to print a line number, we need to find the page, +% where it resides. This will most probably be the page where +% the last one came from, or maybe the next page. However, it can +% be a completely different one. We maintain a cache, +% which is ~\let~ to the last page's macro. But for now +% it is initialized to expand ~\LN@Pfirst~, where the pointer +% to the first numbered page has been stored in. + +\def\NumberedPageCache{\LN@Pfirst} + +% To find out on which page the current ~\c@linenumber~ is, +% we define the four dynamic macros to do something useful +% and execute the current cache macro. ~\lastLN~ is run +% first, testing if the line number in question may be on a +% later page. If so, disable ~\firstLN~, and go on to the +% next page via ~\nextLN~. + +\def\testLastNumberedPage#1{\ifnum#1<\c@linenumber + \let\firstLN\@gobble + \fi} + +% Else, if ~\firstLN~ finds out that we need an earlier +% page, we start over from the beginning. Else, ~\nextLN~ +% will be disabled, and ~\pageLN~ will run +% ~\gotNumberedPage~ with four arguments: the first line +% number on this column, the page number, the column +% number, and the first line on the page. 
+ +\def\testFirstNumberedPage#1{\ifnum#1>\c@linenumber + \def\nextLN##1{\testNextNumberedPage\LN@Pfirst}% + \else + \let\nextLN\@gobble + \def\pageLN{\gotNumberedPage{#1}}% + \fi} + +% We start with ~\pageLN~ disabled and ~\nextLN~ defined to +% continue the search with the next page. + +\long\def \@gobblethree #1#2#3{} + +\def\testNumberedPage{% + \let\lastLN\testLastNumberedPage + \let\firstLN\testFirstNumberedPage + \let\pageLN\@gobblethree + \let\nextLN\testNextNumberedPage + \NumberedPageCache + } + +% When we switch to another page, we first have to make +% sure that it is there. If we are done with the last +% page, we probably need to run \TeX\ again, but for the +% rest of this run, the cache macro will just return four +% zeros. This saves a lot of time, for example if you have +% half of an aux-file from an aborted run, in the next run +% the whole page-list would be searched in vain again and +% again for the second half of the document. +% +% If there is another page, we iterate the search. + +\def\testNextNumberedPage#1{\ifx#1\relax + \global\def\NumberedPageCache{\gotNumberedPage0000}% + \PackageWarningNoLine{lineno}% + {Linenumber reference failed, + \MessageBreak rerun to get it right}% + \else + \global\let\NumberedPageCache#1% + \fi + \testNumberedPage + } + +% \linelabel{demo2} +% \marginpar{\tiny\raggedright +% Let's see if it finds the label +% on page \pageref{demo}, +% line \ref{demo}, and back here +% on page \pageref{demo2}, line +% \ref{demo2}. +% }% +% To separate the official hooks from the internals there is +% this equivalence, to hook in later for whatever purpose: + +\let\getLineNumber\testNumberedPage + +% So, now we got the page where the number is on. We +% establish if we are on an odd or even page, and calculate +% the final line number to be printed. 
+ +\newif\ifoddNumberedPage +\newif\ifcolumnwiselinenumbers +\columnwiselinenumbersfalse + +\def\gotNumberedPage#1#2#3#4{\oddNumberedPagefalse + \ifodd \if@twocolumn #3\else #2\fi\relax\oddNumberedPagetrue\fi + \advance\c@linenumber 1\relax + \ifcolumnwiselinenumbers + \subtractlinenumberoffset{#1}% + \else + \subtractlinenumberoffset{#4}% + \fi + } + +% You might want to run the pagewise mode with running line +% numbers, or you might not. It's your choice: + +\def\runningpagewiselinenumbers{% + \let\subtractlinenumberoffset\@gobble + } + +\def\realpagewiselinenumbers{% + \def\subtractlinenumberoffset##1{\advance\c@linenumber-##1\relax}% + } + +\realpagewiselinenumbers + +% For line number references, we need a protected call to +% the whole procedure, with the requested line number stored +% in the ~\c@linenumber~ counter. This is what gets printed +% to the aux-file to make a label: + +\def\thePagewiseLineNumber{\protect + \getpagewiselinenumber{\the\c@linenumber}}% + +% And here is what happens when the label is referred to: + +\def\getpagewiselinenumber#1{{% + \c@linenumber #1\relax\testNumberedPage + \thelinenumber + }} + +% % +% A summary of all per line expenses: +% \begin{description}\item +% [CPU:] The ~\output~ routine is called for each line, +% and the page-search is done. +% \item +% [DISK:] One line of output to the aux-file for each +% numbered line +% \item +% [MEM:] One macro per page. Great improvement over v1.02, +% which had one control sequence per line in +% addition. It blew the hash table after some five +% thousand lines. +% \end{description} +% +% +% +% \subsection{ +% Twocolumn mode (New v3.06) +% } +% +% Twocolumn mode requires another patch to the ~\output~ +% routine, in order to print a column tag to the .aux +% file. 
+ +\let\@LN@orig@makecol\@makecol +\def\@LN@makecol{% + \@LN@orig@makecol + \setbox\@outputbox \vbox{% + \boxmaxdepth \@maxdepth + \protected@write\@auxout{}{% + \string\@LN@col{\if@firstcolumn1\else2\fi}% + }% + \box\@outputbox + }% \vbox +} + +\def\@LN@col#1{\def\@LN@column{#1}} +\@LN@col{1} + +% +% +% +% \subsection{ +% Numbering modulo 5 +% } +% Most users want to have only one in five lines numbered. +% ~\LineNumber~ is supposed to produce the outfit of the +% line number attached to the line, while ~\thelinenumber~ +% is used also for references, which should appear even if +% they are not multiples of five. + +\newcount\c@linenumbermodulo + +\def\themodulolinenumber{{\@tempcnta\c@linenumber + \divide\@tempcnta\c@linenumbermodulo + \multiply\@tempcnta\c@linenumbermodulo + \ifnum\@tempcnta=\c@linenumber\thelinenumber\fi + }} + +% The user command to set the modulo counter: + +\newcommand\modulolinenumbers[1][0]{% + \let\LineNumber\themodulolinenumber + \ifnum#1>1\relax + \c@linenumbermodulo#1\relax + \else\ifnum#1=1\relax + \def\LineNumber{\thelinenumber}% + \fi\fi + } + +\setcounter{linenumbermodulo}{5} +\modulolinenumbers[1] + +% +% \switchlinenumbers +% \modulolinenumbers[1] +% \section{ +% Package options +% } +% There is a bunch of package options, all of them +% executing only user commands (see below). +% +% Options ~left~ (~right~) put the line numbers on the left +% (right) margin. This works in all modes. ~left~ is the +% default. + +\DeclareOption{left}{\leftlinenumbers*} + +\DeclareOption{right}{\rightlinenumbers*} + +% Option ~switch~ (~switch*~) puts the line numbers on the +% outer (inner) margin of the text. This requires running +% the pagewise mode, but we turn off the page offset +% subtraction, getting sort of running numbers again. The +% ~pagewise~ option may restore true pagewise mode later. 
+ +\DeclareOption{switch}{\setpagewiselinenumbers + \switchlinenumbers + \runningpagewiselinenumbers} + +\DeclareOption{switch*}{\setpagewiselinenumbers + \switchlinenumbers*% + \runningpagewiselinenumbers} + +% In twocolumn mode, we can switch the line numbers to +% the outer margin, and/or start with number 1 in each +% column. Margin switching is covered by the ~switch~ +% options. + +\DeclareOption{columnwise}{\setpagewiselinenumbers + \columnwiselinenumberstrue + \realpagewiselinenumbers} + +% The options ~pagewise~ and ~running~ select the major +% linenumber mechanism. ~running~ line numbers refer to a real +% counter value, which can be reset for any paragraph, +% even getting multiple paragraphs on one page starting +% with line number one. ~pagewise~ line numbers get a +% unique hidden number within the document, but with the +% opportunity to establish the page on which they finally +% come to rest. This allows the subtraction of the page +% offset, getting the numbers starting with 1 on top of each +% page, and margin switching in twoside formats becomes +% possible. The default mode is ~running~. +% +% The order of declaration of the options is important here: +% ~pagewise~ must come after ~switch~, to override running +% pagewise mode. ~running~ comes last, to reset the running +% line number mode, e.g., after selecting margin switch mode +% for ~pagewise~ running. Once more, if you specify all +% three of the options ~[switch,pagewise,running]~, the +% result is almost nothing, but if you later say +% ~\pagewiselinenumbers~, you get margin switching, with +% real pagewise line numbers. +% +\DeclareOption{pagewise}{\setpagewiselinenumbers + \realpagewiselinenumbers} + +\DeclareOption{running}{\setrunninglinenumbers} + +% The option ~modulo~ causes only those linenumbers to be +% printed which are multiples of five. 
+ +\DeclareOption{modulo}{\modulolinenumbers\relax} + +% The package option ~mathlines~ switches the behavior of +% the ~{linenomath}~ environment with its star-form. +% Without this option, the ~{linenomath}~ environment does +% not number the lines of the display, while the star-form +% does. With this option, it's just the opposite. +% +%%% 1999-06-10: renamed ~displaymath~ to ~mathlines~. + +\DeclareOption{mathlines}{\linenumberdisplaymath} + +% ~displaymath~ now calls for wrappers of the standard +% LaTeX display math environment. This was previously +% done by ~mlineno.sty~. + +\let\do@mlineno\relax +\DeclareOption{displaymath}{\let\do@mlineno\@empty} + +% The ~hyperref~ package, via ~nameref~, requires three more +% groups in the second argument of a ~\newlabel~. Well, why +% shouldn't it get them? (New v3.07) The presence of the +% ~nameref~ package is now detected automatically +% ~\AtBeginDocument~. (/New v3.07) (Fixed in v3.09) We try +% to be smart, and test ~\AtBeginDocument~ if the ~nameref~ +% package is loaded, but ~hyperref~ postpones the loading of +% ~nameref~ too, so this is all in vain. + +\DeclareOption{hyperref}{\PackageWarningNoLine{lineno}{% + Option [hyperref] is obsolete. + \MessageBreak The hyperref package is detected automatically.}} + +\AtBeginDocument{% + \@ifpackageloaded{nameref}{% + \def\@LN@ExtraLabelItems{{}{}{}}}{}} + +\ProcessOptions + +% \subsection{ +% Package Extensions +% } +% +% The extensions in this section were previously supplied +% in separate ~.sty~ files. +% +% \subsubsection{ +% $display math$ +% } +% +% The standard \LaTeX\ display math environments are +% wrapped in a ~{linenomath}~ environment. +% +% (New 3.05) The ~[fleqn]~ option of the standard +% \LaTeX\ classes defines the display math +% environments such that line numbers appear just +% fine. Thus, we need not do any tricks when +% ~[fleqn]~ is loaded, as indicated by the presence of +% the ~\mathindent~ register. 
(/New 3.05) +% +% (New 3.05a) for ~{eqnarray}~s we rather keep the +% old trick. (/New 3.05a) +% +% (New 3.08) Wrap ~\[~ and ~\]~ into ~{linenomath}~, +% instead of ~{displaymath}~. Also save the definition +% of ~\equation~, instead of replicating the current +% \LaTeX\ definition. (/New 3.08) +% +% ~\@ifundefined~ takes a true and a false branch; the false +% branch is given explicitly as ~{}~, otherwise the ~\par~ from +% the following blank line would be taken as that argument. + +\ifx\do@mlineno\@empty + \@ifundefined{mathindent}{ + + \let\LN@displaymath\[ + \let\LN@enddisplaymath\] + \renewcommand\[{\begin{linenomath}\LN@displaymath} + \renewcommand\]{\LN@enddisplaymath\end{linenomath}} + + \let\LN@equation\equation + \let\LN@endequation\endequation + \renewenvironment{equation} + {\linenomath\LN@equation} + {\LN@endequation\endlinenomath} + + }{}% \@ifundefined{mathindent} + + \let\LN@eqnarray\eqnarray + \let\LN@endeqnarray\endeqnarray + \renewenvironment{eqnarray} + {\linenomath\LN@eqnarray} + {\LN@endeqnarray\endlinenomath} + +\fi + +% \subsubsection{ +% Line numbers in internal vertical mode +% } +% +% The command ~\internallinenumbers~ adds line numbers in +% internal vertical mode, but with limitations: we assume +% fixed baseline skip. 
+ +\def\internallinenumbers{\setrunninglinenumbers + \let\@@par\internallinenumberpar + \ifx\@par\@@@par\let\@par\internallinenumberpar\fi + \ifx\par\@@@par\let\par\internallinenumberpar\fi + \ifx\@par\linenumberpar\let\@par\internallinenumberpar\fi + \ifx\par\linenumberpar\let\par\internallinenumberpar\fi + \@ifnextchar[{\resetlinenumber}%] + {\@ifstar{\let\c@linenumber\c@internallinenumber + \c@linenumber\@ne}{}}% + } + +\let\endinternallinenumbers\endlinenumbers +\@namedef{internallinenumbers*}{\internallinenumbers*} +\expandafter\let\csname endinternallinenumbers*\endcsname\endlinenumbers + +\newcount\c@internallinenumber +\newcount\c@internallinenumbers + +\def\internallinenumberpar{\ifvmode\@@@par\else\ifinner\@@@par\else\@@@par + \begingroup + \c@internallinenumbers\prevgraf + \setbox\@tempboxa\hbox{\vbox{\makeinternalLinenumbers}}% + \dp\@tempboxa\prevdepth + \ht\@tempboxa\z@ + \nobreak\vskip-\prevdepth + \nointerlineskip\box\@tempboxa + \endgroup + \fi\fi + } + +\def\makeinternalLinenumbers{\ifnum\c@internallinenumbers>0\relax + \hbox to\z@{\makeLineNumber}\global\advance\c@linenumber\@ne + \advance\c@internallinenumbers\m@ne + \expandafter\makeinternalLinenumbers\fi + } + +% \subsubsection{ +% Line number references with offset +% } +% +% This extension defines macros to refer to line +% numbers with an offset, e.g., to refer to a line +% which cannot be labeled directly (display math). +% This was formerly known as ~rlineno.sty~. 
+% +% To refer to a pagewise line number with offset: +% \begin{quote} +% ~\linerefp[~<OFFSET>~]{~<LABEL>~}~ +% \end{quote} +% To refer to a running line number with offset: +% \begin{quote} +% ~\linerefr[~<OFFSET>~]{~<LABEL>~}~ +% \end{quote} +% To refer to a line number labeled in the same mode as currently +% selected: +% \begin{quote} +% ~\lineref[~<OFFSET>~]{~<LABEL>~}~ +% \end{quote} +% ~\lineref~ dispatches on the active counter: in running mode +% ~\c@linenumber~ is ~\let~ to ~\c@runninglinenumber~, so the +% ~\ifx~ selects ~\linerefr~; otherwise ~\linerefp~ is used. + +\newcommand\lineref{% + \ifx\c@linenumber\c@runninglinenumber + \expandafter\linerefr + \else + \expandafter\linerefp + \fi +} + +\newcommand\linerefp[2][\z@]{{% + \let\@thelinenumber\thelinenumber + \edef\thelinenumber{\advance\c@linenumber#1\relax\noexpand\@thelinenumber}% + \ref{#2}% +}} + +% This goes deep into \LaTeX's internals. + +\newcommand\linerefr[2][\z@]{{% + \def\@@linerefadd{\advance\c@linenumber#1}% + \expandafter\@setref\csname r@#2\endcsname + \@linerefadd{#2}% +}} + +\newcommand\@linerefadd[2]{\c@linenumber=#1\@@linerefadd\relax + \thelinenumber} + +% \subsubsection{ +% Numbered quotation environments +% } +% +% The ~{numquote}~ and ~{numquotation}~ +% environments are like ~{quote}~ and +% ~{quotation}~, except there will be line +% numbers. +% +% An optional argument gives the number to count +% from. A star ~*~ (inside or outside the closing +% ~}~) prevents the reset of the line numbers. +% Default is to count from one. 
+ +\newcommand\quotelinenumbers + {\@ifstar\linenumbers{\@ifnextchar[\linenumbers{\linenumbers*}}} + +\newdimen\quotelinenumbersep +\quotelinenumbersep=\linenumbersep +\let\quotelinenumberfont\linenumberfont + +\newcommand\numquotelist + {\leftlinenumbers + \linenumbersep\quotelinenumbersep + \let\linenumberfont\quotelinenumberfont + \addtolength{\linenumbersep}{-\@totalleftmargin}% + \quotelinenumbers + } + +\newenvironment{numquote} {\quote\numquotelist}{\endquote} +\newenvironment{numquotation} {\quotation\numquotelist}{\endquotation} +\newenvironment{numquote*} {\quote\numquotelist*}{\endquote} +\newenvironment{numquotation*}{\quotation\numquotelist*}{\endquotation} + +% \subsubsection{ +% Frame around a paragraph +% } +% +% The ~{bframe}~ environment draws a frame around +% some text, across page breaks, if necessary. +% +% This works only for plain text paragraphs, +% without special height lines. All lines must be +% ~\baselineskip~ apart, no display math. + +\newenvironment{bframe} + {\par + \@tempdima\textwidth + \advance\@tempdima 2\bframesep + \setbox\bframebox\hbox to\textwidth{% + \hskip-\bframesep + \vrule\@width\bframerule\@height\baselineskip\@depth\bframesep + \advance\@tempdima-2\bframerule + \hskip\@tempdima + \vrule\@width\bframerule\@height\baselineskip\@depth\bframesep + \hskip-\bframesep + }% + \hbox{\hskip-\bframesep + \vrule\@width\@tempdima\@height\bframerule\@depth\z@}% + \nointerlineskip + \copy\bframebox + \nobreak + \kern-\baselineskip + \runninglinenumbers + \def\makeLineNumber{\copy\bframebox\hss}% + } + {\par + \kern-\prevdepth + \kern\bframesep + \nointerlineskip + \@tempdima\textwidth + \advance\@tempdima 2\bframesep + \hbox{\hskip-\bframesep + \vrule\@width\@tempdima\@height\bframerule\@depth\z@}% + } + +\newdimen\bframerule +\bframerule=\fboxrule + +\newdimen\bframesep +\bframesep=\fboxsep + +\newbox\bframebox + +% \section{ +% The final touch +% } +% There is one deadcycle for each line number. 
+ +\advance\maxdeadcycles 100 + +\endinput + +% \section{ +% The user commands +% } +% The user commands to turn line numbering on and off +% are +% \begin{description}\item +% [|\linenumbers] \ \par +% Turn on line numbering in the current mode. +% \item +% [|\linenumbers*] \ \par$\qquad$ +% and reset the line number to 1. +% \def\NL{<number>]}\item +% [|\linenumbers[\NL] \ \par$\qquad$ +% and start with <number>. +% \item +% [|\nolinenumbers] \ \par +% Turn off line numbering. +% \item +% [|\runninglinenumbers*[\NL] \ \par +% Turn on ~running~ line numbers, with the same optional +% arguments as ~\linenumbers~. The numbers are running +% through the text over pagebreaks. When you turn +% numbering off and on again, the numbers will continue, +% except, of course, if you ask to reset or preset the +% counter. +% \item +% [|\pagewiselinenumbers] \ \par +% Turn on ~pagewise~ line numbers. The lines on each +% page are numbered beginning with one at the first +% ~pagewise~ numbered line. +% \item +% [|\resetlinenumber[\NL] \ \par +% Reset ~[~Set~]~ the line number to 1 +% ~[~<number>~]~. +% \item +% [|\setrunninglinenumbers] \ \par +% Switch to ~running~ line number mode. Do \emph{not} +% turn it on or off. +% \item +% [|\setpagewiselinenumbers] \ \par +% Switch to ~pagewise~ line number mode. Do \emph{not} +% turn it on or off. +% \item +% [|\switchlinenumbers*] \ \par +% Causes margin switching in pagewise modes. With the +% star, put the line numbers on the inner margin. +% \item +% [|\leftlinenumbers*] \ \par +% \item +% [|\rightlinenumbers*] \ \par +% Set the line numbers in the left/right margin. With the +% star this works for both modes of operation, without +% the star only for the currently selected mode. +% \item +% [|\runningpagewiselinenumbers] \ \par +% When using the pagewise line number mode, do not +% subtract the page offset. This results in running +% line numbers again, but with the possibility to switch +% margins. 
Be careful when doing line number +% referencing: this mode status must be the same while +% setting the paragraph and during references. +% \item +% [|\realpagewiselinenumbers] \ \par +% Reverses the effect of ~\runningpagewiselinenumbers~. +% \item +% [|\modulolinenumbers[\NL] \ \par +% Give a number only to lines which are multiples of +% ~[~<number>~]~. If <number> is not specified, the +% current value in the counter ~linenumbermodulo~ is +% retained. <number>=1 turns this off without changing +% ~linenumbermodulo~. The counter is initialized to 5. +% \item +% [|\linenumberdisplaymath] \ \par +% Number the lines of a display math in a ~{linenomath}~ +% environment, but do not in a ~{linenomath*}~ +% environment. This is used by the package option +% ~[mathlines]~. +% \item +% [|\nolinenumberdisplaymath] \ \par +% Do not number the lines of a display math in a +% ~{linenomath}~ environment, but do in a +% ~{linenomath*}~ environment. This is the default. +% \item +% [|\linelabel] \ \par +% Set a ~\linelabel{~<foo>~}~ to the line number where +% this command is. Refer to it with the \LaTeX\ +% referencing commands ~\ref{~<foo>~}~ and +% ~\pageref{~<foo>~}~. +% \end{description} +% The commands can be used globally, locally within groups +% or as environments. It is important to know that they +% take action only when the ~\par~ is executed. The +% ~\end{~<mode>~linenumbers}~ commands provide a ~\par~. +% Examples: +% \begin{verse} +% ~{\linenumbers~ <text> ~\par}~ \\ +% \ \\ +% ~\begin{linenumbers}~ \\ +% <text> \\ +% ~\end{linenumbers}~ \\ +% \ \\ +% <paragraph> ~{\linenumbers\par}~ \\ +% \ \\ +% ~\linenumbers~ \\ +% <text> ~\par~ \\ +% ~\nolinenumbers~ \\ +% \ \\ +% ~\linenumbers~ \\ +% <paragraph> ~{\nolinenumbers\par}~ \\ +% \end{verse} +% +% +% \subsection{ +% Customization hooks +% } +% There are several hooks to customize the appearance of the +% line numbers, and some low level hooks for special +% effects. 
+% \begin{description}\item +% [|\thelinenumber] \ \par +% This macro should give the representation of the line +% number in the \LaTeX-counter ~linenumber~. The +% default is provided by \LaTeX: \par$\qquad$ +% ~\arabic{linenumber}~ +% \item +% [|\makeLineNumberLeft] \ \par +% This macro is used to attach a line number to the left +% of the text page. This macro should fill an ~\hbox to 0pt~ +% which will be placed at the left margin of the +% page, with the reference point aligned to the line to +% which it should give a number. Please use the macro +% ~\LineNumber~ to refer to the line number. +% +% The default definition is \par$\qquad$ +% ~\hss\linenumberfont\LineNumber\hskip\linenumbersep~ +% \item +% [|\makeLineNumberRight] \ \par +% Like ~\makeLineNumberLeft~, but for line numbers on +% the right margin. +% +% The default definition is \par$\qquad$ +% ~\linenumberfont\hskip\linenumbersep\hskip\textwidth~ \par$\qquad$ +% ~\hbox to\linenumberwidth{\hss\LineNumber}\hss~ +% \item +% [|\linenumberfont] \ \par +% This macro is initialized to \par$\qquad$ +% ~\normalfont\tiny\sffamily~ +% \item +% [|\linenumbersep] \ \par +% This dimension register sets the separation of the +% linenumber to the text. Default value is ~10pt~. +% \item +% [|\linenumberwidth] \ \par +% This dimension register sets the width of the line +% number box on the right margin. The distance of the +% right edge of the text to the right edge of the line +% number is ~\linenumbersep~ + ~\linenumberwidth~. The +% default value is ~10pt~. +% \item +% [|\theLineNumber] (for wizards) \ \par +% This macro is called for printing a ~\newlabel~ entry +% to the aux-file. Its definition depends on the mode. +% For running line numbers it's just ~\thelinenumber~, +% while in pagewise mode, the page offset subtraction +% is done in here. +% \item +% [|\makeLineNumber] (for wizards) \ \par +% This macro produces the line numbers. The definition +% depends on the mode. 
In the running line numbers +% mode it just expands ~\makeLineNumberLeft~. +% \item +% [|\LineNumber] (for wizards) \ \par +% This macro is called by ~\makeLineNumber~ to typeset +% the line number. This hook is changed by the modulo +% mechanism. +% \end{description} +% \end{document}%D +------------------------------------------------------------------------------ + +echo "expect errors for unknown commands 'iffalse' and 'fi'";# SHELL +awk '/A[W]K/' lineno.sty | awk -f - lineno.sty >lineno.tex; # SHELL +latex lineno; latex lineno; latex lineno; latex lineno; # SHELL + +awk '/DOC A [W] K/' lineno.sty | awk -f - lineno.sty >lineno.doc; # DOC SH + +BEGIN{DOC=-1; # AWK DOC A W K + BEGINCODE = "\\begin{code}\\begin{verbatim}"; # AWK + ENDCODE = "\\end{verbatim}\n\\end{code}"; } # AWK + BEGINCODE = "% \\begin{macrocode}"; # DOC A W K + ENDCODE = "% \\end{macrocode}"; } # DOC A W K +/^[ \t]*$/ { ECNT++; next; } # AWK DOC A W K +/\\documentclass/{ sub("article","ltxdoc") } # DOC A W K +/%D$/ { sub("^%* *",""); sub("%D$",""); # DOC A W K + print > "lineno.drv"; next } # DOC A W K +/^%%/ { next; } # AWK DOC A W K +/^%/ { if (!DOC) { print ENDCODE; } # AWK DOC A W K + DOC=1; ECNT=0; # AWK DOC A W K + sub("^% *",""); # AWK + sub("^% *","% "); # DOC A W K + print; next; } # AWK DOC A W K +DOC<0 { next } # AWK DOC A W K +/^-+-$/ { if (!DOC) print ENDCODE; exit } # AWK DOC A W K +{ if (DOC) { ECNT=DOC=0; print BEGINCODE; } # AWK DOC A W K + while (ECNT>0) { print " "; ECNT--; } # AWK DOC A W K + print $0; } # AWK DOC A W K + diff --git a/doc/linuxtag.sty b/doc/linuxtag.sty new file mode 100644 index 0000000..3820886 --- /dev/null +++ b/doc/linuxtag.sty @@ -0,0 +1,67 @@ +% Modified for european a4 paper size and font size requirements for LT2K++ +% +% TEMPLATE for Usenix papers, specifically to meet requirements of +% TCL97 committee. +% originally a template for producing IEEE-format articles using LaTeX. 
+% written by Matthew Ward, CS Department, Worcester Polytechnic Institute. +% adapted by David Beazley for his excellent SWIG paper in Proceedings, +% Tcl 96 +% turned into a smartass generic template by De Clarke, with thanks to +% both the above pioneers +% use at your own risk. Complaints to /dev/null. +% make it two column with no page numbering, default is 10 point + +% include following in document. +%\documentclass[a4paper,10pt]{article} +%\usepackage{twocolumn,times} +%\pagestyle{empty} + +% force european A4 paper format +\setlength{\textwidth}{16.6cm} % width of main text +\setlength{\textheight}{25cm} % height of main text +\setlength{\topmargin}{-1.7in} % Subtract default offset +\addtolength{\topmargin}{2.2cm} % Topmargin +\setlength{\topskip}{0pt} % between header and text +\setlength{\headheight}{10pt} % no header +\setlength{\headsep}{10pt} % +\setlength{\oddsidemargin}{-1in} % Subtract default offset +\addtolength{\oddsidemargin}{2.2cm} % odd page left margin +\setlength{\evensidemargin}{-1in} % Subtract default offset +\addtolength{\evensidemargin}{2.2cm} % even page left margin + +% set dimensions of columns, gap between columns, and space between paragraphs +\setlength{\columnsep}{0.25in} +\setlength{\parindent}{0pt} +\setlength{\parskip}{\baselineskip} + +% started out with art10.sty and modified params to conform to IEEE format +% further mods to conform to Usenix standard + +\makeatletter +%as Latex considers descenders in its calculation of interline spacing, +%to get 12 point spacing for normalsize text, must set it to 10 points +\def\@normalsize{\@setsize\normalsize{12pt}\xpt\@xpt +\abovedisplayskip 10pt plus2pt minus5pt\belowdisplayskip \abovedisplayskip +\abovedisplayshortskip \z@ plus3pt\belowdisplayshortskip 6pt plus3pt +minus3pt\let\@listi\@listI} + +%need a 12 pt font size for subsection and abstract headings +\def\subsize{\@setsize\subsize{12pt}\xipt\@xipt} + +%make section titles bold and 12 point, 2 blank lines before, 1 after 
+\def\section{\@startsection {section}{1}{\z@}{18pt plus 2pt minus 2pt} +{2pt plus 2pt minus 2pt}{\large\bf}} + +%make subsection titles bold and 11 point, 1 blank line before, 1 after +\def\subsection{\@startsection {subsection}{2}{\z@}{12pt plus 2pt minus 2pt} +{12pt plus 2pt minus 2pt}{\subsize\bf}} + +\renewcommand\scriptsize{\@setfontsize\scriptsize\@viipt\@viiipt} +\renewcommand\tiny{\@setfontsize\tiny\@vpt\@vipt} +\renewcommand\large{\@setfontsize\large\@xiipt{12}} +\renewcommand\Large{\@setfontsize\Large\@xivpt{14}} +\renewcommand\LARGE{\@setfontsize\LARGE\@xviipt{18}} +\renewcommand\huge{\@setfontsize\huge\@xxpt{24}} +\renewcommand\Huge{\@setfontsize\Huge\@xxvpt{28}} + +\makeatother diff --git a/doc/newseg.fig b/doc/newseg.fig new file mode 100644 index 0000000..4a2f7ea --- /dev/null +++ b/doc/newseg.fig @@ -0,0 +1,84 @@ +#FIG 3.2 +Landscape +Center +Inches +Letter +100.00 +Single +-2 +1200 2 +6 5550 900 7425 3675 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 5625 2850 6750 2850 6750 3450 5625 3450 5625 2850 +2 1 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 2 + 6750 2700 6750 3600 +2 2 0 2 0 7 50 0 44 0.000 0 0 -1 0 0 5 + 6750 2850 7350 2850 7350 3450 6750 3450 6750 2850 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 5625 1200 6750 1200 6750 1800 5625 1800 5625 1200 +2 2 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 5625 975 7350 975 7350 1950 5625 1950 5625 975 +2 1 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 2 + 6750 975 6750 1950 +2 2 0 2 0 7 50 0 44 0.000 0 0 -1 0 0 5 + 6750 1200 7350 1200 7350 1800 6750 1800 6750 1200 +2 2 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 5625 2700 7350 2700 7350 3600 5625 3600 5625 2700 +4 0 0 50 0 0 12 0.0000 4 180 885 5775 3225 .data ... .got\001 +4 0 0 50 0 0 12 0.0000 4 135 315 6825 3225 .bss\001 +4 0 0 50 0 0 12 0.0000 4 180 885 5775 1575 .data ... 
.got\001 +4 0 0 50 0 0 12 0.0000 4 135 315 6825 1575 .bss\001 +-6 +6 7950 2775 10875 3525 +2 2 0 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 8025 2850 9000 2850 9000 3450 8025 3450 8025 2850 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 9825 2850 9000 2850 9000 3450 9825 3450 9825 2850 +2 2 0 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 9825 2850 10800 2850 10800 3450 9825 3450 9825 2850 +4 0 0 50 0 0 12 0.0000 4 180 780 9000 3225 .gnu.liblist\001 +4 0 0 50 0 0 12 0.0000 4 180 930 9825 3225 .gnu.conflict\001 +4 0 0 50 0 0 12 0.0000 4 180 525 8175 3225 .dynstr\001 +-6 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 600 1200 2100 1200 2100 1800 600 1800 600 1200 +2 2 0 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 2100 1200 2775 1200 2775 1800 2100 1800 2100 1200 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 4950 1200 2775 1200 2775 1800 4950 1800 4950 1200 +2 2 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 450 2700 4950 2700 4950 3600 450 3600 450 2700 +2 2 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 450 975 4950 975 4950 1950 450 1950 450 975 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 4950 2850 2775 2850 2775 3450 4950 3450 4950 2850 +2 2 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 5 + 750 2850 2250 2850 2250 3450 750 3450 750 2850 +2 2 1 2 0 7 50 0 -1 6.000 0 0 -1 0 0 5 + 8025 2700 10800 2700 10800 3600 8025 3600 8025 2700 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 1350 1950 1500 2700 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 11 + 0 0 1.00 60.00 120.00 + 3975 1950 3975 2025 3975 2100 3975 2175 3975 2250 3975 2325 + 3975 2400 3975 2475 3975 2550 3975 2625 3975 2700 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 11 + 0 0 1.00 60.00 120.00 + 6150 1950 6150 2025 6150 2100 6150 2175 6150 2250 6150 2325 + 6150 2400 6150 2475 6150 2550 6150 2625 6150 2700 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 11 + 0 0 1.00 60.00 120.00 + 6975 1950 6975 2025 6975 2100 6975 2175 6975 2250 6975 2325 + 6975 2400 6975 2475 6975 2550 6975 2625 6975 2700 +2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 2100 1950 8025 2700 +2 1 0 1 0 7 50 
0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 2775 1950 9000 2700 +4 0 0 50 0 0 12 0.0000 4 180 1335 675 1575 .interp ... .dynsym\001 +4 0 0 50 0 0 12 0.0000 4 180 525 2175 1575 .dynstr\001 +4 0 0 50 0 0 12 0.0000 4 180 1890 2850 1575 .gnu.version ... .eh_frame\001 +4 0 0 50 0 0 12 0.0000 4 180 1335 825 3225 .interp ... .dynsym\001 +4 0 0 50 0 0 12 0.0000 4 180 1890 2850 3225 .gnu.version ... .eh_frame\001 diff --git a/doc/prelink.pdf b/doc/prelink.pdf Binary files differnew file mode 100644 index 0000000..0783007 --- /dev/null +++ b/doc/prelink.pdf diff --git a/doc/prelink.tex b/doc/prelink.tex new file mode 100644 index 0000000..5075030 --- /dev/null +++ b/doc/prelink.tex @@ -0,0 +1,3643 @@ +\documentclass[twoside]{article} + +\def\docversion{0.7} +% timezone: +01 == CET +\def\timezone{+01} +% Uncomment for draft print. +\def\isdraft{1} + +\newif\ifpdf +\ifx\pdfoutput\undefined + \pdffalse % we are not running PDFLaTeX +\else + \pdfoutput=1 % we are running PDFLaTeX + \pdftrue +\fi + +\usepackage{linuxtag} +\usepackage{times} +\usepackage{makeidx} +\usepackage{nomencl} +\usepackage[square]{natbib} +\usepackage{marvosym} +\usepackage{longtable} +\renewcommand\bibsection{\chapter{\bibname}} + +\ifpdf + \usepackage[pdftex]{graphics} + \usepackage{type1cm} + \usepackage{thumbpdf} + \pdfcompresslevel9 + \pdfinfo{/CreationDate (D:20030924012900\timezone'00')} + % The following code to set /ModDate comes from Heiko Oberdiek's paper + % one PDF & hyperref. I only added the \timezone stuff. 
+ \begingroup + \def\twodigits#1{\ifnum#1<10 0\fi\the#1}% + \count0=\time \divide\count0 by 60 + \edef\x{\twodigits{\count0}}% + \multiply\count0 by 60 + \count1=\time \advance\count1 by -\count0 + \edef\x{\x\twodigits{\count1}}% + \edef\x{/ModDate (D:\the\year \twodigits\month \twodigits\day \x 00\timezone'00')}% + \expandafter\endgroup + \expandafter\pdfinfo\expandafter{\x}% + \input pdfcolor + + %% For a "Draft" mark on the pages uncomment the following: + \ifx\isdraft\undefined + \relax + \else + \usepackage{eso-pic} + \usepackage{color} + \makeatletter + \AddToShipoutPicture{\rm% + \setlength{\@tempdimb}{.5\paperwidth}% + \setlength{\@tempdimc}{.5\paperheight}% + \setlength{\unitlength}{1pt}% + \put(\strip@pt\@tempdimb,\strip@pt\@tempdimc){% + \makebox(0,0){\rotatebox{45}{\textcolor[gray]{0.9}{\fontsize{5cm}{5cm}\selectfont{Draft}}}} + } + } + \makeatother + \fi +\else + \usepackage[dvips]{graphics} +\fi + +\def\titlecolor{NavyBlue} +\makeatletter +\def\@sect#1#2#3#4#5#6[#7]#8{% + \ifnum #2>\c@secnumdepth + \let\@svsec\@empty + \else + \refstepcounter{#1}% + \protected@edef\@svsec{\@seccntformat{#1}\relax}% + \fi + \@tempskipa #5\relax + \ifdim \@tempskipa>\z@ + \begingroup + \hbox{\expandafter\csname\titlecolor\endcsname#6{% + \@hangfrom{\hskip #3\relax\@svsec}% + \interlinepenalty \@M #8\@@par}\Black}% + \endgroup + \csname #1mark\endcsname{#7}% + \addcontentsline{toc}{#1}{% + \ifnum #2>\c@secnumdepth \else + \protect\numberline{\csname the#1\endcsname}% + \fi + #7}% + \else + \def\@svsechd{% + \hbox{\expandafter\csname\titlecolor\endcsname#6{\hskip #3\relax + \@svsec #8}% + \csname #1mark\endcsname{#7}\Black}% + \addcontentsline{toc}{#1}{% + \ifnum #2>\c@secnumdepth \else + \protect\numberline{\csname the#1\endcsname}% + \fi + #7}}% + \fi + \@xsect{#5}} +\def\@ssect#1#2#3#4#5{% + \@tempskipa #3\relax + \ifdim \@tempskipa>\z@ + \begingroup + \expandafter\csname\titlecolor\endcsname#4{% + \@hangfrom{\hskip #1}% + \interlinepenalty \@M #5\@@par}\Black% + 
\endgroup + \else + \def\@svsechd{\expandafter\csname\titlecolor\endcsname#4{\hskip #1\relax #5}\Black}% + \fi + \@xsect{#3}} +\makeatother + +\usepackage{fancyheadings} +\pagestyle{fancy} +\rhead{} +\chead{} +\lhead{} +\rfoot[\sl Prelink]{\thepage} +\lfoot[\thepage]{\sl Jakub Jel\'\i nek} +\ifx\isdraft\undefined +\cfoot{Version \docversion} +\else +\cfoot{Draft \docversion} +\fi +\renewcommand{\headrulewidth}{0.4pt} +\renewcommand{\footrulewidth}{0.4pt} + +\ifx\isdraft\undefined +\relax +\else +\usepackage[mathlines]{lineno} +\fi + +\usepackage{graphicx} +\usepackage{hyperref} +\hypersetup{ + bookmarksnumbered, + bookmarksopen=true, + pdfpagemode=UseOutlines, + pdfkeywords={Prelink, ELF, DSO, Shared Library, Dynamic Linking, Linux} +} +\usepackage{prelinklisting} + +\def\tts#1{\texttt{\small #1}} + +\setcounter{dbltopnumber}{3} + + +\makeatletter +\newcommand{\annotate}[2][]{% + \marginpar{% + \pdfstringdef\x@title{#1}% + \edef\r{\string\r}% + \pdfstringdef\x@contents{#2}% + \pdfannot + width 50em%\linewidth + height .5\baselineskip + depth 2.5\baselineskip + { + /Subtype /Text + /T (\x@title) + /Contents (\x@contents) + }% + } + } +\makeatother + +\makeglossary +\makeindex + +\begin{document} + + \makeatletter + \newcommand\orgmaketitle{} + \let\orgmaketitle\maketitle + \def\maketitle{% + \hypersetup{ + pdftitle={\@title}, + pdfsubject={Description of prelink tool}, + pdfauthor={\@author} + }% + \orgmaketitle + } + \makeatother + +\title{Prelink} +\author{Jakub Jel\'\i nek\\ +Red Hat, Inc.\\ +{\small\tt\href{mailto:jakub@redhat.com}{jakub@redhat.com}}} + +%\maketitle + +%\tableofcontents +%\vfil\break +%\listoftables +%\vfil\break +%\listofprelinklistings +%\vfil\break +%\listoffigures +%\vfil\break + +\maketitle + +\begin{center} +\begin{abstract} +\vspace*{.5\baselineskip} +\parbox{0.8\textwidth}{% +Prelink is a tool designed to speed up dynamic linking of ELF +programs on various Linux architectures. 
+It speeds up start up of OpenOffice.org 1.1 by 1.8s from 5.5s on 651MHz Pentium III.} +\end{abstract} +\end{center} + +\ifx\isdraft\undefined + \relax +\else + \linenumbers + \linenumbersep4pt +\fi + +\section{Preface} + +In 1995, Linux changed its binary format from \tts{a.out} to \tts{ELF}. +The \tts{a.out} binary format was very inflexible and shared libraries +were pretty hard to build. Linux's shared libraries in \tts{a.out} are position +dependent and each had to be given a unique virtual address space slot +at link time. Maintaining these assignments was pretty hard even when +there were just a few shared libraries, there used to be a central address +registry maintained by humans in form of a text file, but it is certainly +impossible to do these days when there are thousands of different shared libraries +and their size, version and exported symbols are constantly changing. +On the other side, there was just minimum amount of work the dynamic +linker had to do in order to load these shared libraries, as relocation +handling and symbol lookup was only done at link time. The dynamic linker +used the \tts{uselib} system call which just mapped the named library +into the address space (with no segment or section protection differences, +the whole mapping was writable and executable). + +The \href{http://www.caldera.com/developers/devspecs/gabi41.pdf}% +{\tts{ELF}} +\footnote{As described in generic ABI document [1] and various processor +specific ABI supplements [2], [3], [4], [5], [6], [7], [8].} +binary format is one of the most flexible binary formats, +its shared libraries are easy to build and there is no need for a central +assignment of virtual address space slots. Shared libraries are position +independent and relocation handling and symbol lookup are done partly +at the time the executable is created and partly at runtime. 
Symbols in shared +libraries can be overridden at runtime by preloading a new shared +library defining those symbols or without relinking an executable by adding +symbols to a shared library which is searched earlier during symbol +lookup or by adding new dependent shared libraries to a library used by the +program. All these improvements have their price, which is a slower +program startup, more non-shareable memory per process and runtime cost +associated with position independent code in shared libraries. + +Program startup of \tts{ELF} programs is slower than startup of \tts{a.out} +programs with shared libraries, because the dynamic linker has much more work +to do before calling the program's entry point. The cost of loading libraries +is just slightly bigger, as \tts{ELF} shared libraries have typically +separate read-only and writable segments, so the dynamic linker +has to use different memory protection for each segment. +The main difference is in relocation handling and associated symbol lookup. +In the \tts{a.out} format there was no relocation handling or symbol lookup at runtime. +In \tts{ELF}, this cost is much more important today than it used to be +during \tts{a.out} to \tts{ELF} transition in Linux, as especially GUI +programs keep constantly growing and start to use more and more shared +libraries. 5 years ago programs using more than 10 shared libraries +were very rare; these days most of the GUI programs link against around +40 or more shared libraries, and in extreme cases programs use even more than 90 +shared libraries. Every shared library adds its set of dynamic relocations +to the cost and enlarges symbol search scope, +\nomenclature{Symbol Search Scope}{The sequence of \tts{ELF} objects in +which a symbol is being looked up. When a symbol definition is found, +the searching stops and the found symbol is returned. 
Each program +has a global search scope, which starts with the executable, is typically +followed by the immediate dependencies of the executable and then their +dependencies in breadth-first search order (where only the first occurrence +of each shared library is kept). If \tts{DT\_FILTER} +or \tts{DT\_AUXILIARY} dynamic tags are used the order is slightly +different. Each shared library loaded with \tts{dlopen} has its +own symbol search scope which contains that shared library and +its dependencies. \tts{Prelink} operates also with natural +symbol search scope of each shared library, which is the global +symbol search scope the shared library would have if it were started +as the main program} +so in addition to doing more symbol lookups, each symbol +lookup the application has to perform is on average more expensive. +Another factor increasing the cost is the length of symbol names +which have to be compared when finding a symbol in the symbol hash table of +a shared library. C++ libraries tend to have extremely long symbol +names and unfortunately the new \href{http://www.codesourcery.com/cxx-abi/}% +{C++ ABI} puts namespaces and class names first and method names last +in the mangled names, so often symbol names differ only in the last +few bytes of very long names. + +Every time a relocation is applied the entire memory page +\nomenclature{Page}{Memory block of fixed size which virtual memory +subsystem deals with as a unit. The size of the page depends on +the addressing hardware of the processor, typically pages are 4K or 8K, +in some cases bigger} +containing the address which is written to must be loaded into memory. +The operating system does a copy-on-write operation which also has the +consequence that the physical memory of the memory page can no longer +be shared with other processes. 
+With \tts{ELF}, typically all of program's Global Offset Table, +\nomenclature{Global Offset Table (\tts{GOT})}{When position independent +code needs to build address which requires dynamic relocation, instead +of building it as constant in registers and applying a dynamic relocation +against the read-only segment (which would mean that any pages of the +read-only segment where relocations are applied cannot be shared between +processes anymore), it loads the address from an offset table +private to each shared library, which is created by the linker. +The table is in writable segment and relocations are applied against it. +Position independent code uses on most architectures a special \tts{PIC} +register which points to the start of the Global Offset Table} +constants and variables containing pointers to objects in shared libraries, etc. +are written into before the dynamic linker passes control over to the program. + +On most architectures (with some exceptions like \tts{AMD64} architecture) +position independent code requires that one register needs to be dedicated as +\tts{PIC} register and thus cannot be used in the functions for other purposes. +This especially degrades performance on register-starved +architectures like \tts{IA-32}. Also, there needs to be some code to +set up the \tts{PIC} register, either invoked as part of function prologues, +or when using function descriptors in the calling sequence. + +\tts{Prelink} is a tool which (together with corresponding dynamic linker +and linker changes) attempts to bring back some of the \tts{a.out} +advantages (such as the speed and less COW'd pages) to the \tts{ELF} +binary format while retaining all of its flexibility. In a limited way +it also attempts to decrease number of non-shareable pages created by +relocations. 
+\tts{Prelink} works closely with the dynamic linker in the GNU C library, +but probably it wouldn't be too hard to port it to some other \tts{ELF} +using platforms where the dynamic linker can be modified in similar +ways. + +\section{Caching of symbol lookup results} + +Program startup can be speeded up by caching of symbol lookup +results\footnote{Initially, this has been implemented in the \tts{prelink} +tool and \tts{glibc} dynamic linker, where \tts{prelink} was sorting +relocation sections of existing executables and shared libraries. +When this has been implemented in the linker as well and most executables +and shared libraries are already built with \tts{-z combreloc}, +the code from \tts{prelink} has been removed, as it was no longer +needed for most objects and just increasing the tool's complexity.}. +Many shared libraries need more than one lookup of a particular symbol. +This is especially true for C++ shared libraries, where e.g. the same method +is present in multiple virtual tables or {\sl RTTI} data structures. +\nomenclature{RTTI}{C++ runtime type identification} +Traditionally, each \tts{ELF} section which needs dynamic relocations has an +associated \tts{.rela*} or \tts{.rel*} section (depending on whether +the architecture is defined to use \tts{RELA} or \tts{REL} relocations). +\nomenclature{RELA}{Type of relocation structure which includes offset, +relocation type, symbol against which the relocation is and an integer +addend which is added to the symbol. Memory at offset is not supposed +to be used by the relocation. Some architectures got this implemented +incorrectly and memory at offset is for some relocation types used +by the relocation, either in addition to addend or addend is not used +at all. 
\tts{RELA} relocations are generally better for \tts{prelink}, +since when \tts{prelink} stores a pre-computed value into the memory location +at offset, the addend value is not lost} +\nomenclature{REL}{Type of relocation structure which includes just offset, +relocation type and symbol. Addend is taken from memory location at +offset} +The relocations in those sections are typically sorted by ascending +\tts{r\_offset} values. +Symbol lookups are usually the most expensive operation during program +startup, so caching the symbol lookups has the potential to decrease time +spent in the dynamic linker. +One way to decrease the cost of symbol lookups is to create a table with the +size equal to the number of entries +in the dynamic symbol table (\tts{.dynsym}) in the dynamic linker when resolving +a particular shared library, but that would in some cases need a lot of +memory and some time spent in initializing the table. Another option +would be to use a hash table with chained lists, but that needs both +extra memory and would also take extra time for computation of the hash value +and walking up the chains when doing new lookups. +Fortunately, neither of these is really necessary if we modify the linker +to sort relocations so that relocations against the same symbol +are adjacent. This was done first in the \tts{Sun} linker and dynamic +linker, so the GNU linker and dynamic linker use the same \tts{ELF} extensions +and linker flags. Particularly, the following new \tts{ELF} dynamic tags have been introduced: + +\tts{\#define DT\_RELACOUNT 0x6ffffff9}\\ +\tts{\#define DT\_RELCOUNT 0x6ffffffa} + +New options \tts{-z combreloc} and \tts{-z nocombreloc} have been +added to the linker. The latter causes the previous linker behavior, +i.e. each section requiring relocations has a corresponding relocation section, +which is sorted by ascending \tts{r\_offset}. 
\tts{-z combreloc} +\footnote{\tts{-z combreloc} is the default in GNU linker versions +2.13 and later.} instructs the linker to create just one relocation +section for dynamic relocations other than symbol jump table (\tts{PLT}) +relocations. +\nomenclature{PLT}{Procedure Linkage Table. Stubs in \tts{ELF} shared +libraries and executables which allow lazy relocations of function calls. +They initially point to code which will do the symbol lookup. The +result of this symbol lookup is then stored in the Procedure Linkage Table +and control transferred to the address symbol lookup returned. All +following calls to the \tts{PLT} slot just branch to the already looked +up address directly, no further symbol lookup is needed} +This single relocation section (either \tts{.rela.dyn} or \tts{.rel.dyn}) +is sorted, so that relative relocations come first (sorted by ascending +\tts{r\_offset}), followed by other relocations, sorted again by ascending +\tts{r\_offset}. If more relocations are against the same +symbol, they immediately follow the first relocation against that symbol +with lowest \tts{r\_offset}. +\footnote{In fact the sorting needs to take into account also the type of +lookup. Most of the relocations will resolve to a \tts{PLT} slot in the executable +if there is one for the lookup symbol, because the executable might have a +pointer against that symbol without any dynamic relocations. But e.g. +relocations used for the \tts{PLT} slots must avoid these.} +\nomenclature{relative relocation}{Relocation which doesn't need a symbol +lookup, just adds a shared library load offset to certain memory location +(or locations)} +The number of relative relocations at the beginning of the section +is stored in the \tts{DT\_RELACOUNT} resp. \tts{DT\_RELCOUNT} dynamic tag. + +The dynamic linker can use the new dynamic tag for two purposes. 
+If the shared library is successfully mapped at the same address +as the first \tts{PT\_LOAD} segment's virtual address, the load offset +is zero and the dynamic linker can avoid all the relative relocations which +would just add zero to various memory locations. Normally shared libraries are +linked with the first \tts{PT\_LOAD} segment's virtual address set to zero, so +the load offset is non-zero. This can be changed through a linker script or by +using a special \tts{prelink} option \tts{--reloc-only} to change +the base address of a shared library. All prelinked shared libraries +have a non-zero base address as well. If the load offset is non-zero, the +dynamic linker can still make use of this dynamic tag, as relative +relocation handling is typically much simpler than handling other +relocations (since symbol lookup is not necessary) and thus it can +handle all relative relocations in a tight loop in one place and +then handle the remaining relocations with the fully featured +relocation handling routine. The second and more important point is +that if relocations against the same symbol are adjacent, the dynamic +linker can use a cache with a single entry. + +The dynamic linker in \tts{glibc}, if it sees \tts{statistics} +as part of the \tts{LD\_DEBUG} environment variable, displays statistics +which can show how useful this optimization is. +Let's look at some big C++ application, e.g. konqueror. +If not using the cache, the statistics look like this: + +\noindent{\small\begin{verbatim} +18000: runtime linker statistics: +18000: total startup time in dynamic loader: 270886059 clock cycles +18000: time needed for relocation: 266364927 clock cycles (98.3%) +18000: number of relocations: 79067 +18000: number of relocations from cache: 0 +18000: number of relative relocations: 31169 +18000: time needed to load objects: 4203631 clock cycles (1.5%) +\end{verbatim}} + +This program run is with hot caches, on a non-prelinked system, with lazy +binding. 
+\nomenclature{Lazy Binding}{A way to postpone symbol lookups for calls until +a function is called for the first time in particular shared library. +This decreases number of symbol lookups done during startup and symbols +which are never called don't need to be looked up at all. Calls requiring +relocations jump into \tts{PLT}, which is initially set up so that a +function in the dynamic linker is called to do symbol lookup. The looked +up address is then stored either into the \tts{PLT} slot directly +(if \tts{PLT} is writable) or into \tts{GOT} entry corresponding +to the \tts{PLT slot} and any subsequent calls already go directly to that +address. Lazy binding can be turned off by setting \tts{LD\_BIND\_NOW=1} +in the environment. Prelinked programs never use lazy binding for the +executable or any shared libraries not loaded using \tts{dlopen}} +The numbers show that the dynamic linker spent most of its time +in relocation handling and especially symbol lookups. If using symbol +lookup cache, the numbers look different: + +\noindent{\small\begin{verbatim} +18013: total startup time in dynamic loader: 132922001 clock cycles +18013: time needed for relocation: 128399659 clock cycles (96.5%) +18013: number of relocations: 25473 +18013: number of relocations from cache: 53594 +18013: number of relative relocations: 31169 +18013: time needed to load objects: 4202394 clock cycles (3.1%) +\end{verbatim}} + +On average, for one real symbol lookup there were two cache hits and total +time spent in the dynamic linker decreased by 50\%. + +\section{Prelink design} + +\tts{Prelink} was designed, so that it requires as few \tts{ELF} extensions +as possible. It should not be tied to a particular architecture, but +should work on all \tts{ELF} architectures. During program startup it +should avoid all symbol lookups which, as has been shown above, are +very expensive. 
It needs to work in an environment where shared +libraries and executables are changing from time to time, whether it is +because of security updates or feature enhancements. It should avoid big code +duplication between the dynamic linker and the tool. And prelinked +shared libraries need to be usable even in non-prelinked executables, +or when one of the shared libraries is upgraded and the prelinking of the +executable has not been updated. + +To minimize the number of performed relocations during startup, +the shared libraries (and executables) need to be relocated +already as much as possible. For relative relocations this means the library +needs to be loaded always at the same base address, for other relocations +this means all shared libraries with definitions those relocations resolve +to (often this includes all shared libraries the library or executable depends on) +must always be loaded at the same addresses. \tts{ELF} executables +(with the exception of {\sl Position Independent Executables}) +\nomenclature{Position Independent Executable}{A hybrid between +classical \tts{ELF} executables and \tts{ELF} shared libraries. +It has a form of a \tts{ET\_DYN} object like shared libraries and should +contain position independent code, so that the kernel can load +the executable starting at random address to make certain security attacks +harder. Unlike shared libraries it contains \tts{DT\_DEBUG} dynamic +tag, must have \tts{PT\_INTERP} segment with dynamic linker's path, +must have meaningful code at its \tts{e\_entry} and can use symbol +lookup assumptions normal executables can make, particularly that +no symbol defined in the executable can be overridden by a shared +library symbol} have their load address fixed already during linking. +For shared libraries, \tts{prelink} needs something similar to \tts{a.out} +registry of virtual address space slots. 
Maintaining such registry +across all installations wouldn't scale well, so \tts{prelink} instead +assigns these virtual address space slots on the fly after looking at +all executables it is supposed to speed up and all their dependent shared +libraries. The next step is to actually relocate shared libraries +to the assigned base address. + +When this is done, the actual prelinking of shared libraries can be done. +First, all dependent shared libraries need to be prelinked (\tts{prelink} +doesn't support circular dependencies between shared libraries, will just +warn about them instead of prelinking the libraries in the cycle), then for each +relocation in the shared library \tts{prelink} needs to look up the symbol +in natural symbol search scope of the shared library (the shared library +itself first, then breadth first search of all dependent shared libraries) and +apply the relocation to the symbol's target section. The symbol lookup code +in the dynamic linker is quite complex and big, so to avoid duplicating all +this, \tts{prelink} has chosen to use dynamic linker to do the symbol lookups. +Dynamic linker is told via a special environment variable it should print +all performed symbol lookups and their type and \tts{prelink} reads this +output through a pipe. As one of the requirements was that +prelinked shared libraries must be usable even for non-prelinked executables +(duplicating all shared libraries so that there are pristine and prelinked +copies would be very unfriendly to RAM usage), \tts{prelink} has to ensure +that by applying the relocation no information is lost and thus relocation +processing can be cheaply done at startup time of non-prelinked executables. +For \tts{RELA} architectures this is easier, because the content +of the relocation's target memory is not needed when processing the relocation. 
+\footnote{Relative relocations on certain \tts{RELA} architectures use +relocation target's memory, either alone or together with \tts{r\_addend} +field.} For \tts{REL} architectures this is not the case. +\tts{prelink} attempts some tricks described +later and if they fail, needs to convert the \tts{REL} relocation section +to \tts{RELA} format where addend is stored in the relocation section +instead of relocation target's memory. + +When all shared libraries an executable (directly or indirectly) depends on +are prelinked, relocations in the executable are handled similarly to +relocations in shared libraries. Unfortunately, not all symbols resolve the +same when looked up in a shared library's natural symbol search scope +(i.e. as it is done at the time the shared library is prelinked) and when +looked up in application's global symbol search scope. Such symbols are +herein called {\sl conflicts} and the relocations against those symbols +{\sl conflicting relocations}. Conflicts depend on the executable, all its +shared libraries and their respective order. They are only computable +for the shared libraries linked to the executable (libraries mentioned in +\tts{DT\_NEEDED} dynamic tags and shared libraries they transitively need). +The set of shared libraries loaded via \tts{dlopen(3)} cannot be predicted +by \tts{prelink}, neither can the order in which this happened, nor the time +when they are unloaded. When the dynamic linker prints symbol lookups +done in the executable, it also prints conflicts. \tts{Prelink} then +takes all relocations against those symbols and builds a special +\tts{RELA} section with conflict fixups and stores it into the +prelinked executable. Also a list of all dependent shared libraries +in the order they appear in the symbol search scope, together +with their checksums and times of prelinking is stored in another special +section. + +The dynamic linker first checks if it is itself prelinked. 
If yes, +it can avoid its preliminary relocation processing (this one is done +with just the dynamic linker itself in the search scope, so that +all routines in the dynamic linker can be used easily without too many +limitations). When it is about to start a program, it first looks +at the library list section created by \tts{prelink} (if any) and +checks whether they are present in symbol search scope in the same +order, none have been modified since prelinking and that there aren't any +new shared libraries loaded either. If all these conditions are +satisfied, prelinking can be used. In that case the dynamic linker +processes the fixup section and skips all normal relocation handling. +If one or more of the conditions are not met, the dynamic linker continues +with normal relocation processing in the executable and all shared libraries. + +\section{Collecting executables and libraries which should be prelinked} + +Before the actual work can start the \tts{prelink} tool needs to collect the +filenames of executables and libraries it is supposed to prelink. +It doesn't make any sense to prelink a shared library if no executable is +linked against it because the prelinking information will not be used anyway. +Furthermore, when \tts{prelink} needs to do a \tts{REL} to \tts{RELA} +conversion of relocation sections in the shared library (see later) +or when it needs to convert \tts{SHT\_NOBITS} \tts{PLT} section to +\tts{SHT\_PROGBITS}, a prelinked shared library might grow in size and so +prelinking is only desirable if it will speed up startup of some +program. The only change which might be useful even for shared libraries +which are never linked against, only loaded using \tts{dlopen}, is +relocating to a unique address. This is useful if there are many relative +relocations and there are pages in the shared library's writable segment +which are never written into with the exception of those relative +relocations. 
Such shared libraries are rare, so \tts{prelink} doesn't +handle these automatically, instead the administrator or developer can +use \tts{prelink --reloc-only={\sl ADDRESS}} to relocate it manually. +Prelinking an executable requires all shared libraries it is linked against +to be prelinked already. + +\tts{Prelink} has two main modes in which it collects filenames. +One is {\sl incremental prelinking}, where \tts{prelink} is invoked without +the \tts{-a} option. In this mode, \tts{prelink} queues for prelinking +all executables and shared libraries given on the command line, all executables +in directory trees specified on the command line, and all shared libraries +those executables and shared libraries are linked against. +For the reasons mentioned earlier a shared library is queued only if a +program is linked with it or the user tells the tool to do it anyway +by explicitly mentioning it on the command line. +The second mode is {\sl full prelinking}, where the \tts{-a} option is +given on the command line. This in addition to incremental prelinking +queues all executables found in directory trees specified in \tts{prelink.conf} +(which typically includes all or most directories where system executables +are found). For each directory subtree in the config file the user +can specify whether symbolic links to places outside of the tree are to be followed +or not and whether searching should continue even across filesystem +boundaries. + +There is also an option to blacklist some executables or directory trees +so that the executables or anything in the directory trees will not +be prelinked. This can be specified either on the command line or in +the config file. + +\tts{Prelink} will not attempt to change executables which use a non-standard +dynamic linker +\footnote{Standard dynamic linker path is hardcoded in the executable for each +architecture. 
It can be overridden from the command line, but only with +one dynamic linker name (normally, multiple standard dynamic linkers are +used when prelinking mixed architecture systems).} +for security reasons, because it actually needs to execute the dynamic +linker for symbol lookup and it needs to avoid executing some random +unknown executable with the permissions with which \tts{prelink} is run +(typically \tts{root}, with the permissions at least for changing all +executables and shared libraries in the system). The administrator should +ensure that \tts{prelink.conf} doesn't contain world-writable directories +and such directories are not given to the tool on the command line either, +but the tool should be distrustful of the objects nevertheless. + +Also, \tts{prelink} will not change shared libraries which are not specified +directly on the command line or located in the directory trees specified on the +command line or in the config file. This is so that +e.g. \tts{prelink} doesn't try to change shared libraries on shared +networked filesystems, or at least it is possible to configure the tool +so that it doesn't do it. + +For each executable and shared library it collects, \tts{prelink} executes +the dynamic linker to list all shared libraries it depends on, checks if +it is already prelinked and whether any of its dependencies changed. +Objects which are already prelinked and have no dependencies which changed +don't have to be prelinked again (with the exception when e.g. virtual +address space layout code finds out it needs to assign new virtual address space slots +for the shared library or one of its dependencies). Running the dynamic +linker to get the symbol lookup information is a quite costly +operation especially on systems with many executables and shared libraries +installed, so \tts{prelink} offers a faster \tts{-q} mode. 
In all modes, +\tts{prelink} stores modification and change times of each shared library +and executable together with all object dependencies and other information +into \tts{prelink.cache} file. When prelinking in \tts{-q} mode, it +just compares modification and change times of the executables and shared +libraries (and all their dependencies). Change time is needed because +\tts{prelink} preserves modification time when prelinking (as well as +permissions, owner and group). If the times match, it assumes the +file has not changed since last prelinking. Therefore the file can be +skipped if it is already prelinked and none of the dependencies changed. +If any time changed or one of the dependencies changed, it invokes the +dynamic linker the same way as in normal mode to find out real dependencies, +whether it has been prelinked or not etc. The collecting phase in normal +mode can take a few minutes, while in quick mode it usually takes just a few +seconds, as the only thing it does is issue lots of \tts{stat} +system calls. + +\section{Assigning virtual address space slots} + +\tts{Prelink} has to ensure at least that for all successfully prelinked +executables all shared libraries they are (transitively) linked against +have non-overlapping virtual address space slots (furthermore they +cannot overlap with the virtual address space range used by the executable +itself, its \tts{brk} area, typical stack location and \tts{ld.so.cache} +and other files mmaped by the dynamic linker in early stages of dynamic +linking (before all dependencies are mmaped)). If there were any overlaps, +the dynamic linker (which mmaps the shared libraries at the desired location +without \tts{MAP\_FIXED} mmap flag so that it is only soft requirement) would +not manage to mmap them at the assigned locations and the prelinking +information would be invalidated (the dynamic linker would have to do all +normal relocation handling and symbol lookups).
Executables are linked against +very wide variety of shared library combinations and that has to be taken +into account. + +The simplest approach is to sort shared libraries by descending +usage count (so that most often used shared libraries like the dynamic +linker, \tts{libc.so} etc. are close to each other) and assign them +consecutive slots starting at some architecture specific base address +(with a page or two in between the shared libraries to allow for a limited +growth of shared libraries without having to reposition them). +\tts{Prelink} has to find out which shared libraries will need +a \tts{REL} to \tts{RELA} conversion of relocation sections +and for those which will need the conversion count with the increased size +of the library's loadable segments. This is \tts{prelink} behavior without +\tts{-m} and \tts{-R} options. + +The architecture specific base address is best located a few megabytes above +the location where \tts{mmap} with \tts{NULL} first argument and without +\tts{MAP\_FIXED} starts allocating memory areas (in Linux this is the value +of \tts{TASK\_UNMAPPED\_BASE} macro). +\footnote{\tts{TASK\_UNMAPPED\_BASE} has been chosen +on each platform so that there is enough virtual memory for both the +\tts{brk} area (between executable's end and this memory address) and \tts{mmap} +area (between this address and bottom of stack).} The reason for not +starting to assign addresses in \tts{prelink} immediately at +\tts{TASK\_UNMAPPED\_BASE} is that \tts{ld.so.cache} and other mappings by +the dynamic linker will end up in the same range and could overlap with +the shared libraries. Also, if some application uses \tts{dlopen} to load +a shared library which has been prelinked, +\footnote{Typically this is because some other executable is linked against that +shared library directly.} +those few megabytes above \tts{TASK\_UNMAPPED\_BASE} increase the probability +that the stack slot will be still unused (it can clash with e.g. 
+non-prelinked shared libraries loaded by \tts{dlopen} earlier +\footnote{If shared libraries have first \tts{PT\_LOAD} segment's virtual +address zero, the kernel typically picks first empty slot above +\tts{TASK\_UNMAPPED\_BASE} big enough for the mapping.} or other kinds +of mmap calls with \tts{NULL} first argument like \tts{malloc} allocating +big chunks of memory, mmaping of locale database, etc.). + +This simplest approach is unfortunately problematic on 32-bit (or 31-bit) +architectures where the total virtual address space for a process is +somewhere between 2GB (S/390) and almost 4GB (Linux IA-32 4GB/4GB kernel +split, AMD64 running 32-bit processes, etc.). Typical installations these +days contain thousands of shared libraries and if each of them is given a +unique address space slot, on average executables will have pretty sparse +mapping of its shared libraries and there will be less contiguous virtual +memory for application's own use +\footnote{Especially databases look these days for every byte of virtual +address space on 32-bit architectures.}. + +\tts{Prelink} has a special mode, turned on with \tts{-m} option, in which +it computes what shared libraries are ever loaded together in some executable +(not considering \tts{dlopen}). If two shared libraries are ever loaded +together, \tts{prelink} assigns them different virtual address space slots, +but if they never appear together, it can give them overlapping addresses. +For example applications using \tts{KDE} toolkit link typically against many +\tts{KDE} shared libraries, programs written using the \tts{Gtk+} toolkit +link typically against many \tts{Gtk+} shared libraries, but there are just +very few programs which link against both \tts{KDE} and \tts{Gtk+} shared +libraries, and even if they do, they link against very small subset of those +shared libraries. 
So all \tts{KDE} shared libraries not in that subset can +use overlapping addresses with all \tts{Gtk+} shared libraries but the +few exceptions. This leads to considerably smaller virtual address space +range used by all prelinked shared libraries, but it has its own +disadvantages too. It doesn't work too well with incremental prelinking, +because then not all executables are investigated, just those which are given +on \tts{prelink}'s command line. \tts{Prelink} also considers executables +in \tts{prelink.cache}, but it has no information about executables which have +not been prelinked yet. If a new executable, which links against some shared +libraries which never appeared together before, is prelinked later, +\tts{prelink} has to assign them new, non-overlapping addresses. +This means that any executables, which linked against the library +that has been moved and re-prelinked, need to be prelinked again. +If this happened during incremental prelinking, \tts{prelink} will +fix up only the executables given on the command line, leaving other +executables untouched. The untouched executables would not be able to +benefit from prelinking anymore. + +Although with the above two layout schemes shared library addresses can +vary slightly between different hosts running the same distribution +(depending on the exact set of installed executables and libraries), especially +the most often used shared libraries will have identical base addresses +on different computers. This is often not desirable for security reasons, +because it makes it slightly easier for various exploits to jump to routines +they want. Standard Linux kernels assign always the same addresses to +shared libraries loaded by the application at each run, so with these +kernels \tts{prelink} doesn't make things worse. But there are kernel +patches, such as Red Hat's \tts{Exec-Shield}, which randomize memory +mappings on each run. 
If shared libraries are prelinked, they cannot +be assigned different addresses on each run (prelinking information can +be only used to speed up startup if they are mapped at the base addresses +which were used during prelinking), which +means prelinking might not be desirable on some edge servers. +\tts{Prelink} can assign different addresses on different hosts though, +which is almost the same as assigning random addresses on each run +for long running processes such as daemons. Furthermore, the administrator +can force full prelinking and assignment of new random addresses every few +days (if he is also willing to restart the services, so that the old +shared libraries and executables don't have to be kept in memory). + +To assign random addresses \tts{prelink} has the \tts{-R} option. +This causes a random starting address somewhere in the architecture specific +range in which shared libraries are assigned, and minor random reshuffling +in the queue of shared libraries which need address assignment (normally +it is sorted by descending usage count, with randomization shared libraries +which are not very far away from each other in the sorted list can be +swapped). The \tts{-R} option should work orthogonally to the \tts{-m} +option. + +Some architectures have special further requirements on shared library +address assignment. On 32-bit PowerPC, if shared libraries are located +close to the executable, so that everything fits into 32MB area, \tts{PLT} +slots resolving to those shared libraries can use the branch relative +instruction instead of more expensive sequences involving memory load +and indirect branch. If shared libraries are located in the +first 32MB of address space, \tts{PLT} slots resolving to those shared +libraries can use the branch absolute instruction (but already \tts{PLT} +slots in those shared libraries resolving to addresses in the executable +cannot be done cheaply).
This means for optimization \tts{prelink} +should assign addresses from a 24MB region below the executable first, assuming +most of the executables are smaller than those remaining 8MB. +\tts{prelink} assigns these from higher to lower addresses. When this +region is full, \tts{prelink} starts from address 0x40000 +\footnote{To leave some pages unmapped to catch \tts{NULL} pointer +dereferences.} up till the bottom of the first area. Only when +all these areas are full, \tts{prelink} starts picking addresses high above +the executable, so that sufficient space is left in between to leave room +for \tts{brk}. +When \tts{-R} option is specified, \tts{prelink} needs to honor it, but +in a way which doesn't totally kill this optimization. So it picks up +a random start base within each of the 3 regions separately, splitting +them into 6 regions. + +Another architecture which needs to be handled specially is IA-32 +when using \tts{Exec-Shield}. The IA-32 architecture doesn't have a +bit to disable execution for each page, only for each segment. All readable +pages are normally executable. This means the stack is usually executable, +as is memory allocated by \tts{malloc}. This is undesirable for security reasons, +as exploits can then overflow a buffer on the stack to transfer control +to code they create on the stack. +Only very few programs actually need an executable stack. For example +programs using GCC trampolines for nested functions need it or when +an application itself creates executable code on the stack and calls it. +\tts{Exec-Shield} works around this IA-32 architecture deficiency +by using a separate code segment, which starts at address 0 and spans +address space until its limit, highest page which needs to +be executable. This is dynamically changed when some page with higher +address than the limit needs to be executable (either because of \tts{mmap} +with \tts{PROT\_EXEC} bit set, or \tts{mprotect} with \tts{PROT\_EXEC} +of an existing mapping).
This kind of protection is of course only +effective if the limit is as low as possible. The kernel tries to +put all new mappings with \tts{PROT\_EXEC} set and \tts{NULL} address low. +If possible into {\sl ASCII Shield area} (first 16MB of address space) +\nomenclature{ASCII Shield area}{First 16MB of address space on 32-bit +architectures. These addresses have zeros in upper 8 bits, +which on little endian architectures are stored as last byte of the address +and on big endian architectures as first byte of the address. +A zero byte terminates string, so it is hard to control the exact +arguments of a function if they are placed on the stack above the +address. On big endian machines, it is even hard to control the +low 24 bits of the address}, if not, at least below the executable. +If \tts{prelink} detects \tts{Exec-Shield}, it tries to do the same as +kernel when assigning addresses, i.e. prefers to assign addresses in +{\sl ASCII Shield area} and continues with other addresses below +the program. It needs to leave first 1MB plus 4KB of address space +unallocated though, because that range is often used by programs +using \tts{vm86} system call. + +\section{Relocation of libraries} + +When a shared library has a base address assigned, it needs to be relocated +so that the base address is equal to the first \tts{PT\_LOAD} segment's +\tts{p\_vaddr}. The effect of this operation should be bitwise identical +as if the library were linked with that base address originally. 
+That is, the following scripts should produce identical output: + +\noindent{{\small\begin{verbatim} +$ gcc -g -shared -o libfoo.so.1.0.0 -Wl,-h,libfoo.so.1 \ + input1.o input2.o somelib.a +$ prelink --reloc-only=0x54321000 libfoo.so.1.0.0 +\end{verbatim}} +\prelinklistingcaption{Script to relocate a shared library after linking using \tts{prelink}}} + +and: +\noindent{{\small\begin{verbatim} +$ gcc -shared -Wl,--verbose 2>&1 > /dev/null \ + | sed -e '/^======/,/^======/!d' \ + -e '/^======/d;s/0\( + SIZEOF_HEADERS\)/0x54321000\1/' \ + > libfoo.so.lds +$ gcc -Wl,-T,libfoo.so.lds -g -shared -o libfoo.so.1.0.0 \ + -Wl,-h,libfoo.so.1 input1.o input2.o somelib.a +\end{verbatim}} +\prelinklistingcaption{Script to link a shared library at non-standard base}} + +The first script creates a normal shared library with the default +base address 0 and then uses \tts{prelink}'s special mode when it just +relocates a library to a given address. The second script first modifies +a built-in GNU linker script for linking of shared libraries, so that +the base address is the one given instead of zero and stores it into a +temporary file. Then it creates a shared library using that linker script. + +The relocation operation involves mostly adding the difference between +old and new base address to all \tts{ELF} fields which contain values +representing virtual addresses of the shared library +(or in the program header table also representing physical addresses). +File offsets need to be unmodified. Most places where the adjustments +need to be done are clear, \tts{prelink} just has to watch \tts{ELF} spec +to see which fields contain virtual addresses. + +One problem is with absolute symbols. \tts{Prelink} has no way to find +out if an absolute symbol in a shared library is really meant as +absolute and thus not changing during relocation, or if it is an address +of some place in the shared library outside of any section or on their +edge.
For instance symbols created in the GNU linker's script outside +of section directives all have the \tts{SHN\_ABS} section, yet they can be +a location in the library (e.g. \tts{symbolfoo~=~.}) or they can be absolute +(e.g. \tts{symbolbar~=~0x12345000}). This distinction is lost at link +time. But the dynamic linker when looking up symbols doesn't make any +distinction between them, all addresses during dynamic lookup have the +load offset added to them. \tts{Prelink} chooses to relocate any absolute +symbols with value bigger than zero, that way \tts{prelink --reloc-only} +gets bitwise identical output with linking directly at the different base +in almost all real-world cases. Thread Local Storage symbols (those with +\tts{STT\_TLS} type) are never relocated, as their values are relative +to start of shared library's thread local area. + +When relocating the dynamic section there are no bits which tell if +a particular dynamic tag uses \tts{d\_un.d\_ptr} (which needs to +be adjusted) or \tts{d\_un.d\_val} (which needs to be left as is). +So \tts{prelink} has to hardcode a list of well known architecture +independent dynamic tags which need adjusting and have a hook for +architecture specific dynamic tag adjustment. Sun came up with +\tts{DT\_ADDRRNGLO} to \tts{DT\_ADDRRNGHI} and \tts{DT\_VALRNGLO} +to \tts{DT\_VALRNGHI} dynamic tag number ranges, so at least as +long as these ranges are used for new dynamic tags \tts{prelink} +can relocate correctly even without listing them all explicitly. + +When relocating \tts{.rela.*} or \tts{.rel.*} sections, which is +done in architecture specific code, relative relocations and on \tts{.got.plt} +using architectures also \tts{PLT} relocations typically need an +adjustment. The adjustment needs to be done in either \tts{r\_addend} field +of the \tts{ElfNN\_Rela} structure, in the memory pointed by \tts{r\_offset}, +or in both locations. +On some architectures what needs adjusting is not even the same for all relative relocations.
+Relative relocations against some sections need to have \tts{r\_addend} +adjusted while others need to have memory adjusted. +On many architectures, first few words in \tts{GOT} are special and some +of them need adjustment. + +The hardest part of the adjustment is handling the debugging sections. +These are non-allocated sections which typically have no corresponding +relocation section associated with them. \tts{Prelink} has to match the various +debuggers in what fields it adjusts and what are skipped. +As of this writing \tts{prelink} should handle +\href{http://www.eagercon.com/dwarf/dwarf-2.0.0.pdf}% +{\tts{DWARF 2} [15]} standard as corrected (and extended) by +\href{http://reality.sgiweb.org/davea/dwarf3-draft8-011125.pdf}% +{\tts{DWARF 3 draft} [16]}, +\href{http://sources.redhat.com/cgi-bin/cvsweb.cgi/src/gdb/doc/stabs.texinfo?cvsroot=src}% +{\tts{Stabs} [17]} with GCC extensions and Alpha or MIPS \tts{Mdebug}. + +\tts{DWARF 2} debugging information involves many separate sections, +each of them with a unique format which needs to be relocated differently. +For relocation of the \tts{.debug\_info} section compilation units \tts{prelink} has to +parse the corresponding part of the \tts{.debug\_abbrev} section, adjust all +values of attributes that are using the \tts{DW\_FORM\_addr} form and adjust embedded +location lists. \tts{.debug\_ranges} and \tts{.debug\_loc} section +portions depend on the exact place in \tts{.debug\_info} section from +which they are referenced, so that \tts{prelink} can keep track of their +base address. \tts{DWARF} debugging format is very extendable, so +\tts{prelink} needs to be very conservative when it sees unknown extensions. +It needs to fail prelinking instead of silently breaking debugging information +if it sees an unknown \tts{.debug\_*} section, unknown attribute form +or unknown attribute with one of the \tts{DW\_FORM\_block*} forms, as +they can potentially embed addresses which would need adjustment.
+ +For \tts{stabs} \tts{prelink} tried to match GDB behavior. For +\tts{N\_FUN}, it needs to differentiate between function start and +function address which are both encoded with this type, the rest of types +either always need relocating or never. And similarly to \tts{DWARF 2} +handling, it needs to reject unknown types. + +The relocation code in \tts{prelink} is a little bit more generic +than what is described above, as it is used also by other parts of +\tts{prelink}, when growing sections in a middle of the shared library +during \tts{REL} to \tts{RELA} conversion. All adjustment functions +get passed both the offset it should add to virtual addresses and +a start address. Adjustment is only done if the old virtual address +was bigger or equal than the start address. + +\section{REL to RELA conversion} + +On architectures which normally use the \tts{REL} format for relocations instead +of \tts{RELA} (IA-32, ARM and MIPS), if certain relocation types use the +memory \tts{r\_offset} points to during relocation, \tts{prelink} has to +either convert them to a different relocation type which doesn't use +the memory value, or the whole \tts{.rel.dyn} section needs to be converted +to \tts{RELA} format. Let's describe it on an example on IA-32 architecture: + +\noindent{{\small\begin{verbatim} +$ cat > test1.c <<EOF +extern int i[4]; +int *j = i + 2; +EOF +$ cat > test2.c <<EOF +int i[4]; +EOF +$ gcc -nostdlib -shared -fpic -s -o test2.so test2.c +$ gcc -nostdlib -shared -fpic -o test1.so test1.c ./test2.so +$ readelf -l test1.so | grep LOAD | head -1 + LOAD 0x000000 0x00000000 0x00000000 0x002b8 0x002b8 R E 0x1000 +$ readelf -l test2.so | grep LOAD | head -1 + LOAD 0x000000 0x00000000 0x00000000 0x00244 0x00244 R E 0x1000 +$ readelf -r test1.so + +Relocation section '.rel.dyn' at offset 0x2b0 contains 1 entries: + Offset Info Type Sym.Value Sym. 
Name +000012b8 00000d01 R_386_32 00000000 i +$ objdump -s -j .data test1.so + +test1.so: file format elf32-i386 + +Contents of section .data: + 12b8 08000000 .... +$ readelf -s test2.so | grep i\$ + 11: 000012a8 16 OBJECT GLOBAL DEFAULT 8 i +$ prelink -N ./test1.so ./test2.so +$ readelf -l test1.so | grep LOAD | head -1 + LOAD 0x000000 0x04dba000 0x04dba000 0x002bc 0x002bc R E 0x1000 +$ readelf -l test2.so | grep LOAD | head -1 + LOAD 0x000000 0x04db6000 0x04db6000 0x00244 0x00244 R E 0x1000 +$ readelf -r test1.so + +Relocation section '.rel.dyn' at offset 0x2b0 contains 1 entries: + Offset Info Type Sym.Value Sym. Name + Addend +04dbb2bc 00000d01 R_386_32 00000000 i + 8 +$ objdump -s -j .data test1.so + +test1.so: file format elf32-i386 + +Contents of section .data: + 4dbb2bc b072db04 .r.. +$ readelf -s test2.so | grep i\$ + 11: 04db72a8 16 OBJECT GLOBAL DEFAULT 8 i +\end{verbatim}} +\prelinklistingcaption{\tts{REL} to \tts{RELA} conversion example}} + +This relocation is against {\sl i + 8}, where the addend is stored at the memory +location pointed by \tts{r\_offset}. \tts{Prelink} assigned base address +0x4dba000 to \tts{test1.so} and 0x4db6000 to \tts{test2.so}. +\tts{Prelink} above converted the \tts{REL} section in \tts{test1.so} to +\tts{RELA}, but let's assume it did not. All output containing {\sl 2bc} +above would change to {\sl 2b8} (that changed above only because \tts{.rel.dyn} +section grew up by 4 bytes during the conversion to \tts{RELA} format), +the rest would stay unchanged. +When some program linked against \tts{test1.so} was prelinked, +the (only) relocation in \tts{test1.so} would not be used and {\sl j} would +contain the right value, 0x4db72b0 (address of {\sl i + 8}; note that IA-32 +is little endian, so the values in .data section are harder to read +for a human). Now, let's assume one of the shared libraries the executable +is linked against is upgraded. This means prelink information cannot +be used, as it is out of date. 
Let's assume it was a library other +than \tts{test2.so}. Normal relocation processing for \tts{test1.so} +needs to happen. Standard \tts{R\_386\_32} calculation is \tts{S~+~A}, +in this case 0x4db72a8 + 0x4db72b0 = 0x9b6e558 and {\sl j} contains wrong +value. Either \tts{test2.so} could change and now the {\sl i} variable would +have different address, or some other shared library linked to the executable +could overload symbol {\sl i}. Without additional information the dynamic +linker cannot find out the addend is 8. + +The original value of a symbol could perhaps be stored in some special +allocated section and the dynamic linker could do some magic to locate it, +but it would mean standard relocation handling code in the dynamic linker +cannot be used for relocation processing of prelinked shared libraries +where prelinking information cannot be used. +So \tts{prelink} in this case converts the whole \tts{.rel.dyn} section +into the \tts{RELA} format, the addend is stored in \tts{r\_addend} field +and when doing relocation processing, it really doesn't matter what +value is at the memory location pointed by \tts{r\_offset}. +The disadvantage of this is that the relocation section +grew by 50\%. If prelinking information can be used, it shouldn't matter much, +since the section is never loaded at runtime because it is not accessed. +If prelinking cannot be used, whether because it is out of date or +because the shared library has been +loaded by \tts{dlopen}, it will increase memory footprint, but it is read-only +memory which is typically not used after startup and can be discarded +as it is backed by the file containing the shared library. + +At least on IA-32, \tts{REL} to \tts{RELA} conversion is not always +necessary. If \tts{R\_386\_32} addend is originally 0, \tts{prelink} +can instead change its type to \tts{R\_386\_GLOB\_DAT}, which is a +similar dynamic relocation, but calculated as \tts{S} instead of +\tts{S~+~A}. 
There is no similar conversion for \tts{R\_386\_PC32} +possible though, on the other hand this relocation type should never +appear in position independent shared libraries, only in position +dependent code. On ARM, the situation is the same, just using +different relocation names (\tts{R\_ARM\_32}, \tts{R\_ARM\_GLOB\_DAT} +and \tts{R\_ARM\_PC24}). + +The \tts{.rel.plt} section doesn't have to be converted to \tts{RELA} +format on either of these architectures, if the conversion is needed, +all other \tts{.rel.*} allocated sections, which have to be adjacent +as they are pointed to by \tts{DT\_REL} and \tts{DT\_RELSZ} dynamic tags, +have to be converted together. The conversion itself is fairly easy, +some architecture specific code just has to fetch the original addend +from memory pointed by the relocation and store it into \tts{r\_addend} +field (or clear \tts{r\_addend} if the particular relocation type +never uses the addend). The main problem is that when the conversion +happens, the \tts{.rel.dyn} section grows by 50\% and there needs to be +room for that in the read-only loadable segment of the shared library. + +In shared libraries it is always possible to grow the first read-only +\tts{PT\_LOAD} segment by adding the additional data at the beginning +of the read-only segment, as the shared library is relocatable. +\tts{Prelink} can relocate the whole shared library to a higher address +than it has assigned for it. The file offsets of all sections +and the section header table file offset need to be increased, +but the \tts{ELF} header and program headers need to stay at the beginning +of the file. The relocation section can then be moved to the newly created +space between the end of the program header table and the first section. + +Moving the section from the old location to the newly created space +would often leave a very big gap in virtual address space as well as in +the file at the old location of the relocation section. 
Fortunately the +linker typically puts special \tts{ELF} sections including allocated +relocation section before the code section and other read-only sections +under user's control. These special sections are intended for dynamic +linking only. Their addresses are stored just in the \tts{.dynamic} section +and \tts{prelink} can easily adjust them there. There is no need for +a shared library to store address of one of the special sections +into its code or data sections and existing linkers in fact don't create +such references. When growing the relocation section, \tts{prelink} +checks whether all sections before the relocation section are +special +\footnote{As special sections \tts{prelink} considers sections with +\tts{SHT\_NOTE}, \tts{SHT\_HASH}, \tts{SHT\_DYNSYM}, \tts{SHT\_STRTAB}, +\tts{SHT\_GNU\_verdef}, \tts{SHT\_GNU\_verneed}, \tts{SHT\_GNU\_versym}, +\tts{SHT\_REL} or \tts{SHT\_RELA} type or the \tts{.interp} section.} +and if they are, just moves them to lower addresses, so that the +newly created space is right above the relocation section. +The advantage is that instead of moving all sections by the size of +the new relocation section they can be adjusted ideally just by the +difference between old and new relocation section size. + +There are two factors which can increase the necessary adjustment of +all higher sections. The first is required section alignment of any +allocated section above the relocation section. \tts{Prelink} needs +to find the highest section alignment among those sections and +increase the adjustment from the difference between old and new +relocation section up to the next multiple of that alignment. + +The second factor is only relevant to shared libraries where linker +optimized the data segment placement. Traditionally linker assigned +the end address of the read-only segment plus the architecture's +maximum \tts{ELF} page size as the start address of the read-write +segment. 
While this created the smallest file sizes of the shared libraries, +it often wasted one page in the read-write segment because of partial +pages. When linker optimizes such that less space is wasted in partial +pages, the distance between read-only and read-write segments can be +smaller than architecture specific maximum \tts{ELF} page size. +\tts{Prelink} has to take this into account, so that when adjusting +the sections the read-only and read-write segment don't end up on the +same page. Unfortunately \tts{prelink} cannot increase or decrease +the distance between the read-only and read-write segments, since +it is possible that the shared library has relative addresses of +any allocated code, data or \tts{.bss} sections +stored in its sections without any relocations which would allow +\tts{prelink} to change them. \tts{Prelink} has to move all sections +starting with the first allocated \tts{SHT\_PROGBITS} section other +than \tts{.interp} up to the last allocated \tts{SHT\_PROGBITS} or +\tts{SHT\_NOBITS} section as a block and thus needs to increase +the adjustment in steps of the highest section alignment as many +times as needed so that the segments end up in different pages. +Below are 3 examples: + +\noindent{{\small\begin{verbatim} +$ cat > test1.c <<EOF +int i[2] __attribute__((aligned (32))); +#define J1(N) int *j##N = &i[1]; +#define J2(N) J1(N##0) J1(N##1) J1(N##2) J1(N##3) J1(N##4) +#define J3(N) J2(N##0) J2(N##1) J2(N##2) J2(N##3) J2(N##4) +#define J4(N) J3(N##0) J3(N##1) J3(N##2) J3(N##3) J3(N##4) +J4(0) J4(1) J3(2) J3(3) J1(4) +const int l[256] = { [10] = 1 }; +/* Put a zero sized section at the end of read-only segment, + so that the end address of the segment is printed. 
*/ +asm (".section ro_seg_end, \"a\"; .previous"); +EOF +$ gcc -shared -O2 -nostdlib -fpic -o test1.so test1.c +$ readelf -S test1.so | grep '^ \[' + [Nr] Name Type Addr Off Size ES Flg Lk Inf Al + [ 0] NULL 00000000 000000 000000 00 0 0 0 + [ 1] .hash HASH 000000b4 0000b4 000930 04 A 2 0 4 + [ 2] .dynsym DYNSYM 000009e4 0009e4 001430 10 A 3 d 4 + [ 3] .dynstr STRTAB 00001e14 001e14 000735 00 A 0 0 1 + [ 4] .rel.dyn REL 0000254c 00254c 000968 08 A 2 0 4 + [ 5] .text PROGBITS 00002eb4 002eb4 000000 00 AX 0 0 4 + [ 6] .rodata PROGBITS 00002ec0 002ec0 000400 00 A 0 0 32 + [ 7] ro_seg_end PROGBITS 000032c0 0032c0 000000 00 A 0 0 1 + [ 8] .data PROGBITS 000042c0 0032c0 0004b4 00 WA 0 0 4 + [ 9] .dynamic DYNAMIC 00004774 003774 000070 08 WA 3 0 4 + [10] .got PROGBITS 000047e4 0037e4 00000c 04 WA 0 0 4 + [11] .bss NOBITS 00004800 003800 000008 00 WA 0 0 32 + [12] .comment PROGBITS 00000000 003800 000033 00 0 0 1 + [13] .shstrtab STRTAB 00000000 003833 000075 00 0 0 1 + [14] .symtab SYMTAB 00000000 003b28 001470 10 15 11 4 + [15] .strtab STRTAB 00000000 004f98 000742 00 0 0 1 +$ readelf -l test1.so | grep LOAD + LOAD 0x000000 0x00000000 0x00000000 0x032c0 0x032c0 R E 0x1000 + LOAD 0x0032c0 0x000042c0 0x000042c0 0x00530 0x00548 RW 0x1000 +$ prelink -N ./test1.so +$ readelf -l test1.so | grep LOAD + LOAD 0x000000 0x02000000 0x02000000 0x03780 0x03780 R E 0x1000 + LOAD 0x003780 0x02004780 0x02004780 0x00530 0x00548 RW 0x1000 +$ readelf -S test1.so | grep '^ \[' + [Nr] Name Type Addr Off Size ES Flg Lk Inf Al + [ 0] NULL 00000000 000000 000000 00 0 0 0 + [ 1] .hash HASH 020000b4 0000b4 000930 04 A 2 0 4 + [ 2] .dynsym DYNSYM 020009e4 0009e4 001430 10 A 3 d 4 + [ 3] .dynstr STRTAB 02001e14 001e14 000735 00 A 0 0 1 + [ 4] .rel.dyn RELA 0200254c 00254c 000e1c 0c A 2 0 4 + [ 5] .text PROGBITS 02003374 003374 000000 00 AX 0 0 4 + [ 6] .rodata PROGBITS 02003380 003380 000400 00 A 0 0 32 + [ 7] ro_seg_end PROGBITS 02003780 003780 000000 00 A 0 0 1 + [ 8] .data PROGBITS 02004780 
003780 0004b4 00 WA 0 0 4 + [ 9] .dynamic DYNAMIC 02004c34 003c34 000070 08 WA 3 0 4 + [10] .got PROGBITS 02004ca4 003ca4 00000c 04 WA 0 0 4 + [11] .bss NOBITS 02004cc0 003cc0 000008 00 WA 0 0 32 + [12] .comment PROGBITS 00000000 003cc0 000033 00 0 0 1 + [13] .gnu.liblist GNU_LIBLIST 00000000 003cf3 000000 14 14 0 4 + [14] .gnu.libstr STRTAB 00000000 003cf3 000000 00 0 0 1 + [15] .gnu.prelink_undo PROGBITS 00000000 003cf4 00030c 01 0 0 4 + [16] .shstrtab STRTAB 00000000 004003 0000a0 00 0 0 1 + [17] .symtab SYMTAB 00000000 0043a0 001470 10 18 11 4 + [18] .strtab STRTAB 00000000 005810 000742 00 0 0 1 +\end{verbatim}} +\prelinklistingcaption{Growing read-only segment with segment distance one page}} + +\begin{figure}[!ht] +\centering +\includegraphics[width=0.8\textwidth]{dso1} +\caption{Growing read-only segment with segment distance one page} +\end{figure} + +In this example the read-write segment starts at address \tts{0x42c0}, which +is one page above the end of read-only segment. \tts{Prelink} needs to grow +the read-only \tts{PT\_LOAD} segment by 50\% of \tts{.rel.dyn} size, i.e. +\tts{0x4b4} bytes. \tts{Prelink} just needs to round that up for the +highest alignment (32 bytes required by \tts{.rodata} or \tts{.bss} +sections) and moves all sections above \tts{.rel.dyn} by \tts{0x4c0} bytes. 
+ +\noindent{{\small\begin{verbatim} +$ cat > test2.c <<EOF +int i[2] __attribute__((aligned (32))); +#define J1(N) int *j##N = &i[1]; +#define J2(N) J1(N##0) J1(N##1) J1(N##2) J1(N##3) J1(N##4) +#define J3(N) J2(N##0) J2(N##1) J2(N##2) J2(N##3) J2(N##4) +#define J4(N) J3(N##0) J3(N##1) J3(N##2) J3(N##3) J3(N##4) +J4(0) J4(1) J3(2) J3(3) J1(4) +const int l[256] = { [10] = 1 }; +int k[670]; +asm (".section ro_seg_end, \"a\"; .previous"); +EOF +$ gcc -shared -O2 -nostdlib -fpic -o test2.so test2.c +$ readelf -S test2.so | grep '^ \[' + [Nr] Name Type Addr Off Size ES Flg Lk Inf Al + [ 0] NULL 00000000 000000 000000 00 0 0 0 + [ 1] .hash HASH 000000b4 0000b4 000934 04 A 2 0 4 + [ 2] .dynsym DYNSYM 000009e8 0009e8 001440 10 A 3 d 4 + [ 3] .dynstr STRTAB 00001e28 001e28 000737 00 A 0 0 1 + [ 4] .rel.dyn REL 00002560 002560 000968 08 A 2 0 4 + [ 5] .text PROGBITS 00002ec8 002ec8 000000 00 AX 0 0 4 + [ 6] .rodata PROGBITS 00002ee0 002ee0 000400 00 A 0 0 32 + [ 7] ro_seg_end PROGBITS 000032e0 0032e0 000000 00 A 0 0 1 + [ 8] .data PROGBITS 00004000 004000 0004b4 00 WA 0 0 4 + [ 9] .dynamic DYNAMIC 000044b4 0044b4 000070 08 WA 3 0 4 + [10] .got PROGBITS 00004524 004524 00000c 04 WA 0 0 4 + [11] .bss NOBITS 00004540 004540 000a88 00 WA 0 0 32 + [12] .comment PROGBITS 00000000 004540 000033 00 0 0 1 + [13] .shstrtab STRTAB 00000000 004573 000075 00 0 0 1 + [14] .symtab SYMTAB 00000000 004868 001480 10 15 11 4 + [15] .strtab STRTAB 00000000 005ce8 000744 00 0 0 1 +$ readelf -l test2.so | grep LOAD + LOAD 0x000000 0x00000000 0x00000000 0x032e0 0x032e0 R E 0x1000 + LOAD 0x004000 0x00004000 0x00004000 0x00530 0x00fc8 RW 0x1000 +$ prelink -N ./test2.so +$ readelf -l test2.so | grep LOAD + LOAD 0x000000 0x02000000 0x02000000 0x037a0 0x037a0 R E 0x1000 + LOAD 0x0044c0 0x020044c0 0x020044c0 0x00530 0x00fc8 RW 0x1000 +$ readelf -S test2.so | grep '^ \[' + [Nr] Name Type Addr Off Size ES Flg Lk Inf Al + [ 0] NULL 00000000 000000 000000 00 0 0 0 + [ 1] .hash HASH 020000b4 0000b4 000934 
04 A 2 0 4 + [ 2] .dynsym DYNSYM 020009e8 0009e8 001440 10 A 3 d 4 + [ 3] .dynstr STRTAB 02001e28 001e28 000737 00 A 0 0 1 + [ 4] .rel.dyn RELA 02002560 002560 000e1c 0c A 2 0 4 + [ 5] .text PROGBITS 02003388 003388 000000 00 AX 0 0 4 + [ 6] .rodata PROGBITS 020033a0 0033a0 000400 00 A 0 0 32 + [ 7] ro_seg_end PROGBITS 020037a0 0037a0 000000 00 A 0 0 1 + [ 8] .data PROGBITS 020044c0 0044c0 0004b4 00 WA 0 0 4 + [ 9] .dynamic DYNAMIC 02004974 004974 000070 08 WA 3 0 4 + [10] .got PROGBITS 020049e4 0049e4 00000c 04 WA 0 0 4 + [11] .bss NOBITS 02004a00 004a00 000a88 00 WA 0 0 32 + [12] .comment PROGBITS 00000000 004a00 000033 00 0 0 1 + [13] .gnu.liblist GNU_LIBLIST 00000000 004a33 000000 14 14 0 4 + [14] .gnu.libstr STRTAB 00000000 004a33 000000 00 0 0 1 + [15] .gnu.prelink_undo PROGBITS 00000000 004a34 00030c 01 0 0 4 + [16] .shstrtab STRTAB 00000000 004d43 0000a0 00 0 0 1 + [17] .symtab SYMTAB 00000000 0050e0 001480 10 18 11 4 + [18] .strtab STRTAB 00000000 006560 000744 00 0 0 1 +\end{verbatim}} +\prelinklistingcaption{Growing read-only segment not requiring additional padding}} + +\begin{figure}[!ht] +\centering +\includegraphics[width=0.8\textwidth]{dso2} +\caption{Growing read-only segment not requiring additional padding} +\end{figure} + +In the second example \tts{prelink} can grow by just \tts{0x4c0} bytes as +well, eventhough the distance between read-write and read-only segment +is just \tts{0xd20} bytes. With this distance, hypothetical adjustment +by any size less than \tts{0xd21} bytes (modulo 4096) would need just +rounding up to the next multiple of 32 bytes, while adjustments +from \tts{0xd21} up to \tts{0xfe0} would require adjustments in +multiples of 4096 bytes. 
+ +\noindent{{\small\begin{verbatim} +$ cat > test3.c <<EOF +int i[2] __attribute__((aligned (32))); +#define J1(N) int *j##N = &i[1]; +#define J2(N) J1(N##0) J1(N##1) J1(N##2) J1(N##3) J1(N##4) +#define J3(N) J2(N##0) J2(N##1) J2(N##2) J2(N##3) J2(N##4) +#define J4(N) J3(N##0) J3(N##1) J3(N##2) J3(N##3) J3(N##4) +J4(0) J4(1) J3(2) J3(3) J1(4) +int k[670]; +asm (".section ro_seg_end, \"a\"; .previous"); +EOF +$ gcc -shared -O2 -nostdlib -fpic -o test3.so test3.c +$ readelf -S test3.so | grep '^ \[' + [Nr] Name Type Addr Off Size ES Flg Lk Inf Al + [ 0] NULL 00000000 000000 000000 00 0 0 0 + [ 1] .hash HASH 000000b4 0000b4 00092c 04 A 2 0 4 + [ 2] .dynsym DYNSYM 000009e0 0009e0 001420 10 A 3 c 4 + [ 3] .dynstr STRTAB 00001e00 001e00 000735 00 A 0 0 1 + [ 4] .rel.dyn REL 00002538 002538 000968 08 A 2 0 4 + [ 5] .text PROGBITS 00002ea0 002ea0 000000 00 AX 0 0 4 + [ 6] ro_seg_end PROGBITS 00002ea0 002ea0 000000 00 A 0 0 1 + [ 7] .data PROGBITS 00003000 003000 0004b4 00 WA 0 0 4 + [ 8] .dynamic DYNAMIC 000034b4 0034b4 000070 08 WA 3 0 4 + [ 9] .got PROGBITS 00003524 003524 00000c 04 WA 0 0 4 + [10] .bss NOBITS 00003540 003540 000a88 00 WA 0 0 32 + [11] .comment PROGBITS 00000000 003540 000033 00 0 0 1 + [12] .shstrtab STRTAB 00000000 003573 00006d 00 0 0 1 + [13] .symtab SYMTAB 00000000 003838 001460 10 14 10 4 + [14] .strtab STRTAB 00000000 004c98 000742 00 0 0 1 +$ readelf -l test3.so | grep LOAD + LOAD 0x000000 0x00000000 0x00000000 0x02ea0 0x02ea0 R E 0x1000 + LOAD 0x003000 0x00003000 0x00003000 0x00530 0x00fc8 RW 0x1000 +$ prelink -N ./test3.so +$ readelf -l test3.so | grep LOAD + LOAD 0x000000 0x02000000 0x02000000 0x03ea0 0x03ea0 R E 0x1000 + LOAD 0x004000 0x02004000 0x02004000 0x00530 0x00fc8 RW 0x1000 +$ readelf -S test3.so | grep '^ \[' + [Nr] Name Type Addr Off Size ES Flg Lk Inf Al + [ 0] NULL 00000000 000000 000000 00 0 0 0 + [ 1] .hash HASH 020000b4 0000b4 00092c 04 A 2 0 4 + [ 2] .dynsym DYNSYM 020009e0 0009e0 001420 10 A 3 c 4 + [ 3] .dynstr STRTAB 
02001e00 001e00 000735 00 A 0 0 1 + [ 4] .rel.dyn RELA 02002538 002538 000e1c 0c A 2 0 4 + [ 5] .text PROGBITS 02003ea0 003ea0 000000 00 AX 0 0 4 + [ 6] ro_seg_end PROGBITS 02003ea0 003ea0 000000 00 A 0 0 1 + [ 7] .data PROGBITS 02004000 004000 0004b4 00 WA 0 0 4 + [ 8] .dynamic DYNAMIC 020044b4 0044b4 000070 08 WA 3 0 4 + [ 9] .got PROGBITS 02004524 004524 00000c 04 WA 0 0 4 + [10] .bss NOBITS 02004540 004540 000a88 00 WA 0 0 32 + [11] .comment PROGBITS 00000000 004540 000033 00 0 0 1 + [12] .gnu.liblist GNU_LIBLIST 00000000 004573 000000 14 13 0 4 + [13] .gnu.libstr STRTAB 00000000 004573 000000 00 0 0 1 + [14] .gnu.prelink_undo PROGBITS 00000000 004574 0002e4 01 0 0 4 + [15] .shstrtab STRTAB 00000000 00485b 000098 00 0 0 1 + [16] .symtab SYMTAB 00000000 004bc8 001460 10 17 10 4 + [17] .strtab STRTAB 00000000 006028 000742 00 0 0 1 +\end{verbatim}} +\prelinklistingcaption{Growing read-only segment if page padding needed}} + +\begin{figure}[!ht] +\centering +\includegraphics[width=0.8\textwidth]{dso3} +\caption{Growing read-only segment if page padding needed} +\end{figure} + +In the last example the distance between \tts{PT\_LOAD} segments is very +small, just \tts{0x160} bytes and the adjustment had to be done by 4096 +bytes. + +% Fortunately, shared libraries are position independent, so all absolute +% values in them are either stored in well known \tts{ELF} structures, +% or have corresponding dynamic relocations. The only problem might be +% with relative relocations, which are resolved at link time. +% The start of read-only \tts{PT\_LOAD} segment of shared libraries is +% typically used by special sections used by the dynamic linker +% (\tts{.hash}, \tts{.dynsym}, \tts{.dynstr}, \tts{.gnu.version*}, +% \tts{.rel*}, \tts{.note*}). It makes no sense for a shared library to +% have relocations against these sections or some addresses inside of them, +% furthermore it is impossible to do it without specially crafted +% linker script. 
So \tts{prelink} makes the assumption that it can grow +% freely the shared library after \tts{.rel.dyn} section, as long +% as only sections mentioned above come before \tts{.rel.dyn} (it actually +% checks section types, not names). \tts{Prelink} certainly can grow the shared library +% size in multiples of \tts{ELF} architecture specific maximum page size, +% but usually it can do better. Particularly, \tts{prelink} can grow by the 50\% size +% of \tts{.rel.dyn} section rounded up to the largest section alignment +% in all sections following it, but it has to make sure that two +% different \tts{PT\_LOAD} segments (typically the read-only and read-write) +% will not share the same page, otherwise it needs to grow it more in +% multiples of the maximum section alignment until they are on different +% pages. Growing is done by using the shared library relocation code with +% start address set to end of \tts{.rel.dyn} section. \tts{.rel.plt} +% section is then moved right to the end of \tts{.rel.dyn} section, +% \tts{.dynamic} section needs updating all addresses, type of +% relocation section, segment table needs to be adjusted accordingly +% and file offsets in section header table as well. + +\section{Conflicts} + +As said earlier, if symbol lookup of some symbol in particular shared +library results in different values when that shared library's natural +search scope is used and when using search scope of the application the +DSO is used in, this is considered a {\sl conflict}. 
+Here is an example of a conflict on IA-32: + +\noindent{{\small\begin{verbatim} +$ cat > test1.c <<EOF +int i; +int *j = &i; +int *foo (void) { return &i; } +EOF +$ cat > test2.c <<EOF +int i; +int *k = &i; +int *bar (void) { return &i; } +EOF +$ cat > test.c <<EOF +#include <stdio.h> +extern int i, *j, *k, *foo (void), bar (void); +int main (void) +{ +#ifdef PRINT_I + printf ("%p\n", &i); +#endif + printf ("%p %p %p %p\n", j, k, foo (), bar ()); +} +EOF +$ gcc -nostdlib -shared -fpic -s -o test1.so test1.c +$ gcc -nostdlib -shared -fpic -o test2.so test2.c ./test1.so +$ gcc -o test test.c ./test2.so ./test1.so +$ ./test +0x16137c 0x16137c 0x16137c 0x16137c +$ readelf -r ./test1.so + +Relocation section '.rel.dyn' at offset 0x2bc contains 2 entries: + Offset Info Type Sym.Value Sym. Name +000012e4 00000d01 R_386_32 00001368 i +00001364 00000d06 R_386_GLOB_DAT 00001368 i +$ prelink -N ./test ./test1.so ./test2.so +$ LD_WARN= LD_TRACE_PRELINKING=1 LD_BIND_NOW=1 /lib/ld-linux.so.2 ./test1.so + ./test1.so => ./test1.so (0x04db6000, 0x00000000) +$ LD_WARN= LD_TRACE_PRELINKING=1 LD_BIND_NOW=1 /lib/ld-linux.so.2 ./test2.so + ./test2.so => ./test2.so (0x04dba000, 0x00000000) + ./test1.so => ./test1.so (0x04db6000, 0x00000000) +$ LD_WARN= LD_TRACE_PRELINKING=1 LD_BIND_NOW=1 /lib/ld-linux.so.2 ./test \ + | sed 's/^[[:space:]]*/ /' + ./test => ./test (0x08048000, 0x00000000) + ./test2.so => ./test2.so (0x04dba000, 0x00000000) + ./test1.so => ./test1.so (0x04db6000, 0x00000000) + libc.so.6 => /lib/tls/libc.so.6 (0x00b22000, 0x00000000) TLS(0x1, 0x00000028) + /lib/ld-linux.so.2 => /lib/ld-linux.so.2 (0x00b0a000, 0x00000000) +$ readelf -S ./test1.so | grep '\.data\|\.got' + [ 6] .data PROGBITS 04db72e4 0002e4 000004 00 WA 0 0 4 + [ 8] .got PROGBITS 04db7358 000358 000010 04 WA 0 0 4 +$ readelf -r ./test1.so + +Relocation section '.rel.dyn' at offset 0x2bc contains 2 entries: + Offset Info Type Sym.Value Sym. 
Name +04db72e4 00000d06 R_386_GLOB_DAT 04db7368 i +04db7364 00000d06 R_386_GLOB_DAT 04db7368 i +$ objdump -s -j .got -j .data test1.so + +test1.so: file format elf32-i386 + +Contents of section .data: + 4db72e4 6873db04 hs.. +Contents of section .got: + 4db7358 e8120000 00000000 00000000 6873db04 ............hs.. +$ readelf -r ./test | sed '/\.gnu\.conflict/,$!d' +Relocation section '.gnu.conflict' at offset 0x7ac contains 18 entries: + Offset Info Type Sym.Value Sym. Name + Addend +04db72e4 00000001 R_386_32 04dbb37c +04db7364 00000001 R_386_32 04dbb37c +00c56874 00000001 R_386_32 fffffff0 +00c56878 00000001 R_386_32 00000001 +00c568bc 00000001 R_386_32 fffffff4 +00c56900 00000001 R_386_32 ffffffec +00c56948 00000001 R_386_32 ffffffdc +00c5695c 00000001 R_386_32 ffffffe0 +00c56980 00000001 R_386_32 fffffff8 +00c56988 00000001 R_386_32 ffffffe4 +00c569a4 00000001 R_386_32 ffffffd8 +00c569c4 00000001 R_386_32 ffffffe8 +00c569d8 00000001 R_386_32 080485b8 +00b1f510 00000007 R_386_JUMP_SLOT 00b91460 +00b1f514 00000007 R_386_JUMP_SLOT 00b91080 +00b1f518 00000007 R_386_JUMP_SLOT 00b91750 +00b1f51c 00000007 R_386_JUMP_SLOT 00b912c0 +00b1f520 00000007 R_386_JUMP_SLOT 00b91200 +$ ./test +0x4dbb37c 0x4dbb37c 0x4dbb37c 0x4dbb37c +\end{verbatim}} +\prelinklistingcaption{Conflict example}} + +In the example, among some conflicts caused by the dynamic linker and the C library, +\footnote{Particularly in the example, the 5 \tts{R\_386\_JUMP\_SLOT} fixups +are \tts{PLT} slots in the dynamic linker for memory allocator functions +resolving to C library functions instead of dynamic linker's own trivial +implementation. First 10 \tts{R\_386\_32} fixups at offsets 0xc56874 +to 0xc569c4 are Thread Local Storage fixups in the C library and +the fixup at 0xc569d8 is for {\sl \_IO\_stdin\_used} weak undefined symbol +in the C library, resolving to a symbol with the same name in the executable.} +there is a conflict for the symbol {\sl i} in \tts{test1.so} shared library. 
+\tts{test1.so} has just itself in its natural symbol lookup scope (as proved +by + +\tts{LD\_WARN= LD\_TRACE\_PRELINKING=1 LD\_BIND\_NOW=1 /lib/ld-linux.so.2 ./test1.so} + +command output), so when looking up symbol {\sl i} in this +scope the definition in \tts{test1.so} is chosen. \tts{test1.so} has two +relocations against the symbol {\sl i}, one \tts{R\_386\_32} against \tts{.data} +section and one \tts{R\_386\_GLOB\_DAT} against \tts{.got} section. When +prelinking \tts{test1.so} library, the dynamic linker stores the address of +{\sl i} (0x4db7368) into both locations (at offsets 0x4db72e4 and 0x4db7364). +The global symbol search scope in \tts{test} executable contains the executable +itself, \tts{test2.so} and \tts{test1.so} libraries, \tts{libc.so.6} and +the dynamic linker in the listed order. +When doing symbol lookup for symbol {\sl i} +in \tts{test1.so} when doing relocation processing of the whole executable, +address of {\sl i} in \tts{test2.so} is returned as that symbol comes earlier +in the global search scope. So, when none of the libraries nor the executable +is prelinked, the program prints 4 identical addresses. If prelink didn't +create conflict fixups for the two relocations against the symbol {\sl i} +in \tts{test1.so}, prelinked executable (which bypasses normal relocation +processing on startup) would print instead of the desired + +\tts{0x4dbb37c 0x4dbb37c 0x4dbb37c 0x4dbb37c} + +different addresses, + +\tts{0x4db7368 0x4dbb37c 0x4db7368 0x4dbb37c} + +That is a functionality change that \tts{prelink} cannot be permitted to +make, so instead it fixes up the two locations by storing the desired +value in there. In this case \tts{prelink} really cannot avoid that +- \tts{test1.so} shared library could be also used without \tts{test2.so} +in some other executable's symbol search scope. 
+Or there could be some executable linked with: + +\noindent{{\small\begin{verbatim} +$ gcc -o test2 test.c ./test1.so ./test2.so +\end{verbatim}} +\prelinklistingcaption{Conflict example with swapped order of libraries}} + +where {\sl i} lookup in \tts{test1.so} and \tts{test2.so} is supposed +to resolve to {\sl i} in \tts{test1.so}. + +Now consider what happens if the executable is linked with \tts{-DPRINT\_I}: + +\noindent{{\small\begin{verbatim} +$ gcc -DPRINT_I -o test3 test.c ./test2.so ./test1.so +$ ./test3 +0x804972c +0x804972c 0x804972c 0x804972c 0x804972c +$ prelink -N ./test3 ./test1.so ./test2.so +$ readelf -S ./test2.so | grep '\.data\|\.got' + [ 6] .data PROGBITS 04dbb2f0 0002f0 000004 00 WA 0 0 4 + [ 8] .got PROGBITS 04dbb36c 00036c 000010 04 WA 0 0 4 +$ readelf -r ./test2.so + +Relocation section '.rel.dyn' at offset 0x2c8 contains 2 entries: + Offset Info Type Sym.Value Sym. Name +04dbb2f0 00000d06 R_386_GLOB_DAT 04dbb37c i +04dbb378 00000d06 R_386_GLOB_DAT 04dbb37c i +$ objdump -s -j .got -j .data test2.so + +test2.so: file format elf32-i386 + +Contents of section .data: + 4dbb2f0 7cb3db04 |... +Contents of section .got: + 4dbb36c f4120000 00000000 00000000 7cb3db04 ............|... +$ readelf -r ./test3 + +Relocation section '.rel.dyn' at offset 0x370 contains 4 entries: + Offset Info Type Sym.Value Sym. Name +08049720 00000e06 R_386_GLOB_DAT 00000000 __gmon_start__ +08049724 00000105 R_386_COPY 08049724 j +08049728 00000305 R_386_COPY 08049728 k +0804972c 00000405 R_386_COPY 0804972c i + +Relocation section '.rel.plt' at offset 0x390 contains 4 entries: + Offset Info Type Sym.Value Sym. Name +08049710 00000607 R_386_JUMP_SLOT 080483d8 __libc_start_main +08049714 00000707 R_386_JUMP_SLOT 080483e8 printf +08049718 00000807 R_386_JUMP_SLOT 080483f8 foo +0804971c 00000c07 R_386_JUMP_SLOT 08048408 bar + +Relocation section '.gnu.conflict' at offset 0x7f0 contains 20 entries: + Offset Info Type Sym.Value Sym. 
Name + Addend +04dbb2f0 00000001 R_386_32 0804972c +04dbb378 00000001 R_386_32 0804972c +04db72e4 00000001 R_386_32 0804972c +04db7364 00000001 R_386_32 0804972c +00c56874 00000001 R_386_32 fffffff0 +00c56878 00000001 R_386_32 00000001 +00c568bc 00000001 R_386_32 fffffff4 +00c56900 00000001 R_386_32 ffffffec +00c56948 00000001 R_386_32 ffffffdc +00c5695c 00000001 R_386_32 ffffffe0 +00c56980 00000001 R_386_32 fffffff8 +00c56988 00000001 R_386_32 ffffffe4 +00c569a4 00000001 R_386_32 ffffffd8 +00c569c4 00000001 R_386_32 ffffffe8 +00c569d8 00000001 R_386_32 080485f0 +00b1f510 00000007 R_386_JUMP_SLOT 00b91460 +00b1f514 00000007 R_386_JUMP_SLOT 00b91080 +00b1f518 00000007 R_386_JUMP_SLOT 00b91750 +00b1f51c 00000007 R_386_JUMP_SLOT 00b912c0 +00b1f520 00000007 R_386_JUMP_SLOT 00b91200 +$ ./test3 +0x804972c +0x804972c 0x804972c 0x804972c 0x804972c +\end{verbatim}} +\prelinklistingcaption{Conflict example with COPY relocation for conflicting symbol}} + +Because the executable is not compiled as position independent code and +\tts{main} function takes address of {\sl i} variable, the object +file for \tts{test3.c} contains a \tts{R\_386\_32} relocation against +{\sl i}. The linker cannot make dynamic relocations against read-only +segment in the executable, so the address of {\sl i} must be constant. +This is accomplished by creating a new object {\sl i} in the executable's +\tts{.dynbss} section and creating a dynamic \tts{R\_386\_COPY} relocation +for it. The relocation ensures that during startup the content of +{\sl i} object earliest in the search scope without the executable +is copied to this {\sl i} object in executable. Now, unlike \tts{test} +executable, in \tts{test3} executable {\sl i} lookups in both \tts{test1.so} +and \tts{test2.so} libraries result in address of {\sl i} in the executable +(instead of \tts{test2.so}). 
This means that two conflict fixups +are needed again for \tts{test1.so} (but storing 0x804972c instead of +0x4dbb37c) and two new fixups are needed for \tts{test2.so}. + +If the executable is compiled as position independent code, + +\noindent{{\small\begin{verbatim} +$ gcc -fpic -DPRINT_I -o test4 test.c ./test2.so ./test1.so +$ ./test4 +0x4dbb37c +0x4dbb37c 0x4dbb37c 0x4dbb37c 0x4dbb37c +\end{verbatim}} +\prelinklistingcaption{Conflict example with position independent code in the executable}} + +the address of {\sl i} is stored in executable's \tts{.got} section, +which is writable and thus can have dynamic relocation against it. +So the linker creates a \tts{R\_386\_GLOB\_DAT} relocation against +the \tts{.got} section, the symbol {\sl i} is undefined in the executable +and no copy relocations are needed. In this case, only \tts{test1.so} +will need 2 fixups, \tts{test2.so} will not need any. + +There are various reasons for conflicts: +\begin{itemize} +\item Improperly linked shared libraries. If a shared library always needs +symbols from some particular shared library, it should be linked against +that library, usually by adding \tts{-lLIBNAME} to \tts{gcc -shared} command +line used during linking of the shared library. This both reduces conflict +fixups in \tts{prelink} and makes the library easier to load using +\tts{dlopen}, because applications don't have to remember that they have +to load some other library first. The best place to record the dependency +is in the shared library itself. Another reason is if the needed library +uses symbol versioning for its symbols. Not linking against that library +can result in malfunctioning shared library. \tts{Prelink} issues a warning for +such libraries - \tts{Warning: {\sl library} has undefined non-weak symbols}. +When linking a shared library, the \tts{-Wl,-z,defs} option can be used to +ensure there are no such undefined non-weak symbols. 
There are exceptions, +when undefined non-weak symbols in shared libraries are desirable. +One exception is when there are multiple shared libraries providing the +same functionality, and a shared library doesn't care which one is used. +An example is \tts{libreadline.so.4}, which needs some terminal +handling functions, which are provided by either \tts{libtermcap.so.2}, +or \tts{libncurses.so.5}. Another exception is with plugins or other +shared libraries which expect some symbols to be resolved to symbols +defined in the executable. +\item A library overriding functionality of some other library. One example +is the C library and the POSIX thread library. Older versions of the GNU C library +did not provide cancelable entry points required by the standard. This is +not needed for non-threaded applications. So only the \tts{libpthread.so.0} shared +library which provides POSIX threading support then overrode the +cancellation entry points required by the standard by wrapper functions +which provided the required functionality. Although most recent versions +of the GNU C library handle cancellation even in entry points in \tts{libc.so.6} +(this was needed for cases when \tts{libc.so.6} comes before +\tts{libpthread.so.0} in symbol search scope and used to be worked around +by non-standard handling of weak symbols in the dynamic linker), because +of symbol versioning the symbols had to stay in \tts{libpthread.so.0} +as well as in \tts{libc.so.6}. This means every program using POSIX +threads on Linux will have a couple of conflict fixups because of this. +\item Programs which need copy relocations. Although \tts{prelink} will +resolve the copy relocations at prelinking time, if any shared library +has relocations against the symbol which needed copy relocation, all such +relocations will need conflict fixups. Generally, it is better to not +export variables from shared libraries in their APIs, instead provide +accessor functions.
+\item Function pointer equality requirement for functions called from +executables. When address of some global function is taken, at least +C and C++ require that this pointer is the same in the whole program. +Executables typically contain position dependent code, so when code in the +executable takes address of some function not defined in the executable itself, +that address must be a link time constant. Linker accomplishes this by +creating a \tts{PLT} slot for the function unless there was one already +and resolving to the address of \tts{PLT} slot. The symbol for the function +is created with \tts{st\_value} equal to address of the \tts{PLT} slot, +but \tts{st\_shndx} set to \tts{SHN\_UNDEF}. Such symbols are treated +specially by the dynamic linker, in that \tts{PLT} relocations +resolve to first symbol in the global search scope after the executable, +while symbol lookups for all other relocation types return the +address of the symbol in the executable. Unfortunately, GNU linker doesn't +differentiate between taking address of a function in an executable (especially +one for which no dynamic relocation is possible in case it is in read-only +segment) and just calling the function, but never taking its address. +If it cleared the \tts{st\_value} field of the \tts{SHN\_UNDEF} function symbols +in case nothing in the executable takes the function's address, several \tts{prelink} +conflicts could disappear (\tts{SHN\_UNDEF} symbols with \tts{st\_value} set +to 0 are always treated as real undefined symbols by the dynamic linker). +\item \tts{COMDAT} code and data in C++. C++ language has several places where +it may need to emit some code or data without a clear unique +compilation unit owning it.
Examples include taking address of an +\tts{inline} function, local static variable in \tts{inline} functions, +virtual tables for some classes (this depends on \tts{\#pragma interface} +or \tts{\#pragma implementation} presence, presence of non-inline +non-pure-virtual member function in the class, etc.), {\sl RTTI} info for them. +Compilers and linkers handle these using various \tts{COMDAT} schemes, +e.g. GNU linker's \tts{.gnu.linkonce*} special sections or using +\tts{SHT\_GROUP}. Unfortunately, all these duplicate merging schemes +work only during linking of shared libraries or executables, no duplicate +removal is done across shared libraries. Shared libraries typically +have relocations against their \tts{COMDAT} code or data objects (otherwise +they wouldn't be at least in most cases emitted at all), so if there are +\tts{COMDAT} duplicates across shared libraries or the executable, they +lead to conflict fixups. The linker theoretically could try to +merge \tts{COMDAT} duplicates across shared libraries if specifically +requested by the user (if a \tts{COMDAT} symbol is already present in +one of the dependent shared libraries and is \tts{STB\_WEAK}, the linker +could skip it). Unfortunately, this only works as long as the user has +full control over the dependent shared libraries, because the \tts{COMDAT} +symbol could be exported from them just as a side effect of their +implementation (e.g. they use some class internally). When such libraries +are rebuilt even with minor changes in their implementation (unfortunately +with C++ shared libraries it is usually not very clear what part is exported +ABI and what is not), some of those \tts{COMDAT} symbols in them could go +away (e.g. because suddenly they use a different class internally and +the previously used class is not referenced anywhere). When \tts{COMDAT} +objects are not merged across shared libraries, this makes no problems, +as each library which needs the \tts{COMDAT} has its own copy. 
But with +\tts{COMDAT} duplicate removal between shared libraries there could suddenly +be unresolved references and the shared libraries would need to be relinked. +The only place where this could work safely is when a single package +includes several C++ shared libraries which depend on each other. They are +then shipped always together and when one changes, all others need changing +too. +\end{itemize} + +\section{Prelink optimizations to reduce number of conflict fixups} + +\tts{Prelink} can optimize out some conflict fixups if it can prove that +the changes are not observable by the application at runtime (opening its +executable and reading it doesn't count). If there is a data object in some +shared library with a symbol that is overridden by a symbol in a different +shared library earlier in global symbol lookup scope or in the executable, then +that data object is likely never referenced and it shouldn't matter what it +contains. Examine the following example: + +\noindent{{\small\begin{verbatim} +$ cat > test1.c <<EOF +int i, j, k; +struct A { int *a; int *b; int *c; } x = { &i, &j, &k }; +struct A *y = &x; +EOF +$ cat > test2.c <<EOF +int i, j, k; +struct A { int *a; int *b; int *c; } x = { &i, &j, &k }; +struct A *z = &x; +EOF +$ cat > test.c <<EOF +#include <stdio.h> +extern struct A { int *a; int *b; int *c; } *y, *z; +int main (void) +{ + printf ("%p: %p %p %p\n", y, y->a, y->b, y->c); + printf ("%p: %p %p %p\n", z, z->a, z->b, z->c); +} +EOF +$ gcc -nostdlib -shared -fpic -s -o test1.so test1.c +$ gcc -nostdlib -shared -fpic -o test2.so test2.c ./test1.so +$ gcc -o test test.c ./test2.so ./test1.so +$ ./test +0xaf3314: 0xaf33b0 0xaf33a8 0xaf33ac +0xaf3314: 0xaf33b0 0xaf33a8 0xaf33ac +\end{verbatim}} +\prelinklistingcaption{C example where conflict fixups could be optimized out}} + +In this example there are 3 conflict fixups pointing into the 12 byte +long {\sl x} object in \tts{test1.so} shared library (among other +conflicts). 
And nothing in the program can poke at {\sl x} content +in \tts{test1.so}, simply because it has to look at it through +{\sl x} symbol which resolves to \tts{test2.so}. So in this +case \tts{prelink} could skip those 3 conflicts. Unfortunately +it is not that easy: + +\noindent{{\small\begin{verbatim} +$ cat > test3.c <<EOF +int i, j, k; +static struct A { int *a; int *b; int *c; } local = { &i, &j, &k }; +extern struct A x; +struct A *y = &x; +struct A *y2 = &local; +extern struct A x __attribute__((alias ("local"))); +EOF +$ cat > test4.c <<EOF +#include <stdio.h> +extern struct A { int *a; int *b; int *c; } *y, *y2, *z; +int main (void) +{ + printf ("%p: %p %p %p\n", y, y->a, y->b, y->c); + printf ("%p: %p %p %p\n", y2, y2->a, y2->b, y2->c); + printf ("%p: %p %p %p\n", z, z->a, z->b, z->c); +} +EOF +$ gcc -nostdlib -shared -fpic -s -o test3.so test3.c +$ gcc -nostdlib -shared -fpic -o test4.so test2.c ./test3.so +$ gcc -o test4 test4.c ./test4.so ./test3.so +$ ./test4 +0x65a314: 0x65a3b0 0x65a3a8 0x65a3ac +0xbd1328: 0x65a3b0 0x65a3a8 0x65a3ac +0x65a314: 0x65a3b0 0x65a3a8 0x65a3ac +\end{verbatim}} +\prelinklistingcaption{Modified C example where conflict fixups cannot be removed}} + +In this example, there are again 3 conflict fixups pointing into the +12 byte long {\sl x} object in \tts{test3.so} shared library. +The fact that variable {\sl local} is located at the same 12 bytes +is totally invisible to \tts{prelink}, as {\sl local} is a \tts{STB\_LOCAL} +symbol which doesn't show up in \tts{.dynsym} section. But if those +3 conflict fixups are removed, then suddenly program's observable +behavior changes (the last 3 addresses on second line would be +different than those on first or third line). + +Fortunately, there are at least some objects where \tts{prelink} +can be reasonably sure they will never be referenced through some +local alias. Those are various compiler generated objects with +well defined meaning which \tts{prelink} is able to identify +in shared libraries.
The most important ones are C++ virtual tables +and {\sl RTTI} data. They are emitted as COMDAT data by the compiler, +in GCC into \tts{.gnu.linkonce.d.*} sections. Data or code in these +sections can be accessed only through global symbols, otherwise linker +might create unexpected results when two or more of these sections +are merged together (all but one deleted). When \tts{prelink} is checking +for such data, it first checks whether the shared library in question +is linked against \tts{libstdc++.so}. If not, it is not a C++ library +(or incorrectly built one) and thus it makes no sense to search any further. +It looks only in \tts{.data} section, for \tts{STB\_WEAK} \tts{STT\_OBJECT} +symbols whose names start with certain prefixes +\footnote{\tts{\_\_vt\_} for GCC 2.95.x and 2.96-RH virtual tables, +\tts{\_ZTV} for GCC 3.x virtual tables and \tts{\_ZTI} for GCC 3.x {\sl RTTI} data.} +and where no other symbols (in dynamic symbol table) point into the objects. +If these objects are unused because there is a conflict on their symbol, +all conflict fixups pointing into the virtual table or {\sl RTTI} structure +can be discarded. + +Another possible optimization is again related to C++ virtual tables. +Function addresses in them are not intended for pointer comparisons. +C++ code only loads them from the virtual tables and calls through +the pointer. Pointers to member functions are handled differently. +As pointer equivalence is the only reason why all function pointers +resolve to \tts{PLT} slots in the executable even when the executable doesn't +include implementation of the function (i.e. has \tts{SHN\_UNDEF} symbol +with non-zero \tts{st\_value} pointing at the \tts{PLT} slot in the +executable), \tts{prelink} can resolve method addresses in virtual tables +to the actual method implementation. In many cases this is in the same +library as the virtual table (or in one of libraries in its natural +symbol lookup scope), so a conflict fixup is unnecessary. 
+This optimization speeds up programs also after control is transferred +to the application and not just the time to start up the application, +although just a few cycles per method call. + +The conflict fixup reduction is quite big on some programs. +Below are statistics for the \tts{kmail} program on a completely unprelinked box: + +\noindent{{\small\begin{verbatim} +$ LD_DEBUG=statistics /usr/bin/kmail 2>&1 | sed '2,8!d;s/^ *//' +10621: total startup time in dynamic loader: 240724867 clock cycles +10621: time needed for relocation: 234049636 clock cycles (97.2%) +10621: number of relocations: 34854 +10621: number of relocations from cache: 74364 +10621: number of relative relocations: 35351 +10621: time needed to load objects: 6241678 clock cycles (2.5%) +$ ls -l /usr/bin/kmail +-rwxr-xr-x 1 root root 2149084 Oct 2 12:05 /usr/bin/kmail +$ ( Xvfb :3 & ) >/dev/null 2>&1 </dev/null; sleep 20 +$ ( DISPLAY=:3 kmail& ) >/dev/null 2>&1 </dev/null; sleep 10; killall kmail +$ ( DISPLAY=:3 kmail& ) >/dev/null 2>&1 </dev/null; sleep 10 +$ cat /proc/`/sbin/pidof kmail`/statm +4164 4164 3509 224 33 3907 655 +$ killall Xvfb kdeinit kmail +\end{verbatim}} +\prelinklistingcaption{Statistics for unprelinked \tts{kmail}}} + +\tts{statm} special file for a process contains its memory statistics. +The numbers in it mean, in order, total number of used pages (on IA-32 +Linux a page is 4KB), number of resident pages (i.e. not swapped out), +number of shared pages, number of text pages, number of library pages, +number of stack and other pages and number of dirty pages used by the +process. Distinction between text and library pages is very rough, +so those numbers aren't very useful. Of interest are mainly +first number, third number and last number.
+ +Statistics for \tts{kmail} on a completely prelinked box: + +\noindent{{\small\begin{verbatim} +$ LD_DEBUG=statistics /usr/bin/kmail 2>&1 | sed '2,8!d;s/^ *//' +14864: total startup time in dynamic loader: 8409504 clock cycles +14864: time needed for relocation: 3024720 clock cycles (35.9%) +14864: number of relocations: 0 +14864: number of relocations from cache: 8961 +14864: number of relative relocations: 0 +14864: time needed to load objects: 4897336 clock cycles (58.2%) +$ ls -l /usr/bin/kmail +-rwxr-xr-x 1 root root 2269500 Oct 2 12:05 /usr/bin/kmail +$ ( Xvfb :3 & ) >/dev/null 2>&1 </dev/null; sleep 20 +$ ( DISPLAY=:3 kmail& ) >/dev/null 2>&1 </dev/null; sleep 10; killall kmail +$ ( DISPLAY=:3 kmail& ) >/dev/null 2>&1 </dev/null; sleep 10 +$ cat /proc/`/sbin/pidof kmail`/statm +3803 3803 3186 249 33 3521 617 +$ killall Xvfb kdeinit kmail +\end{verbatim}} +\prelinklistingcaption{Statistics for prelinked \tts{kmail}}} + +Statistics for \tts{kmail} on a completely prelinked box with C++ conflict fixup +optimizations turned off: + +\noindent{{\small\begin{verbatim} +$ LD_DEBUG=statistics /usr/bin/kmail 2>&1 | sed '2,8!d;s/^ *//' +20645: total startup time in dynamic loader: 9704168 clock cycles +20645: time needed for relocation: 4734715 clock cycles (48.7%) +20645: number of relocations: 0 +20645: number of relocations from cache: 59871 +20645: number of relative relocations: 0 +20645: time needed to load objects: 4487971 clock cycles (46.2%) +$ ls -l /usr/bin/kmail +-rwxr-xr-x 1 root root 2877360 Oct 2 12:05 /usr/bin/kmail +$ ( Xvfb :3 & ) >/dev/null 2>&1 </dev/null; sleep 20 +$ ( DISPLAY=:3 kmail& ) >/dev/null 2>&1 </dev/null; sleep 10; killall kmail +$ ( DISPLAY=:3 kmail& ) >/dev/null 2>&1 </dev/null; sleep 10 +$ cat /proc/`/sbin/pidof kmail`/statm +3957 3957 3329 398 33 3526 628 +$ killall Xvfb kdeinit kmail +\end{verbatim}} +\prelinklistingcaption{Statistics for prelinked \tts{kmail} without conflict fixup reduction}} + +On this application, C++ conflict
fixup optimizations saved 50910 unneeded +conflict fixups, sped up startup by 13.3\% and decreased number of dirty +pages by 11, which means the application needs 44KB less memory per-process. + +\section{Thread Local Storage support} + +Thread Local Storage ([12], [13], [14]) support has been recently added to +GCC, GNU binutils and GNU C Library. \tts{TLS} support is a set of new +relocations which together with dynamic linker and POSIX thread library +additions provide a faster and easier to use alternative to the traditional +POSIX thread local data API (\tts{pthread\_getspecific}, +\tts{pthread\_setspecific}, \tts{pthread\_key\_*}). + +\tts{TLS} necessitated several changes to \tts{prelink}. Thread Local +symbols (with type \tts{STT\_TLS}) must not be relocated, as they are +relative to the start of \tts{PT\_TLS} segment and thus not virtual +addresses. The dynamic linker had to be enhanced so that it tells +\tts{prelink} at \tts{LD\_TRACE\_PRELINKING} time what \tts{TLS} module +IDs have been assigned and what addresses relative to start of \tts{TLS} +block have been given to \tts{PT\_TLS} segment of each library or executable. +There are 3 classes of new \tts{TLS} dynamic relocations \tts{prelink} +is interested in (with different names on different architectures). + +In the first class are module ID relocations, which are used for \tts{TLS} +Global Dynamic and Local Dynamic models (for Global Dynamic model +they are supposed to resolve to module ID of the executable or shared library +of particular \tts{STT\_TLS} symbol, for Local Dynamic model this +resolves to module ID of the containing shared library). These +relocations are hard to prelink in any useful way without moving +\tts{TLS} module ID assignment from the dynamic linker to \tts{prelink}.
+Although \tts{prelink} can find out what shared library will contain +particular \tts{STT\_TLS} symbol unless there will be conflicts +for that symbol, it doesn't know how many shared libraries with +\tts{PT\_TLS} segment will precede it or whether executable will or +will not have \tts{PT\_TLS} segment. Until \tts{TLS} is widely +deployed by many libraries, \tts{prelink} could guess that +only \tts{libc.so} will have \tts{PT\_TLS} and store 1 (first module ID +the dynamic linker assigns), but given that \tts{libc.so} uses just +one such relocation it is probably not worth doing this when soon other +shared libraries besides \tts{libc.so} and \tts{libGL.so} start using +it heavily. Because of this \tts{prelink} doesn't do anything special +when prelinking shared libraries with these relocations and for each +relocation in this class creates one conflict fixup. + +In the second class are relocations which resolve to \tts{st\_value} +of some \tts{STT\_TLS} symbol. These relocations are used in +Global Dynamic \tts{TLS} model (in Local Dynamic they are resolved +at link time already) and from \tts{prelink}'s point of view they are +much more similar to normal relocations than the other two classes. +When the \tts{STT\_TLS} symbol is looked up successfully in shared library's +natural search scope, \tts{prelink} just stores its \tts{st\_value} +into the relocation. The chances there will be a conflict are even +smaller than with normal symbol lookups, since overloading \tts{TLS} +symbols means wasted memory in each single thread and thus library +writers will try to avoid it if possible. + +The third class includes relocations which resolve to offsets within +program's initial \tts{TLS} block +\footnote{Negative on architectures which have +\tts{TLS} block immediately below thread pointer (e.g. IA-32, AMD64, +SPARC, S/390) and positive on architectures which have \tts{TLS} block +at thread pointer or a few bytes above it (e.g.
PowerPC, Alpha, IA-64, +SuperH).} +Relocations in this class are used in Initial Exec \tts{TLS} model +(or in Local Exec model if this model is supported in shared libraries). +These offsets are even harder to predict than module IDs and unlike +module IDs it wouldn't be very helpful if they were assigned by +\tts{prelink} instead of dynamic linker (which would just read them +from some dynamic tag). That's because \tts{TLS} block needs to be +packed tightly and any assignments in \tts{prelink} couldn't take +into account other shared libraries linked into the same executable +and the executable itself. Similarly to module ID relocations, +\tts{prelink} doesn't do anything about them when prelinking shared +libraries and for each such relocation creates a conflict fixup. + +\section{Prelinking of executables and shared libraries} + +Rewriting of executables is harder than for shared libraries, both because +there are more changes necessary and because shared libraries are +relocatable and thus have dynamic relocations for all absolute addresses. + +After collecting all information from the dynamic linker and assigning +virtual address space slots to all shared libraries, prelinking of shared +libraries involves the following steps: +\begin{itemize} +\item Relocation of the shared library to the assigned base address. +\item \tts{REL} to \tts{RELA} conversion if needed (the only step which +changes sizes of allocated sections in the middle). +\item On architectures which have \tts{SHT\_NOBITS} \tts{.plt} sections, +before relocations are applied the section needs to be converted to +\tts{SHT\_PROGBITS}. As the section needs to be at the end (or after it) +of file backed part of some \tts{PT\_LOAD} segment, this just means that +the file backed part needs to be enlarged, the file filled with zeros +and all following section file offsets or program header entry file +offsets adjusted.
All \tts{SHT\_NOBITS} sections in the same \tts{PT\_LOAD} +segment with virtual addresses lower than the \tts{.plt} start address +need to be converted from \tts{SHT\_NOBITS} to \tts{SHT\_PROGBITS} too. +Without making the section \tts{SHT\_PROGBITS}, \tts{prelink} cannot +apply relocations against it as such sections contain only zeros. +Architectures with \tts{SHT\_NOBITS} \tts{.plt} section supported by +\tts{prelink} are PowerPC and PowerPC64. +\item Applying relocations. For each dynamic relocation in the shared +library, address of relocation's symbol looked up in natural symbol lookup +search scope of the shared library (or 0 if the symbol is not found in +that search scope) is stored in an architecture and relocation type +dependent way to memory pointed to by \tts{r\_offset} field of the relocation. +This step uses symbol lookup information provided by dynamic linker. +\item Addition or modification of \tts{DT\_CHECKSUM} and +\tts{DT\_GNU\_PRELINKED} dynamic tags. +\footnote{\tts{Prelink} is not able to grow \tts{.dynamic} section, so it +needs some spare dynamic tags (\tts{DT\_NULL}) at the end of \tts{.dynamic} +section. GNU linker versions released after August 2001 leave space by +default.} The former is set to checksum of allocated sections in the +shared library, the latter to time of prelinking. +\item On architectures which don't use writable \tts{.plt}, but instead use +\tts{.got.plt} (this section is merged during linking into \tts{.got}) +section, \tts{prelink} typically stores an address pointing into the first PLT slot +in \tts{.plt} section in the reserved second word of \tts{.got} section. +On these architectures, the dynamic linker has to initialize \tts{.plt} +section if lazy binding is used.
On non-prelinked executables or shared libraries +this typically means adding load offset to the values in \tts{.got.plt} +section, for prelinked shared libraries or executables if prelinking +information cannot be used it needs to compute the right values in +\tts{.got.plt} section without looking at this section's content +(since it contains prelinking information). The second word in \tts{.got} +section is used for this computation. +\item Addition of \tts{.gnu.prelink\_undo} unallocated section if not +present yet. This section is used by \tts{prelink} internally during +undo operation. +\item Addition of \tts{.gnu.liblist} and \tts{.gnu.libstr} unallocated +sections or, if they are already present, their update including possible +growing or shrinking. These sections are used only by \tts{prelink} to +compare the dependent libraries (and their order) at the time when the +shared library was prelinked against current dependencies. If a shared +library has no dependencies (e.g. dynamic linker), these sections are not +present. +\end{itemize} + +Adding or resizing an unallocated section needs just file offsets of following +unallocated sections recomputed (ensuring proper alignment), growing section +header table and \tts{.shstrtab} and adding new section names to that section. + +Prelinking of executables involves the following steps: +\begin{itemize} +\item \tts{REL} to \tts{RELA} conversion if needed. +\item \tts{SHT\_NOBITS} to \tts{SHT\_PROGBITS} conversion of \tts{.plt} section +if needed. +\item Applying relocations. +\item Addition or resizing of allocated \tts{.gnu.conflict} section containing +list of conflict fixups. +\item Addition or resizing of allocated \tts{.gnu.liblist} section which is used +by the dynamic linker at runtime to see if none of the dependencies changed +or were reordered. If they were, it continues normal relocation processing, +otherwise they can be skipped and only conflict fixups applied.
+\item Growing of allocated \tts{.dynstr} section, where strings referenced from +\tts{.gnu.liblist} section need to be added. +\item If there are any COPY relocations (which \tts{prelink} wants to handle +rather than deferring them as conflict fixups to runtime), they need to be applied. +\item Modifying second word in \tts{.got} section for \tts{.got.plt} using +architectures. +\item Addition or adjusting of dynamic tags which allow the dynamic linker +to find the \tts{.gnu.liblist} and \tts{.gnu.conflict} sections and their +sizes. \tts{DT\_GNU\_CONFLICT} and \tts{DT\_GNU\_CONFLICTSZ} should be present +if there are any conflict fixups. They should contain the virtual address of +the \tts{.gnu.conflict} section start and its size in bytes, respectively. +\tts{DT\_GNU\_LIBLIST} and \tts{DT\_GNU\_LIBLISTSZ} need to be present in +all prelinked executables and must be equal to the virtual address of +the \tts{.gnu.liblist} section and its size in bytes. +\item Addition of \tts{.gnu.prelink\_undo} unallocated section if not present. +\end{itemize} + +Executables can have absolute relocations already applied (and without a +dynamic relocation) to virtually any allocated \tts{SHT\_PROGBITS} section +\footnote{One exception is \tts{.interp} special section. It shouldn't have +relocations applied to it, nor should any other section reference it.}, +against almost all allocated \tts{SHT\_PROGBITS} and \tts{SHT\_NOBITS} +sections. This means that when growing, adding or shrinking allocated +sections in executables, all \tts{SHT\_PROGBITS} and \tts{SHT\_NOBITS} sections +must keep their original virtual addresses and sizes +\footnote{With a notable exception of splitting one section into two +covering the same virtual address range.}. \tts{Prelink} tries various +places where to put allocated sections which were added or grew: +\begin{itemize} +\item In the unlikely case if there is already some gap between +sections in read-only \tts{PT\_LOAD} segment where the section fits.
+\item If the \tts{SHT\_NOBITS} sections are small enough to fit +into a page together with the preceding \tts{SHT\_PROGBITS} section and there +is still some space in the page after the \tts{SHT\_NOBITS} sections. +In this case, \tts{prelink} converts the \tts{SHT\_NOBITS} sections into +\tts{SHT\_PROGBITS} sections, fills them with zeros and adds the new section +after them. This doesn't increase number of \tts{PT\_LOAD} segments, but +unfortunately those added sections are writable. This doesn't matter +much for e.g. \tts{.gnu.conflict} section which is only used before control +is transferred to the program, but could matter for \tts{.dynstr} which is +used even during \tts{dlopen}. +\item On IA-32, executables have for historical reasons base address 0x8048000. +The reason for this was that when stack was put immediately below executables, +stack and the executable could coexist in the same second level page table. +Linux puts the stack typically at the end of virtual address space and so +keeping this exact base address is not really necessary. \tts{Prelink} can +decrease the base address and thus increase size of read-only \tts{PT\_LOAD} +segment while \tts{SHT\_PROGBITS} and \tts{SHT\_NOBITS} sections can stay +at their previous addresses. Just their file offsets need to be increased. +All these segment header adjustments need to be done in multiples of +\tts{ELF} page sizes, so even if \tts{prelink} chose to do similar things +on architectures other than IA-32 which typically start executables on some address +which is a power of 2, it would be only reasonable if \tts{ELF} page size +on that architecture (which can be much bigger than page size used by the +operating system) is very small. +\item Last possibility is to create a new \tts{PT\_LOAD} segment.
+\footnote{Linux kernels before 2.4.10 loaded executables which had middle \tts{PT\_LOAD} +segment with \tts{p\_memsz} bigger than \tts{p\_filesz} incorrectly, so +\tts{prelink} should be only used on systems with 2.4.10 or later kernels.} +Section immediately above program header table (typically \tts{.interp}) +has to be moved somewhere else, but if possible close to the beginning +of the executable. The new \tts{PT\_LOAD} segment is then added after the +last \tts{PT\_LOAD} segment. The segment has to be writable even when +all the sections in it are read-only, unless it ends exactly on a page +boundary, because \tts{brk} area starts immediately after the end of last +\tts{PT\_LOAD} segment and the executable expects it to be writable. +\end{itemize} + +So that verification works properly, if there is \tts{.gnu.prelink\_undo} +section in the executable, \tts{prelink} first reshuffles the sections and +segments for the purpose of finding places for the sections to the original +sequence as recorded in the \tts{.gnu.prelink\_undo} section. +Examples of the above mentioned cases: + +\noindent{{\small\begin{verbatim} +$ SEDCMD='s/^.* \.plt.*$/.../;/\[.*\.text/,/\[.*\.got/d' +$ SEDCMD2='/Section to Segment/,$d;/^Key to/,/^Program/d;/^[A-Z]/d;/^ *$/d' +$ cat > test1.c <<EOF +int main (void) { return 0; } +EOF +$ gcc -Wl,--verbose 2>&1 \ + | sed '/^===/,/^===/!d;/^===/d;s/\.rel\.dyn/. 
+= 512; &/' > test1.lds +$ gcc -s -O2 -o test1 test1.c -Wl,-T,test1.lds +$ readelf -Sl ./test1 | sed -e "$SEDCMD" -e "$SEDCMD2" + [Nr] Name Type Addr Off Size ES Flg Lk Inf Al + [ 0] NULL 00000000 000000 000000 00 0 0 0 + [ 1] .interp PROGBITS 08048114 000114 000013 00 A 0 0 1 + [ 2] .note.ABI-tag NOTE 08048128 000128 000020 00 A 0 0 4 + [ 3] .hash HASH 08048148 000148 000024 04 A 4 0 4 + [ 4] .dynsym DYNSYM 0804816c 00016c 000040 10 A 5 1 4 + [ 5] .dynstr STRTAB 080481ac 0001ac 000045 00 A 0 0 1 + [ 6] .gnu.version VERSYM 080481f2 0001f2 000008 02 A 4 0 2 + [ 7] .gnu.version_r VERNEED 080481fc 0001fc 000020 00 A 5 1 4 + [ 8] .rel.dyn REL 0804841c 00041c 000008 08 A 4 0 4 + [ 9] .rel.plt REL 08048424 000424 000008 08 A 4 b 4 + [10] .init PROGBITS 0804842c 00042c 000017 00 AX 0 0 4 +... + [22] .bss NOBITS 080496f8 0006f8 000004 00 WA 0 0 4 + [23] .comment PROGBITS 00000000 0006f8 000132 00 0 0 1 + [24] .shstrtab STRTAB 00000000 00082a 0000be 00 0 0 1 + Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align + PHDR 0x000034 0x08048034 0x08048034 0x000e0 0x000e0 R E 0x4 + INTERP 0x000114 0x08048114 0x08048114 0x00013 0x00013 R 0x1 + [Requesting program interpreter: /lib/ld-linux.so.2] + LOAD 0x000000 0x08048000 0x08048000 0x005fc 0x005fc R E 0x1000 + LOAD 0x0005fc 0x080495fc 0x080495fc 0x000fc 0x00100 RW 0x1000 + DYNAMIC 0x000608 0x08049608 0x08049608 0x000c8 0x000c8 RW 0x4 + NOTE 0x000128 0x08048128 0x08048128 0x00020 0x00020 R 0x4 + STACK 0x000000 0x00000000 0x00000000 0x00000 0x00000 RW 0x4 +$ prelink -N ./test1 +$ readelf -Sl ./test1 | sed -e "$SEDCMD" -e "$SEDCMD2" + [Nr] Name Type Addr Off Size ES Flg Lk Inf Al + [ 0] NULL 00000000 000000 000000 00 0 0 0 + [ 1] .interp PROGBITS 08048114 000114 000013 00 A 0 0 1 + [ 2] .note.ABI-tag NOTE 08048128 000128 000020 00 A 0 0 4 + [ 3] .hash HASH 08048148 000148 000024 04 A 4 0 4 + [ 4] .dynsym DYNSYM 0804816c 00016c 000040 10 A 8 1 4 + [ 5] .gnu.liblist GNU_LIBLIST 080481ac 0001ac 000028 14 A 8 0 4 + [ 6] .gnu.version 
VERSYM 080481f2 0001f2 000008 02 A 4 0 2 + [ 7] .gnu.version_r VERNEED 080481fc 0001fc 000020 00 A 8 1 4 + [ 8] .dynstr STRTAB 0804821c 00021c 000058 00 A 0 0 1 + [ 9] .gnu.conflict RELA 08048274 000274 0000c0 0c A 4 0 4 + [10] .rel.dyn REL 0804841c 00041c 000008 08 A 4 0 4 + [11] .rel.plt REL 08048424 000424 000008 08 A 4 d 4 + [12] .init PROGBITS 0804842c 00042c 000017 00 AX 0 0 4 +... + [24] .bss NOBITS 080496f8 0006f8 000004 00 WA 0 0 4 + [25] .comment PROGBITS 00000000 0006f8 000132 00 0 0 1 + [26] .gnu.prelink_undo PROGBITS 00000000 00082c 0004d4 01 0 0 4 + [27] .shstrtab STRTAB 00000000 000d00 0000eb 00 0 0 1 + Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align + PHDR 0x000034 0x08048034 0x08048034 0x000e0 0x000e0 R E 0x4 + INTERP 0x000114 0x08048114 0x08048114 0x00013 0x00013 R 0x1 + [Requesting program interpreter: /lib/ld-linux.so.2] + LOAD 0x000000 0x08048000 0x08048000 0x005fc 0x005fc R E 0x1000 + LOAD 0x0005fc 0x080495fc 0x080495fc 0x000fc 0x00100 RW 0x1000 + DYNAMIC 0x000608 0x08049608 0x08049608 0x000c8 0x000c8 RW 0x4 + NOTE 0x000128 0x08048128 0x08048128 0x00020 0x00020 R 0x4 + STACK 0x000000 0x00000000 0x00000000 0x00000 0x00000 RW 0x4 +\end{verbatim}} +\prelinklistingcaption{Reshuffling of an executable with a gap between sections}} + +\begin{figure}[!ht] +\centering +\includegraphics[width=\textwidth]{gap} +\caption{Reshuffling of an executable with a gap between sections} +\end{figure} + +In the above sample, there was enough space between sections (particularly +between the end of the \tts{.gnu.version\_r} section and the start of \tts{.rel.dyn}) +that the new sections could be added there. 
+ +\noindent{{\small\begin{verbatim} +$ SEDCMD='s/^.* \.plt.*$/.../;/\[.*\.text/,/\[.*\.got/d' +$ SEDCMD2='/Section to Segment/,$d;/^Key to/,/^Program/d;/^[A-Z]/d;/^ *$/d' +$ cat > test2.c <<EOF +int main (void) { return 0; } +EOF +$ gcc -s -O2 -o test2 test2.c +$ readelf -Sl ./test2 | sed -e "$SEDCMD" -e "$SEDCMD2" + [Nr] Name Type Addr Off Size ES Flg Lk Inf Al + [ 0] NULL 00000000 000000 000000 00 0 0 0 + [ 1] .interp PROGBITS 08048114 000114 000013 00 A 0 0 1 + [ 2] .note.ABI-tag NOTE 08048128 000128 000020 00 A 0 0 4 + [ 3] .hash HASH 08048148 000148 000024 04 A 4 0 4 + [ 4] .dynsym DYNSYM 0804816c 00016c 000040 10 A 5 1 4 + [ 5] .dynstr STRTAB 080481ac 0001ac 000045 00 A 0 0 1 + [ 6] .gnu.version VERSYM 080481f2 0001f2 000008 02 A 4 0 2 + [ 7] .gnu.version_r VERNEED 080481fc 0001fc 000020 00 A 5 1 4 + [ 8] .rel.dyn REL 0804821c 00021c 000008 08 A 4 0 4 + [ 9] .rel.plt REL 08048224 000224 000008 08 A 4 b 4 + [10] .init PROGBITS 0804822c 00022c 000017 00 AX 0 0 4 +... + [22] .bss NOBITS 080494f8 0004f8 000004 00 WA 0 0 4 + [23] .comment PROGBITS 00000000 0004f8 000132 00 0 0 1 + [24] .shstrtab STRTAB 00000000 00062a 0000be 00 0 0 1 + Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align + PHDR 0x000034 0x08048034 0x08048034 0x000e0 0x000e0 R E 0x4 + INTERP 0x000114 0x08048114 0x08048114 0x00013 0x00013 R 0x1 + [Requesting program interpreter: /lib/ld-linux.so.2] + LOAD 0x000000 0x08048000 0x08048000 0x003fc 0x003fc R E 0x1000 + LOAD 0x0003fc 0x080493fc 0x080493fc 0x000fc 0x00100 RW 0x1000 + DYNAMIC 0x000408 0x08049408 0x08049408 0x000c8 0x000c8 RW 0x4 + NOTE 0x000128 0x08048128 0x08048128 0x00020 0x00020 R 0x4 + STACK 0x000000 0x00000000 0x00000000 0x00000 0x00000 RW 0x4 +$ prelink -N ./test2 +$ readelf -Sl ./test2 | sed -e "$SEDCMD" -e "$SEDCMD2" + [Nr] Name Type Addr Off Size ES Flg Lk Inf Al + [ 0] NULL 00000000 000000 000000 00 0 0 0 + [ 1] .interp PROGBITS 08048114 000114 000013 00 A 0 0 1 + [ 2] .note.ABI-tag NOTE 08048128 000128 000020 00 A 0 0 4 + [ 3] 
.hash HASH 08048148 000148 000024 04 A 4 0 4 + [ 4] .dynsym DYNSYM 0804816c 00016c 000040 10 A 23 1 4 + [ 5] .gnu.liblist GNU_LIBLIST 080481ac 0001ac 000028 14 A 23 0 4 + [ 6] .gnu.version VERSYM 080481f2 0001f2 000008 02 A 4 0 2 + [ 7] .gnu.version_r VERNEED 080481fc 0001fc 000020 00 A 23 1 4 + [ 8] .rel.dyn REL 0804821c 00021c 000008 08 A 4 0 4 + [ 9] .rel.plt REL 08048224 000224 000008 08 A 4 b 4 + [10] .init PROGBITS 0804822c 00022c 000017 00 AX 0 0 4 +... + [22] .bss PROGBITS 080494f8 0004f8 000004 00 WA 0 0 4 + [23] .dynstr STRTAB 080494fc 0004fc 000058 00 A 0 0 1 + [24] .gnu.conflict RELA 08049554 000554 0000c0 0c A 4 0 4 + [25] .comment PROGBITS 00000000 000614 000132 00 0 0 1 + [26] .gnu.prelink_undo PROGBITS 00000000 000748 0004d4 01 0 0 4 + [27] .shstrtab STRTAB 00000000 000c1c 0000eb 00 0 0 1 + Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align + PHDR 0x000034 0x08048034 0x08048034 0x000e0 0x000e0 R E 0x4 + INTERP 0x000114 0x08048114 0x08048114 0x00013 0x00013 R 0x1 + [Requesting program interpreter: /lib/ld-linux.so.2] + LOAD 0x000000 0x08048000 0x08048000 0x003fc 0x003fc R E 0x1000 + LOAD 0x0003fc 0x080493fc 0x080493fc 0x00218 0x00218 RW 0x1000 + DYNAMIC 0x000408 0x08049408 0x08049408 0x000c8 0x000c8 RW 0x4 + NOTE 0x000128 0x08048128 0x08048128 0x00020 0x00020 R 0x4 + STACK 0x000000 0x00000000 0x00000000 0x00000 0x00000 RW 0x4 +\end{verbatim}} +\prelinklistingcaption{Reshuffling of an executable with small \tts{.bss}}} + +\begin{figure}[!ht] +\centering +\includegraphics[width=\textwidth]{bss} +\caption{Reshuffling of an executable with small \tts{.bss}} +\end{figure} + +In this case \tts{.bss} section was small enough that \tts{prelink} +converted it to \tts{SHT\_PROGBITS}. 
+ +\noindent{{\small\begin{verbatim} +$ SEDCMD='s/^.* \.plt.*$/.../;/\[.*\.text/,/\[.*\.got/d' +$ SEDCMD2='/Section to Segment/,$d;/^Key to/,/^Program/d;/^[A-Z]/d;/^ *$/d' +$ cat > test3.c <<EOF +int foo [4096]; +int main (void) { return 0; } +EOF +$ gcc -s -O2 -o test3 test3.c +$ readelf -Sl ./test3 | sed -e "$SEDCMD" -e "$SEDCMD2" + [Nr] Name Type Addr Off Size ES Flg Lk Inf Al + [ 0] NULL 00000000 000000 000000 00 0 0 0 + [ 1] .interp PROGBITS 08048114 000114 000013 00 A 0 0 1 + [ 2] .note.ABI-tag NOTE 08048128 000128 000020 00 A 0 0 4 + [ 3] .hash HASH 08048148 000148 000024 04 A 4 0 4 + [ 4] .dynsym DYNSYM 0804816c 00016c 000040 10 A 5 1 4 + [ 5] .dynstr STRTAB 080481ac 0001ac 000045 00 A 0 0 1 + [ 6] .gnu.version VERSYM 080481f2 0001f2 000008 02 A 4 0 2 + [ 7] .gnu.version_r VERNEED 080481fc 0001fc 000020 00 A 5 1 4 + [ 8] .rel.dyn REL 0804821c 00021c 000008 08 A 4 0 4 + [ 9] .rel.plt REL 08048224 000224 000008 08 A 4 b 4 + [10] .init PROGBITS 0804822c 00022c 000017 00 AX 0 0 4 +... 
+ [22] .bss NOBITS 08049500 000500 004020 00 WA 0 0 32 + [23] .comment PROGBITS 00000000 000500 000132 00 0 0 1 + [24] .shstrtab STRTAB 00000000 000632 0000be 00 0 0 1 + Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align + PHDR 0x000034 0x08048034 0x08048034 0x000e0 0x000e0 R E 0x4 + INTERP 0x000114 0x08048114 0x08048114 0x00013 0x00013 R 0x1 + [Requesting program interpreter: /lib/ld-linux.so.2] + LOAD 0x000000 0x08048000 0x08048000 0x003fc 0x003fc R E 0x1000 + LOAD 0x0003fc 0x080493fc 0x080493fc 0x000fc 0x04124 RW 0x1000 + DYNAMIC 0x000408 0x08049408 0x08049408 0x000c8 0x000c8 RW 0x4 + NOTE 0x000128 0x08048128 0x08048128 0x00020 0x00020 R 0x4 + STACK 0x000000 0x00000000 0x00000000 0x00000 0x00000 RW 0x4 +$ prelink -N ./test3 +$ readelf -Sl ./test3 | sed -e "$SEDCMD" -e "$SEDCMD2" + [Nr] Name Type Addr Off Size ES Flg Lk Inf Al + [ 0] NULL 00000000 000000 000000 00 0 0 0 + [ 1] .interp PROGBITS 08047114 000114 000013 00 A 0 0 1 + [ 2] .note.ABI-tag NOTE 08047128 000128 000020 00 A 0 0 4 + [ 3] .dynstr STRTAB 08047148 000148 000058 00 A 0 0 1 + [ 4] .gnu.liblist GNU_LIBLIST 080471a0 0001a0 000028 14 A 3 0 4 + [ 5] .gnu.conflict RELA 080471c8 0001c8 0000c0 0c A 7 0 4 + [ 6] .hash HASH 08048148 001148 000024 04 A 7 0 4 + [ 7] .dynsym DYNSYM 0804816c 00116c 000040 10 A 3 1 4 + [ 8] .gnu.version VERSYM 080481f2 0011f2 000008 02 A 7 0 2 + [ 9] .gnu.version_r VERNEED 080481fc 0011fc 000020 00 A 3 1 4 + [10] .rel.dyn REL 0804821c 00121c 000008 08 A 7 0 4 + [11] .rel.plt REL 08048224 001224 000008 08 A 7 d 4 + [12] .init PROGBITS 0804822c 00122c 000017 00 AX 0 0 4 +... 
+ [24] .bss NOBITS 08049500 0014f8 004020 00 WA 0 0 32 + [25] .comment PROGBITS 00000000 0014f8 000132 00 0 0 1 + [26] .gnu.prelink_undo PROGBITS 00000000 00162c 0004d4 01 0 0 4 + [27] .shstrtab STRTAB 00000000 001b00 0000eb 00 0 0 1 + Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align + PHDR 0x000034 0x08047034 0x08047034 0x000e0 0x000e0 R E 0x4 + INTERP 0x000114 0x08047114 0x08047114 0x00013 0x00013 R 0x1 + [Requesting program interpreter: /lib/ld-linux.so.2] + LOAD 0x000000 0x08047000 0x08047000 0x013fc 0x013fc R E 0x1000 + LOAD 0x0013fc 0x080493fc 0x080493fc 0x000fc 0x04124 RW 0x1000 + DYNAMIC 0x001408 0x08049408 0x08049408 0x000c8 0x000c8 RW 0x4 + NOTE 0x000128 0x08047128 0x08047128 0x00020 0x00020 R 0x4 + STACK 0x000000 0x00000000 0x00000000 0x00000 0x00000 RW 0x4 +\end{verbatim}} +\prelinklistingcaption{Reshuffling of an executable with decreasing of base address}} + +\begin{figure}[!ht] +\centering +\includegraphics[width=\textwidth]{basemove} +\caption{Reshuffling of an executable with decreasing of the base address} +\end{figure} + +In \tts{test3} the base address of the executable was decreased by one page and +the new sections added there. 
+ +\noindent{{\small\begin{verbatim} +$ SEDCMD='s/^.* \.plt.*$/.../;/\[.*\.text/,/\[.*\.got/d' +$ SEDCMD2='/Section to Segment/,$d;/^Key to/,/^Program/d;/^[A-Z]/d;/^ *$/d' +$ cat > test4.c <<EOF +int foo [4096]; +int main (void) { return 0; } +EOF +$ gcc -Wl,--verbose 2>&1 \ + | sed '/^===/,/^===/!d;/^===/d;s/0x08048000/0x08000000/' > test4.lds +$ gcc -s -O2 -o test4 test4.c -Wl,-T,test4.lds +$ readelf -Sl ./test4 | sed -e "$SEDCMD" -e "$SEDCMD2" + [Nr] Name Type Addr Off Size ES Flg Lk Inf Al + [ 0] NULL 00000000 000000 000000 00 0 0 0 + [ 1] .interp PROGBITS 08000114 000114 000013 00 A 0 0 1 + [ 2] .note.ABI-tag NOTE 08000128 000128 000020 00 A 0 0 4 + [ 3] .hash HASH 08000148 000148 000024 04 A 4 0 4 + [ 4] .dynsym DYNSYM 0800016c 00016c 000040 10 A 5 1 4 + [ 5] .dynstr STRTAB 080001ac 0001ac 000045 00 A 0 0 1 + [ 6] .gnu.version VERSYM 080001f2 0001f2 000008 02 A 4 0 2 + [ 7] .gnu.version_r VERNEED 080001fc 0001fc 000020 00 A 5 1 4 + [ 8] .rel.dyn REL 0800021c 00021c 000008 08 A 4 0 4 + [ 9] .rel.plt REL 08000224 000224 000008 08 A 4 b 4 + [10] .init PROGBITS 0800022c 00022c 000017 00 AX 0 0 4 +... 
+ [22] .bss NOBITS 08001500 000500 004020 00 WA 0 0 32 + [23] .comment PROGBITS 00000000 000500 000132 00 0 0 1 + [24] .shstrtab STRTAB 00000000 000632 0000be 00 0 0 1 + Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align + PHDR 0x000034 0x08000034 0x08000034 0x000e0 0x000e0 R E 0x4 + INTERP 0x000114 0x08000114 0x08000114 0x00013 0x00013 R 0x1 + [Requesting program interpreter: /lib/ld-linux.so.2] + LOAD 0x000000 0x08000000 0x08000000 0x003fc 0x003fc R E 0x1000 + LOAD 0x0003fc 0x080013fc 0x080013fc 0x000fc 0x04124 RW 0x1000 + DYNAMIC 0x000408 0x08001408 0x08001408 0x000c8 0x000c8 RW 0x4 + NOTE 0x000128 0x08000128 0x08000128 0x00020 0x00020 R 0x4 + STACK 0x000000 0x00000000 0x00000000 0x00000 0x00000 RW 0x4 +$ prelink -N ./test4 +$ readelf -Sl ./test4 | sed -e "$SEDCMD" -e "$SEDCMD2" + [Nr] Name Type Addr Off Size ES Flg Lk Inf Al + [ 0] NULL 00000000 000000 000000 00 0 0 0 + [ 1] .interp PROGBITS 08000134 000134 000013 00 A 0 0 1 + [ 2] .note.ABI-tag NOTE 08000148 000148 000020 00 A 0 0 4 + [ 3] .hash HASH 08000168 000168 000024 04 A 4 0 4 + [ 4] .dynsym DYNSYM 0800018c 00018c 000040 10 A 22 1 4 + [ 5] .gnu.version VERSYM 080001f2 0001f2 000008 02 A 4 0 2 + [ 6] .gnu.version_r VERNEED 080001fc 0001fc 000020 00 A 22 1 4 + [ 7] .rel.dyn REL 0800021c 00021c 000008 08 A 4 0 4 + [ 8] .rel.plt REL 08000224 000224 000008 08 A 4 a 4 + [ 9] .init PROGBITS 0800022c 00022c 000017 00 AX 0 0 4 +... 
+ [21] .bss NOBITS 08001500 0004f8 004020 00 WA 0 0 32 + [22] .dynstr STRTAB 080064f8 0004f8 000058 00 A 0 0 1 + [23] .gnu.liblist GNU_LIBLIST 08006550 000550 000028 14 A 22 0 4 + [24] .gnu.conflict RELA 08006578 000578 0000c0 0c A 4 0 4 + [25] .comment PROGBITS 00000000 000638 000132 00 0 0 1 + [26] .gnu.prelink_undo PROGBITS 00000000 00076c 0004d4 01 0 0 4 + [27] .shstrtab STRTAB 00000000 000c40 0000eb 00 0 0 1 + Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align + PHDR 0x000034 0x08000034 0x08000034 0x000e0 0x000e0 R E 0x4 + INTERP 0x000134 0x08000134 0x08000134 0x00013 0x00013 R 0x1 + [Requesting program interpreter: /lib/ld-linux.so.2] + LOAD 0x000000 0x08000000 0x08000000 0x003fc 0x003fc R E 0x1000 + LOAD 0x0003fc 0x080013fc 0x080013fc 0x000fc 0x04124 RW 0x1000 + LOAD 0x0004f8 0x080064f8 0x080064f8 0x00140 0x00140 RW 0x1000 + DYNAMIC 0x000408 0x08001408 0x08001408 0x000c8 0x000c8 RW 0x4 + NOTE 0x000148 0x08000148 0x08000148 0x00020 0x00020 R 0x4 + STACK 0x000000 0x00000000 0x00000000 0x00000 0x00000 RW 0x4 +\end{verbatim}} +\prelinklistingcaption{Reshuffling of an executable with addition of a new segment}} + +\begin{figure}[!ht] +\centering +\includegraphics[width=\textwidth]{newseg} +\caption{Reshuffling of an executable with addition of a new segment} +\end{figure} + +In the last example, base address was not decreased but instead a new +\tts{PT\_LOAD} segment has been added. + +\tts{R\_<arch>\_COPY} relocations are typically against first part of the +\tts{SHT\_NOBITS} \tts{.bss} section. So that \tts{prelink} can apply them, +it needs to first change their section to \tts{SHT\_PROGBITS}, but as \tts{.bss} +section typically occupies much larger part of memory, it is not desirable +to convert \tts{.bss} section into \tts{SHT\_PROGBITS} as whole. 
A section +cannot be partly \tts{SHT\_PROGBITS} and partly \tts{SHT\_NOBITS}, so \tts{prelink} +first splits the section into two parts, first \tts{.dynbss} which covers the area +from the start of the \tts{.bss} section up to the highest byte to which some COPY +relocation is applied and then the old \tts{.bss}. The first is converted +to \tts{SHT\_PROGBITS} and its size is decreased, the latter stays \tts{SHT\_NOBITS} +and its start address and file offset are adjusted as well as its size decreased. +The dynamic linker handles relocations in the executable last, so \tts{prelink} +cannot just copy memory from the shared library in which the symbol of the COPY +relocation has been looked up. There might be relocations applied by the +dynamic linker in normal relocation processing to the objects, so \tts{prelink} +has to first process the relocations against that memory area. Relocations +which don't need conflict fixups are already applied, so \tts{prelink} just +needs to apply conflict fixups against the memory area, then copy it +to the newly created \tts{.dynbss} section. 
+ +Here is an example which shows various things which COPY relocation handling +in \tts{prelink} needs to deal with: + +\noindent{{\small\begin{verbatim} +$ cat > test1.c <<EOF +struct A { char a; struct A *b; int *c; int *d; }; +int bar, baz; +struct A foo = { 1, &foo, &bar, &baz }; +int *addr (void) { return &baz; } +EOF +$ cat > test.c <<EOF +#include <stdio.h> +struct A { char a; struct A *b; int *c; int *d; }; +int bar, *addr (void), big[8192]; +extern struct A foo; +int main (void) +{ + printf ("%p: %d %p %p %p %p %p\n", &foo, foo.a, foo.b, foo.c, foo.d, + &bar, addr ()); +} +EOF +$ gcc -nostdlib -shared -fpic -s -o test1.so test1.c +$ gcc -s -o test test.c ./test1.so +$ ./test +0x80496c0: 1 0x80496c0 0x80516e0 0x4833a4 0x80516e0 0x4833a4 +$ readelf -r test | sed '/\.rel\.dyn/,/\.rel\.plt/!d;/^0/!d' +080496ac 00000c06 R_386_GLOB_DAT 00000000 __gmon_start__ +080496c0 00000605 R_386_COPY 080496c0 foo +$ readelf -S test | grep bss + [22] .bss NOBITS 080496c0 0006c0 008024 00 WA 0 0 32 +$ prelink -N ./test ./test1.so +$ readelf -s test | grep foo + 6: 080496c0 16 OBJECT GLOBAL DEFAULT 25 foo +$ readelf -s test1.so | grep foo + 15: 004a9314 16 OBJECT GLOBAL DEFAULT 6 foo +$ readelf -r test | sed '/.gnu.conflict/,/\.rel\.dyn/!d;/^0/!d' +004a9318 00000001 R_386_32 080496c0 +004a931c 00000001 R_386_32 080516e0 +005f9874 00000001 R_386_32 fffffff0 +005f9878 00000001 R_386_32 00000001 +005f98bc 00000001 R_386_32 fffffff4 +005f9900 00000001 R_386_32 ffffffec +005f9948 00000001 R_386_32 ffffffdc +005f995c 00000001 R_386_32 ffffffe0 +005f9980 00000001 R_386_32 fffffff8 +005f9988 00000001 R_386_32 ffffffe4 +005f99a4 00000001 R_386_32 ffffffd8 +005f99c4 00000001 R_386_32 ffffffe8 +005f99d8 00000001 R_386_32 08048584 +004c2510 00000007 R_386_JUMP_SLOT 00534460 +004c2514 00000007 R_386_JUMP_SLOT 00534080 +004c2518 00000007 R_386_JUMP_SLOT 00534750 +004c251c 00000007 R_386_JUMP_SLOT 005342c0 +004c2520 00000007 R_386_JUMP_SLOT 00534200 +$ objdump -s -j .dynbss test + +test: 
file format elf32-i386 + +Contents of section .dynbss: + 80496c0 01000000 c0960408 e0160508 a4934a00 ..............J. +$ objdump -s -j .data test1.so + +test1.so: file format elf32-i386 + +Contents of section .data: + 4a9314 01000000 14934a00 a8934a00 a4934a00 ......J...J...J. +$ readelf -S test | grep bss + [24] .dynbss PROGBITS 080496c0 0016c0 000010 00 WA 0 0 32 + [25] .bss NOBITS 080496d0 0016d0 008014 00 WA 0 0 32 +$ sed 's/8192/1/' test.c > test2.c +$ gcc -s -o test2 test2.c ./test1.so +$ readelf -S test2 | grep bss + [22] .bss NOBITS 080496b0 0006b0 00001c 00 WA 0 0 8 +$ prelink -N ./test2 ./test1.so +$ readelf -S test2 | grep bss + [22] .dynbss PROGBITS 080496b0 0006b0 000010 00 WA 0 0 8 + [23] .bss PROGBITS 080496c0 0006c0 00000c 00 WA 0 0 8 +\end{verbatim}} +\prelinklistingcaption{Relocation handling of \tts{.dynbss} objects}} + +Because \tts{test.c} executable is not compiled as position independent code and +takes address of {\sl foo} variable, a COPY relocation is needed to avoid +dynamic relocation against executable's read-only \tts{PT\_LOAD} segment. +The {\sl foo} object in \tts{test1.so} has one field with no relocations +applied at all, one relocation against the variable itself, one relocation +which needs a conflict fixup (as it is overridden by the variable in the +executable) and one with relocation which doesn't need any fixups. +The first and last field contain already the right values in prelinked +\tts{test1.so}, while second and third one need to be changed for symbol +addresses in the executable (as shown in the \tts{objdump} output). +The conflict fixups against {\sl foo} in \tts{test1.so} need to stay +(unless it is a C++ virtual table or {\sl RTTI} data, i.e. not in this testcase). 
+In \tts{test}, \tts{prelink} changed \tts{.dynbss} to \tts{SHT\_PROGBITS} +and kept \tts{SHT\_NOBITS} \tts{.bss}, while in the slightly modified testcase +(\tts{test2}) the size of \tts{.bss} was small enough that \tts{prelink} +chose to make it \tts{SHT\_PROGBITS} too and grow the read-write +\tts{PT\_LOAD} segment and put \tts{.dynstr} and \tts{.gnu.conflict} +sections after it. + +\section{Prelink undo operation} + +Prelinking of shared libraries and executables is designed to be reversible, +so that a prelink operation followed by an undo operation generates a file +bitwise identical to the original before prelinking. For this operation +\tts{prelink} stores the original \tts{ELF} header, all the program headers and +all section headers into a \tts{.gnu.prelink\_undo} section before it starts prelinking +an unprelinked executable or shared library. When undoing the modifications, +\tts{prelink} has to convert \tts{RELA} back to \tts{REL} first if \tts{REL} +to \tts{RELA} conversion was done during prelinking and relocate all allocated +sections above it down to adjust for the section shrink. +Relocation types which were changed when trying to avoid \tts{REL} to +\tts{RELA} conversion need to be changed back (e.g. on IA-32, it is +assumed \tts{R\_386\_GLOB\_DAT} relocations should be only those +against the \tts{.got} section and \tts{R\_386\_32} relocations in the +remaining places). On \tts{RELA} architectures, the memory pointed to +by the \tts{r\_offset} field of the relocations needs to be reinitialized +to the values stored there by the linker originally. +For \tts{prelink} it doesn't matter much what this value is (e.g. +always 0, copy of \tts{r\_addend}, etc.), as long as it is computable +from the information \tts{prelink} has during the undo operation +\footnote{Such as relocation type, \tts{r\_addend} value, +type, binding, flags or other attributes of the relocation's symbol, +what section the relocation points into or the offset within +the section it points to.}. 
The GNU linker had to be changed on several +architectures, so that it stores such a value there, as in several places +the value e.g. depended on the original addend before final link (which is +not available anywhere after final link time, since \tts{r\_addend} +field could be adjusted during the final link). +If the second word of the \tts{.got} section has been modified, it needs +to be reverted to the original value (on most architectures zero). +In executables, sections which were moved during prelinking need to be +put back and segments added while prelinking must be removed. + +There are 3 different ways in which an undo operation can be performed: +\begin{itemize} +\item Undoing individual executables or shared libraries specified on the +command line in place (i.e. when the undo operation is successful, +the prelinked executable or library is atomically replaced with the +undone object). +\item With the \tts{-o} option, only a single executable or shared library +given on the command line is undone and stored to the file specified +as the \tts{-o} option's argument. +\item With the \tts{-ua} options, \tts{prelink} builds a list of executables +in paths written in its config file (plus directories and executables +or libraries from the command line) and all shared libraries these executables +depend on. All executables and libraries in the list are then unprelinked. +This option is used to unprelink the whole system. It is not perfect +and needs to be worked on, since e.g. if some executable uses some shared +library which no other executable links against, this executable (and shared +library) is prelinked, then the executable is removed (e.g. uninstalled) +but the shared library is kept, then the shared library is not +unprelinked unless specifically mentioned on the command line. +\end{itemize} + +\section{Verification of prelinked files} + +As \tts{prelink} needs to modify executables and shared libraries installed +on a system, it complicates system integrity verification (e.g. 
\tts{rpm -V}, +TripWire). These systems store checksums of installed files in some +database and during verification compute them again and compare them to the +values stored in the database. On a prelinked system most of the executables +and shared libraries would be reported as modified. \tts{Prelink} offers +a special mode for these systems, in which it verifies that unprelinking +the executable or shared library followed by immediate prelinking (with the +same base address) creates output bitwise identical to the executable +or shared library that's being verified. Furthermore, depending on +other \tts{prelink} options, it either writes the unprelinked image +to its standard output or computes an MD5 or SHA1 digest from this unprelinked +image. A mere undo operation on a file followed by checksumming it is not good +enough, since an intruder could have modified e.g. conflict fixups or +memory which relocations point at, changing the behavior of the program +while the file after unprelinking would be unmodified. + +During verification, both the \tts{prelink} executable and the dynamic linker +are used, so a proper system integrity verification first checks whether the +\tts{prelink} executable (which is statically linked for this reason) hasn't +been modified, then uses \tts{prelink --verify} to verify the dynamic linker +(when verifying \tts{ld.so} the dynamic linker is not executed) +followed by verification of other executables and libraries. + +Verification requires all dependencies of the checked object to be unmodified +since the last prelinking. If some dependency has been changed or is missing, +\tts{prelink} will report it and return with a non-zero exit status. +This is because prelinking depends on their content and so if they are +modified, the executable or shared library might be different from the one produced by +unprelinking followed by prelinking again. 
In the future, perhaps it +would be possible to even verify executables or shared libraries without +unmodified dependencies, under the assumption that in such a case the prelink +information will not be used. It would just need to verify that nothing +else but the information only used when dependencies are up to date +has changed between the executable or library on the filesystem and the file +after an unprelink followed by prelink cycle. The prelink operation +would need to be modified in this case, so that no information is +collected from the dynamic linker, the list of dependencies is assumed +to be the one stored in the executable, and the number of conflict fixups +is expected to be identical. + +\section{Measurements} + +There are two areas where \tts{prelink} can speed things up noticeably. +The primary one is certainly the startup time of big GUI applications where the +dynamic linker spends from 100ms up to a few seconds before giving control +to the application. Another area is when lots of small programs are started +up, but their execution time is rather short, so the startup time which +\tts{prelink} optimizes is a noticeable fraction of the total time. +This is typical for shell scripting. + +The first numbers are from the \tts{lmbench} benchmark, version 3.0-a3. +Most of the benchmarks in the \tts{lmbench} suite measure kernel speed, +so it doesn't matter much whether \tts{prelink} is used or not. +Only in the \tts{lat\_proc} benchmark does \tts{prelink} show up visibly. +This benchmark measures 3 different things: +\begin{itemize} +\item {\sl fork proc}, which is \tts{fork()} followed by immediate +\tts{exit(1)} in the child and \tts{wait(0)} in the parent. The results +are (as expected) about the same between unprelinked and prelinked systems. +\item {\sl exec proc}, i.e. \tts{fork()} followed by immediate +\tts{close(1)} and \tts{execve()} of a simple hello world program (this +program is compiled and linked during the benchmark into a temporary +directory and is never prelinked). 
The numbers are 160$\mu$s to 200$\mu$s +better on prelinked systems, because there is no relocation processing needed +initially in the dynamic linker and because all relative relocations +in \tts{libc.so.6} can be skipped. +\item {\sl sh proc}, i.e. \tts{fork()} followed by immediate \tts{close(1)} +and \tts{execlp("/bin/sh", "sh", "-c", "/tmp/hello", 0)}. Although +the hello world program is not prelinked in this case either, the shell is, +so out of the 900$\mu$s to 1000$\mu$s speedup less than 200$\mu$s can be +attributed to the speedup of the hello world program as in the {\sl exec proc} +benchmark and the rest to the speedup of shell startup. +\end{itemize} + +The first 4 rows are from running the benchmark on a fully unprelinked system, +the last 4 rows on the same system, but fully prelinked. + +\noindent{{\small\begin{verbatim} + L M B E N C H 3 . 0 S U M M A R Y + ------------------------------------ + (Alpha software, do not distribute) + +Processor, Processes - times in microseconds - smaller is better +------------------------------------------------------------------------- +Host OS Mhz null null open slct sig sig fork exec sh + call I/O stat clos TCP inst hndl proc proc proc +---- ------------ ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- +pork Linux 2.4.22 651 0.53 0.97 6.20 8.10 41.2 1.44 4.30 276. 1497 5403 +pork Linux 2.4.22 651 0.53 0.95 6.14 7.91 37.8 1.43 4.34 274. 1486 5391 +pork Linux 2.4.22 651 0.56 0.94 6.18 8.09 43.4 1.41 4.30 251. 1507 5423 +pork Linux 2.4.22 651 0.53 0.94 6.12 8.09 41.0 1.43 4.40 256. 1497 5385 +pork Linux 2.4.22 651 0.56 0.94 5.79 7.58 39.1 1.41 4.30 271. 1319 4460 +pork Linux 2.4.22 651 0.56 0.92 5.76 7.40 38.9 1.41 4.30 253. 1304 4417 +pork Linux 2.4.22 651 0.56 0.95 6.20 7.83 37.7 1.41 4.37 248. 1323 4481 +pork Linux 2.4.22 651 0.56 1.01 6.04 7.77 37.9 1.43 4.32 256. 
1324 4457 +\end{verbatim}} +\prelinklistingcaption{\tts{lmbench} results without and with prelinking}} + +Below is a sample timing of a 239K long configure shell script from GCC +on both an unprelinked and a prelinked system. The preparation step was as follows: + +\noindent{{\small\begin{verbatim} +cd; cvs -d :pserver:anoncvs@subversions.gnu.org:/cvsroot/gcc login +# Empty password +cvs -d :pserver:anoncvs@subversions.gnu.org:/cvsroot/gcc -z3 co -D20031103 gcc +mkdir ~/gcc/obj +cd ~/gcc/obj; ../configure i386-redhat-linux; make configure-gcc +\end{verbatim}} +\prelinklistingcaption{Preparation script for shell script tests}} + +On an unprelinked system, the results were: + +\noindent{{\small\begin{verbatim} +cd ~/gcc/obj/gcc +for i in 1 2; do ./config.status --recheck > /dev/null 2>&1; done +for i in 1 2 3 4; do time ./config.status --recheck > /dev/null 2>&1; done + +real 0m4.436s +user 0m1.730s +sys 0m1.260s + +real 0m4.409s +user 0m1.660s +sys 0m1.340s + +real 0m4.431s +user 0m1.810s +sys 0m1.300s + +real 0m4.432s +user 0m1.670s +sys 0m1.210s +\end{verbatim}} +\prelinklistingcaption{Shell script test results on unprelinked system}} + +and on a fully prelinked system: + +\noindent{{\small\begin{verbatim} +cd ~/gcc/obj/gcc +for i in 1 2; do ./config.status --recheck > /dev/null 2>&1; done +for i in 1 2 3 4; do time ./config.status --recheck > /dev/null 2>&1; done + +real 0m4.126s +user 0m1.590s +sys 0m1.240s + +real 0m4.151s +user 0m1.620s +sys 0m1.230s + +real 0m4.161s +user 0m1.600s +sys 0m1.190s + +real 0m4.122s +user 0m1.570s +sys 0m1.230s +\end{verbatim}} +\prelinklistingcaption{Shell script test results on prelinked system}} + +Now the timing of a few big GUI programs. All timings were done without an X +server running and with the \tts{DISPLAY} environment variable not set +(so that when control is transferred to the application, it very soon +finds out there is no X server it can talk to and bails out). 
The +measurements are done by the dynamic linker in ticks on a 651MHz +dual Pentium III machine, i.e. ticks have to be divided by 651000000 +to get times in seconds. Each application has been run 4 times +and the result with the smallest total time spent in the dynamic +linker was chosen. Epiphany WWW browser and Evolution mail client +were chosen as examples of \tts{Gtk+} applications (typically they use +really many shared libraries, but many of them are quite small, +there aren't really many relocations nor conflict fixups and most +of the libraries are written in C) and Konqueror WWW browser and +KWord word processor were chosen as examples of \tts{KDE} applications +(typically they use slightly fewer shared libraries, though +still a lot, most of the shared libraries are written in C++, +have many relocations and cause many conflict fixups, especially +without C++ conflict fixup optimizations in \tts{prelink}). +On a non-prelinked system, the timings are done with lazy binding, +i.e. without \tts{LD\_BIND\_NOW=1} set in the environment. +This is because that's how people generally run programs; on the other +hand it is not an exact apples to apples comparison, since on a prelinked +system there is no lazy binding with the exception of shared libraries +loaded through \tts{dlopen}. So when control is passed to the application, +prelinked programs should be slightly faster for a while since non-prelinked +programs will have to do symbol lookups and process relocations +(and on various architectures flush instruction caches) whenever +they call some function they haven't called before in a particular shared +library or in the executable. 
+ +\noindent{{\small\begin{verbatim} +$ ldd `which epiphany-bin` | wc -l + 64 +$ # Unprelinked system +$ LD_DEBUG=statistics epiphany-bin 2>&1 | sed 's/^ *//' +18960: +18960: runtime linker statistics: +18960: total startup time in dynamic loader: 67336593 clock cycles +18960: time needed for relocation: 58119983 clock cycles (86.3%) +18960: number of relocations: 6999 +18960: number of relocations from cache: 4770 +18960: number of relative relocations: 31494 +18960: time needed to load objects: 8696104 clock cycles (12.9%) + +(epiphany-bin:18960): Gtk-WARNING **: cannot open display: +18960: +18960: runtime linker statistics: +18960: final number of relocations: 7692 +18960: final number of relocations from cache: 4770 +$ # Prelinked system +$ LD_DEBUG=statistics epiphany-bin 2>&1 | sed 's/^ *//' +25697: +25697: runtime linker statistics: +25697: total startup time in dynamic loader: 7313721 clock cycles +25697: time needed for relocation: 565680 clock cycles (7.7%) +25697: number of relocations: 0 +25697: number of relocations from cache: 1205 +25697: number of relative relocations: 0 +25697: time needed to load objects: 6179467 clock cycles (84.4%) + +(epiphany-bin:25697): Gtk-WARNING **: cannot open display: +25697: +25697: runtime linker statistics: +25697: final number of relocations: 31 +25697: final number of relocations from cache: 1205 + +$ ldd `which evolution` | wc -l + 68 +$ # Unprelinked system +$ LD_DEBUG=statistics evolution 2>&1 | sed 's/^ *//' +19042: +19042: runtime linker statistics: +19042: total startup time in dynamic loader: 54382122 clock cycles +19042: time needed for relocation: 43403190 clock cycles (79.8%) +19042: number of relocations: 3452 +19042: number of relocations from cache: 2885 +19042: number of relative relocations: 34957 +19042: time needed to load objects: 10450142 clock cycles (19.2%) + +(evolution:19042): Gtk-WARNING **: cannot open display: +19042: +19042: runtime linker statistics: +19042: final number of relocations: 
4075 +19042: final number of relocations from cache: 2885 +$ # Prelinked system +$ LD_DEBUG=statistics evolution 2>&1 | sed 's/^ *//' +25723: +25723: runtime linker statistics: +25723: total startup time in dynamic loader: 9176140 clock cycles +25723: time needed for relocation: 203783 clock cycles (2.2%) +25723: number of relocations: 0 +25723: number of relocations from cache: 525 +25723: number of relative relocations: 0 +25723: time needed to load objects: 8405157 clock cycles (91.5%) + +(evolution:25723): Gtk-WARNING **: cannot open display: +25723: +25723: runtime linker statistics: +25723: final number of relocations: 31 +25723: final number of relocations from cache: 525 + +$ ldd `which konqueror` | wc -l + 37 +$ # Unprelinked system +$ LD_DEBUG=statistics konqueror 2>&1 | sed 's/^ *//' +18979: +18979: runtime linker statistics: +18979: total startup time in dynamic loader: 131985703 clock cycles +18979: time needed for relocation: 127341077 clock cycles (96.4%) +18979: number of relocations: 25473 +18979: number of relocations from cache: 53594 +18979: number of relative relocations: 31171 +18979: time needed to load objects: 4318803 clock cycles (3.2%) +konqueror: cannot connect to X server +18979: +18979: runtime linker statistics: +18979: final number of relocations: 25759 +18979: final number of relocations from cache: 53594 +$ # Prelinked system +$ LD_DEBUG=statistics konqueror 2>&1 | sed 's/^ *//' +25733: +25733: runtime linker statistics: +25733: total startup time in dynamic loader: 5533696 clock cycles +25733: time needed for relocation: 1941489 clock cycles (35.0%) +25733: number of relocations: 0 +25733: number of relocations from cache: 2066 +25733: number of relative relocations: 0 +25733: time needed to load objects: 3217736 clock cycles (58.1%) +konqueror: cannot connect to X server +25733: +25733: runtime linker statistics: +25733: final number of relocations: 0 +25733: final number of relocations from cache: 2066 + +$ ldd `which kword` | 
wc -l + 40 +$ # Unprelinked system +$ LD_DEBUG=statistics kword 2>&1 | sed 's/^ *//' +19065: +19065: runtime linker statistics: +19065: total startup time in dynamic loader: 153684591 clock cycles +19065: time needed for relocation: 148255294 clock cycles (96.4%) +19065: number of relocations: 26231 +19065: number of relocations from cache: 55833 +19065: number of relative relocations: 30660 +19065: time needed to load objects: 5068746 clock cycles (3.2%) +kword: cannot connect to X server +19065: +19065: runtime linker statistics: +19065: final number of relocations: 26528 +19065: final number of relocations from cache: 55833 +$ # Prelinked system +$ LD_DEBUG=statistics kword 2>&1 | sed 's/^ *//' +25749: +25749: runtime linker statistics: +25749: total startup time in dynamic loader: 6516635 clock cycles +25749: time needed for relocation: 2106856 clock cycles (32.3%) +25749: number of relocations: 0 +25749: number of relocations from cache: 2130 +25749: number of relative relocations: 0 +25749: time needed to load objects: 4008585 clock cycles (61.5%) +kword: cannot connect to X server +25749: +25749: runtime linker statistics: +25749: final number of relocations: 0 +25749: final number of relocations from cache: 2130 +\end{verbatim}} +\prelinklistingcaption{Dynamic linker statistics for unprelinked and prelinked GUI programs}} + +In the case of above mentioned \tts{Gtk+} applications, the original startup +time spent in the dynamic linker decreased into 11\% to 17\% of the original +times, with \tts{KDE} applications it decreased even into around 4.2\% of original +times. + +The startup time reported by the dynamic linker is only part of the total +startup time of a GUI program. Unfortunately it cannot be measured very +accurately without patching each application separately, so that it would +print current process CPU time at the point when all windows are painted and +the process starts waiting for user input. 
The following table contains +values reported by \tts{time(1)} command on each of the 4 GUI programs +running under X, both on unprelinked and fully prelinked system. +As soon as each program painted its windows, it was killed by +application's quit hot key +\footnote{\tts{Ctrl+W} for Epiphany, \tts{Ctrl+Q} for Evolution and +Konqueror and \tts{Enter} in Kword's document type choice dialog.}. +Especially the \tts{real} time values depend also on the speed of +human reactions, so each measurement was repeated 10 times. All timings +were done with hot caches, after running the applications two times +before measurement. + +\noindent{\small\begin{center} +\begin{longtable}{l|llllllllll|ll} +{\bf Type} & \multicolumn{10}{l|}{\bf Values (in seconds)} & {\bf Average} & {\bf Std.Dev.} \\ +\hline +\endhead +& \multicolumn{10}{l|}{unprelinked epiphany} && \\ +\hline +{real} & {3.053} & {2.84} & {2.996} & {2.901} & {3.019} & {2.929} & {2.883} & {2.975} & {2.922} & {3.026} & {2.954} & {0.0698} \\ +{user} & {2.33} & {2.31} & {2.28} & {2.32} & {2.44} & {2.37} & {2.29} & {2.35} & {2.34} & {2.41} & {2.344} & {0.0508} \\ +{sys} & {0.2} & {0.23} & {0.23} & {0.19} & {0.19} & {0.12} & {0.25} & {0.16} & {0.14} & {0.14} & {0.185} & {0.0440} \\ +\hline +& \multicolumn{10}{l|}{prelinked epiphany} && \\ +\hline +{real} & {2.773} & {2.743} & {2.833} & {2.753} & {2.753} & {2.644} & {2.717} & {2.897} & {2.68} & {2.761} & {2.755} & {0.0716} \\ +{user} & {2.18} & {2.17} & {2.17} & {2.12} & {2.23} & {2.26} & {2.13} & {2.17} & {2.15} & {2.15} & {2.173} & {0.0430} \\ +{sys} & {0.13} & {0.15} & {0.18} & {0.15} & {0.11} & {0.04} & {0.18} & {0.14} & {0.1} & {0.15} & {0.133} & {0.0416} \\ +\hline +& \multicolumn{10}{l|}{unprelinked evolution} && \\ +\hline +{real} & {2.106} & {1.886} & {1.828} & {2.12} & {1.867} & {1.871} & {2.242} & {1.871} & {1.862} & {2.241} & {1.989} & {0.1679} \\ +{user} & {1.12} & {1.09} & {1.15} & {1.19} & {1.17} & {1.23} & {1.15} & {1.11} & {1.17} & {1.14} & {1.152} & 
{0.0408} \\ +{sys} & {0.1} & {0.11} & {0.13} & {0.07} & {0.1} & {0.05} & {0.11} & {0.11} & {0.09} & {0.08} & {0.095} & {0.0232} \\ +\hline +& \multicolumn{10}{l|}{prelinked evolution} && \\ +\hline +{real} & {1.684} & {1.621} & {1.686} & {1.72} & {1.694} & {1.691} & {1.631} & {1.697} & {1.668} & {1.535} & {1.663} & {0.0541} \\ +{user} & {0.92} & {0.87} & {0.92} & {0.95} & {0.79} & {0.86} & {0.94} & {0.87} & {0.89} & {0.86} & {0.887} & {0.0476} \\ +{sys} & {0.06} & {0.1} & {0.06} & {0.05} & {0.11} & {0.08} & {0.07} & {0.1} & {0.12} & {0.07} & {0.082} & {0.0239} \\ +\hline +& \multicolumn{10}{l|}{unprelinked kword} && \\ +\hline +{real} & {2.111} & {1.414} & {1.36} & {1.356} & {1.259} & {1.383} & {1.28} & {1.321} & {1.252} & {1.407} & {1.414} & {0.2517} \\ +{user} & {1.04} & {0.9} & {0.93} & {0.88} & {0.89} & {0.89} & {0.87} & {0.89} & {0.9} & {0.8} & {0.899} & {0.0597} \\ +{sys} & {0.07} & {0.04} & {0.06} & {0.05} & {0.06} & {0.1} & {0.09} & {0.08} & {0.08} & {0.12} & {0.075} & {0.0242} \\ +\hline +& \multicolumn{10}{l|}{prelinked kword} && \\ +\hline +{real} & {1.59} & {1.052} & {0.972} & {1.064} & {1.106} & {1.087} & {1.066} & {1.087} & {1.065} & {1.005} & {1.109} & {0.1735} \\ +{user} & {0.61} & {0.53} & {0.58} & {0.6} & {0.6} & {0.58} & {0.59} & {0.61} & {0.57} & {0.6} & {0.587} & {0.0241} \\ +{sys} & {0.08} & {0.08} & {0.06} & {0.06} & {0.03} & {0.07} & {0.06} & {0.03} & {0.06} & {0.04} & {0.057} & {0.0183} \\ +\hline +& \multicolumn{10}{l|}{unprelinked konqueror} && \\ +\hline +{real} & {1.306} & {1.386} & {1.27} & {1.243} & {1.227} & {1.286} & {1.262} & {1.322} & {1.345} & {1.332} & {1.298} & {0.0495} \\ +{user} & {0.88} & {0.86} & {0.88} & {0.9} & {0.87} & {0.83} & {0.83} & {0.86} & {0.86} & {0.89} & {0.866} & {0.0232} \\ +{sys} & {0.07} & {0.11} & {0.12} & {0.1} & {0.12} & {0.08} & {0.13} & {0.12} & {0.09} & {0.08} & {0.102} & {0.0210} \\ +\hline +& \multicolumn{10}{l|}{prelinked konqueror} && \\ +\hline +{real} & {1.056} & {0.962} & {0.961} & {0.906} & 
{0.927} & {0.923} & {0.933} & {0.958} & {0.955} & {1.142} & {0.972} & {0.0722} \\ +{user} & {0.56} & {0.6} & {0.56} & {0.52} & {0.57} & {0.58} & {0.5} & {0.57} & {0.61} & {0.55} & {0.562} & {0.0334} \\ +{sys} & {0.1} & {0.13} & {0.08} & {0.15} & {0.07} & {0.09} & {0.09} & {0.09} & {0.1} & {0.08} & {0.098} & {0.0244} \\ +\hline +\multicolumn{13}{l}{} \\ +\caption{GUI program start up times without and with prelinking} \\ +\end{longtable} +\end{center}} + +\tts{OpenOffice.org} is probably the largest program these days in Linux, +mostly written in C++. In \tts{OpenOffice.org} 1.1, the main executable, +\tts{soffice.bin}, links directly against 34 shared libraries, but typically +during startup it loads using \tts{dlopen} many others. As has been +mentioned earlier, \tts{prelink} cannot speed up loading shared libraries +using \tts{dlopen}, since it cannot predict in which order and what +shared libraries will be loaded (and thus cannot compute conflict fixups). +The \tts{soffice.bin} is typically started through a wrapper script +and depending on what arguments are passed to it, different +\tts{OpenOffice.org} application is started. With no options, it starts +just empty window with menu from which the applications can be started, +with say \tts{private:factory/swriter} argument it starts +a word processor, with \tts{private:factory/scalc} it starts a spreadsheet +etc. When \tts{soffice.bin} is already running, if you start another +copy of it, it just instructs the already running copy to pop up a new +window and exits. 
+ +In an experiment, \tts{soffice.bin} has been invoked 7 times against a running +X server, with no arguments, \tts{private:factory/swriter}, +\tts{private:factory/scalc}, \tts{private:factory/sdraw}, +\tts{private:factory/simpress}, \tts{private:factory/smath} arguments +(in all these cases nothing was pressed at all) and last with +the \tts{private:factory/swriter} argument where the menu item \tts{New Presentation} +was selected and the word processor window closed. +In all these cases, \tts{/proc/`pidof soffice.bin`/maps} file was +captured and the application then killed. This file contains, among +other things, a list of all shared libraries mmapped by the process at +the point where it started waiting for user input after loading up. +These lists were then summarized, to get the number of runs in +which a particular shared library was loaded up out of the total 7 +runs. There were 38 shared libraries shipped as part of \tts{OpenOffice.org} +package which were loaded in all 7 runs, another 3 shared +libraries included in \tts{OpenOffice.org} (and also one shared +library shipped in another package, \tts{libdb\_cxx-4.1.so}) +which were loaded 6 times. +\footnote{In all runs except the one without +arguments. But when the application is started without any +arguments, it cannot do any useful work, so one loads one of the +applications afterward anyway.} There was one shared library +loaded in 5 runs, but it was locale specific and thus not worth +considering. According to the \tts{OpenOffice.org} source, these shared +libraries are never unloaded with \tts{dlclose}, so \tts{soffice.bin} +can be made much more \tts{prelink} friendly and thus save a substantial +amount of startup time by linking against all those 76 shared libraries +instead of just 34 shared libraries it is linked against. 
+In the timings below, \tts{soffice1.bin} is the original \tts{soffice.bin} +as created by the \tts{OpenOffice.org} makefiles and \tts{soffice3.bin} is +the same executable linked dynamically against an additional 42 shared libraries. +The ordering of those 42 shared libraries matters for the number of conflict +fixups. Unfortunately, with large C++ shared libraries there is no obvious rule +for ordering them as sometimes it is more useful when a shared library precedes +its dependency and sometimes vice versa, so a few different orderings were +tried in several steps and always the one with the smallest number of conflict +fixups was chosen. Still, the number of conflict fixups is quite high +and a big part of the fixups are storing addresses of \tts{PLT} slots in +the executable into various places in shared libraries +\footnote{This might get better when the linker is modified to handle +calls without ever taking address of the function in executables specially, +but only testing it will actually show it up.}. +\tts{soffice2.bin} is another experiment, where the executable itself is an empty +source file; all objects which were originally in \tts{soffice.bin} +executable with the exception of start files were recompiled as position independent +code and linked into a new shared library. This reduced the number of conflicts +a lot and sped up start up times against \tts{soffice3.bin} when caches +are hot. It is a little bit slower than \tts{soffice3.bin} when running +with cold caches (e.g. for the first time after bootup), as there is one +more shared library to load etc. + +In the timings below, numbers for \tts{soffice1.bin} and \tts{soffice2.bin} +resp. \tts{soffice3.bin} cannot be easily compared, as \tts{soffice1.bin} +loads less than half of the needed shared libraries which the remaining +two executables load and the time to load those shared libraries doesn't +show up there. 
Still, when it is prelinked it takes just slightly more +than two times longer to load \tts{soffice2.bin} than \tts{soffice1.bin} +and the times are still less than 7\% of how long it takes to load +just the initial 34 shared libraries when not prelinking. + +\noindent{{\small\begin{verbatim} +$ S='s/^ *//' +$ ldd /usr/lib/openoffice/program/soffice1.bin | wc -l + 34 +$ # Unprelinked system +$ LD_DEBUG=statistics /usr/lib/openoffice/program/soffice1.bin 2>&1 | sed "$S" +19095: +19095: runtime linker statistics: +19095: total startup time in dynamic loader: 159833582 clock cycles +19095: time needed for relocation: 155464174 clock cycles (97.2%) +19095: number of relocations: 31136 +19095: number of relocations from cache: 31702 +19095: number of relative relocations: 18284 +19095: time needed to load objects: 3919645 clock cycles (2.4%) +/usr/lib/openoffice/program/soffice1.bin X11 error: Can't open display: +Set DISPLAY environment variable, use -display option +or check permissions of your X-Server +(See "man X" resp. "man xhost" for details) +19095: +19095: runtime linker statistics: +19095: final number of relocations: 31715 +19095: final number of relocations from cache: 31702 +$ # Prelinked system +$ LD_DEBUG=statistics /usr/lib/openoffice/program/soffice1.bin 2>&1 | sed "$S" +25759: +25759: runtime linker statistics: +25759: total startup time in dynamic loader: 4252397 clock cycles +25759: time needed for relocation: 1189840 clock cycles (27.9%) +25759: number of relocations: 0 +25759: number of relocations from cache: 2142 +25759: number of relative relocations: 0 +25759: time needed to load objects: 2604486 clock cycles (61.2%) +/usr/lib/openoffice/program/soffice1.bin X11 error: Can't open display: +Set DISPLAY environment variable, use -display option +or check permissions of your X-Server +(See "man X" resp. 
"man xhost" for details) +25759: +25759: runtime linker statistics: +25759: final number of relocations: 24 +25759: final number of relocations from cache: 2142 +$ ldd /usr/lib/openoffice/program/soffice2.bin | wc -l + 77 +$ # Unprelinked system +$ LD_DEBUG=statistics /usr/lib/openoffice/program/soffice2.bin 2>&1 | sed "$S" +19115: +19115: runtime linker statistics: +19115: total startup time in dynamic loader: 947793670 clock cycles +19115: time needed for relocation: 936895741 clock cycles (98.8%) +19115: number of relocations: 69164 +19115: number of relocations from cache: 94502 +19115: number of relative relocations: 59374 +19115: time needed to load objects: 10046486 clock cycles (1.0%) +/usr/lib/openoffice/program/soffice2.bin X11 error: Can't open display: +Set DISPLAY environment variable, use -display option +or check permissions of your X-Server +(See "man X" resp. "man xhost" for details) +19115: +19115: runtime linker statistics: +19115: final number of relocations: 69966 +19115: final number of relocations from cache: 94502 +$ # Prelinked system +$ LD_DEBUG=statistics /usr/lib/openoffice/program/soffice2.bin 2>&1 | sed "$S" +25777: +25777: runtime linker statistics: +25777: total startup time in dynamic loader: 10952099 clock cycles +25777: time needed for relocation: 3254518 clock cycles (29.7%) +25777: number of relocations: 0 +25777: number of relocations from cache: 5309 +25777: number of relative relocations: 0 +25777: time needed to load objects: 6805013 clock cycles (62.1%) +/usr/lib/openoffice/program/soffice2.bin X11 error: Can't open display: +Set DISPLAY environment variable, use -display option +or check permissions of your X-Server +(See "man X" resp. 
"man xhost" for details) +25777: +25777: runtime linker statistics: +25777: final number of relocations: 24 +25777: final number of relocations from cache: 5309 +$ ldd /usr/lib/openoffice/program/soffice3.bin | wc -l + 76 +$ # Unprelinked system +$ LD_DEBUG=statistics /usr/lib/openoffice/program/soffice3.bin 2>&1 | sed "$S" +19131: +19131: runtime linker statistics: +19131: total startup time in dynamic loader: 852275754 clock cycles +19131: time needed for relocation: 840996859 clock cycles (98.6%) +19131: number of relocations: 68362 +19131: number of relocations from cache: 89213 +19131: number of relative relocations: 55831 +19131: time needed to load objects: 10170207 clock cycles (1.1%) +/usr/lib/openoffice/program/soffice3.bin X11 error: Can't open display: +Set DISPLAY environment variable, use -display option +or check permissions of your X-Server +(See "man X" resp. "man xhost" for details) +19131: +19131: runtime linker statistics: +19131: final number of relocations: 69177 +19131: final number of relocations from cache: 89213 +$ # Prelinked system +$ LD_DEBUG=statistics /usr/lib/openoffice/program/soffice3.bin 2>&1 | sed "$S" +25847: +25847: runtime linker statistics: +25847: total startup time in dynamic loader: 12277407 clock cycles +25847: time needed for relocation: 4232915 clock cycles (34.4%) +25847: number of relocations: 0 +25847: number of relocations from cache: 8961 +25847: number of relative relocations: 0 +25847: time needed to load objects: 6925023 clock cycles (56.4%) +/usr/lib/openoffice/program/soffice3.bin X11 error: Can't open display: +Set DISPLAY environment variable, use -display option +or check permissions of your X-Server +(See "man X" resp. 
"man xhost" for details) +25847: +25847: runtime linker statistics: +25847: final number of relocations: 24 +25847: final number of relocations from cache: 8961 +\end{verbatim}} +\prelinklistingcaption{Dynamic linker statistics for unprelinked and prelinked OpenOffice.org}} + +Below are measurement using \tts{time(1)} for each of the \tts{soffice.bin} +variants, prelinked and unprelinked. \tts{OpenOffice.org} was killed +immediately after painting \tts{Writer}'s window using \tts{Ctrl+Q}. + +\noindent{\small\begin{center} +\begin{longtable}{l|llllllllll|ll} +{\bf Type} & \multicolumn{10}{l|}{\bf Values (in seconds)} & {\bf Average} & {\bf Std.Dev.} \\ +\hline +\endhead +& \multicolumn{10}{l|}{unprelinked soffice1.bin private:factory/swriter} && \\ +\hline +{real} & {5.569} & {5.149} & {5.547} & {5.559} & {5.549} & {5.139} & {5.55} & {5.559} & {5.598} & {5.559} & {5.478} & {0.1765} \\ +{user} & {4.65} & {4.57} & {4.62} & {4.64} & {4.57} & {4.55} & {4.65} & {4.49} & {4.52} & {4.46} & {4.572} & {0.0680} \\ +{sys} & {0.29} & {0.24} & {0.19} & {0.21} & {0.21} & {0.21} & {0.25} & {0.25} & {0.27} & {0.26} & {0.238} & {0.0319} \\ +\hline +& \multicolumn{10}{l|}{prelinked soffice1.bin private:factory/swriter} && \\ +\hline +{real} & {4.946} & {4.899} & {5.291} & {4.879} & {4.879} & {4.898} & {5.299} & {4.901} & {4.887} & {4.901} & {4.978} & {0.1681} \\ +{user} & {4.23} & {4.27} & {4.18} & {4.24} & {4.17} & {4.22} & {4.15} & {4.25} & {4.26} & {4.31} & {4.228} & {0.0494} \\ +{sys} & {0.22} & {0.22} & {0.24} & {0.26} & {0.3} & {0.26} & {0.29} & {0.17} & {0.21} & {0.23} & {0.24} & {0.0389} \\ +\hline +& \multicolumn{10}{l|}{unprelinked soffice2.bin private:factory/swriter} && \\ +\hline +{real} & {5.575} & {5.166} & {5.592} & {5.149} & {5.571} & {5.559} & {5.159} & {5.157} & {5.569} & {5.149} & {5.365} & {0.2201} \\ +{user} & {4.59} & {4.5} & {4.57} & {4.37} & {4.47} & {4.57} & {4.56} & {4.41} & {4.63} & {4.5} & {4.517} & {0.0826} \\ +{sys} & {0.24} & {0.24} & {0.21} & {0.34} & 
{0.27} & {0.19} & {0.19} & {0.27} & {0.19} & {0.29} & {0.243} & {0.0501} \\ +\hline +& \multicolumn{10}{l|}{prelinked soffice2.bin private:factory/swriter} && \\ +\hline +{real} & {3.69} & {3.66} & {3.658} & {3.661} & {3.639} & {3.638} & {3.649} & {3.659} & {3.65} & {3.659} & {3.656} & {0.0146} \\ +{user} & {2.93} & {2.88} & {2.88} & {2.9} & {2.84} & {2.63} & {2.89} & {2.85} & {2.77} & {2.83} & {2.84} & {0.0860} \\ +{sys} & {0.22} & {0.18} & {0.23} & {0.2} & {0.18} & {0.29} & {0.22} & {0.23} & {0.24} & {0.22} & {0.221} & {0.0318} \\ +\hline +& \multicolumn{10}{l|}{unprelinked soffice3.bin private:factory/swriter} && \\ +\hline +{real} & {5.031} & {5.02} & {5.009} & {5.028} & {5.019} & {5.019} & {5.019} & {5.052} & {5.426} & {5.029} & {5.065} & {0.1273} \\ +{user} & {4.31} & {4.35} & {4.34} & {4.3} & {4.38} & {4.29} & {4.45} & {4.37} & {4.38} & {4.44} & {4.361} & {0.0547} \\ +{sys} & {0.27} & {0.25} & {0.26} & {0.27} & {0.27} & {0.31} & {0.18} & {0.17} & {0.16} & {0.15} & {0.229} & {0.0576} \\ +\hline +& \multicolumn{10}{l|}{prelinked soffice3.bin private:factory/swriter} && \\ +\hline +{real} & {3.705} & {3.669} & {3.659} & {3.669} & {3.66} & {3.659} & {3.659} & {3.661} & {3.668} & {3.649} & {3.666} & {0.0151} \\ +{user} & {2.86} & {2.88} & {2.85} & {2.84} & {2.83} & {2.86} & {2.84} & {2.91} & {2.86} & {2.8} & {2.853} & {0.0295} \\ +{sys} & {0.26} & {0.19} & {0.27} & {0.25} & {0.24} & {0.23} & {0.28} & {0.21} & {0.21} & {0.27} & {0.241} & {0.0303} \\ +\hline +\multicolumn{13}{l}{} \\ +\caption{OpenOffice.org start up times without and with prelinking} \\ +\end{longtable} +\end{center}} + +\section{Similar tools on other ELF using Operating Systems} + +Something similar to \tts{prelink} is available on other \tts{ELF} +platforms. On Irix there is \tts{QUICKSTART} and on Solaris \tts{crle}. + +SGI \tts{QUICKSTART} is much closer to \tts{prelink} from these two. +The \tts{rqs} program relocates libraries to (if possible) unique +virtual address space slot. 
The base address is either specified +on the command line with the \tts{-l} option, or \tts{rqs} uses +a \tts{so\_locations} registry with \tts{-c} or \tts{-u} options +and finds a not yet occupied slot. This is similar to how \tts{prelink} +lays out libraries without the \tts{-m} option. + +\tts{QUICKSTART} uses the same data structure for library lists +(\tts{ElfNN\_Lib}) as \tts{prelink}, but uses more fields in it +(\tts{prelink} doesn't use \tts{l\_version} and \tts{l\_flags} fields at +the moment) and uses different dynamic tags and section type for +it. Another difference is that \tts{QUICKSTART} makes all liblist +section \tts{SHF\_ALLOC}, whether in shared libraries or executables. +\tts{prelink} only needs liblist section in the executable be allocated, +liblist sections in shared libraries are not allocated and used +at \tts{prelink} time only. + +The biggest difference between \tts{QUICKSTART} and \tts{prelink} +is in how conflicts are encoded. SGI stores them in a very compact +format, as array of \tts{.dynsym} section indexes for symbols which +are conflicting. There is no information publicly available +what exactly SGI dynamic linker does when it is resolving the conflicts, +so this is just a guess. Given that the conflicts can be stored +in a shared library or executable different to the shared library with the +relocations against the conflicting symbol and different to the shared +library which the symbol was originally resolved to, there doesn't seem +to be an obvious way how to handle the conflicts very cheaply. +The dynamic linker probably collects list of all conflicting symbol +names, for each such symbol computes \tts{ELF} hash and walks hash buckets +for this hash of all shared libraries, looking for the symbol. +Every time it finds the symbol, all relocations against it need to be +redone. 
Unlike this, \tts{prelink} stores conflicts as an array of +\tts{ElfNN\_Rela} structures, with one entry for each shared relocation +against a conflicting symbol in some shared library. This guarantees +that there are no symbol lookups during program startup (provided +that shared libraries have not been changed after prelinking), while +\tts{QUICKSTART} will do some symbol lookups if there are any +conflicts. \tts{QUICKSTART} puts conflict sections into the executable +and every shared library where \tts{rqs} determines conflicts while +\tts{prelink} stores them in the executable only (but the array is typically +much bigger). Disk space requirements for prelinked executables are certainly +bigger than for requickstarted executables, but which one has bigger runtime +memory requirements is unclear. If prelinking can be used, all \tts{.rela*} +and \tts{.rel*} sections in the executable and all shared libraries are skipped, +so they will not need to be paged in during the whole program's life (with the +exception of first and last pages in the relocation sections which can be +paged in because of other sections on the same page), but the whole +\tts{.gnu.conflict} section needs to be paged in (read-only) and processed. +With \tts{QUICKSTART}, probably all (much smaller) conflict sections need +to be paged in and also, likely, for each conflict the whole relocation sections +of each library which the conflict needs to be applied against. + +In \tts{QUICKSTART} documentation, SGI says that conflicts are very costly +and that developers should avoid them. Unfortunately, this is sometimes quite +hard, especially with C++ shared libraries. It is unclear whether \tts{rqs} +does any optimizations to trim down the number of conflicts. + +Sun took a completely different approach. The dynamic linker provides a +\tts{dldump (const char *ipath, const char *opath, int flags);} function. +{\sl ipath} is supposed to be a path to an \tts{ELF} object loaded already in +the current process. 
This function creates a new \tts{ELF} object at +{\sl opath}, which is like the {\sl ipath} object, but relocated to the +base address which it has actually been mapped at in the current process +and with some relocations (specified in {\sl flags} bitmask) applied as +they have been resolved in the current process. Relocations, which have +been applied, are overwritten in the relocation sections with +\tts{R\_*\_NONE} relocations. The \tts{crle} executable, in addition to other +functions not related to startup times, with some specific options uses the +\tts{dldump} function to dump all shared libraries a particular executable +uses (and the executable itself) into a new directory, with selected +relocation classes being already applied. The main disadvantage of this +approach is that such alternate shared libraries are at least for +most relocation classes not shareable across different programs at all +(and for those where they could be shareable a little bit there will +be many relocations left for the dynamic linker, so the speed gains will +be small). Another disadvantage is that all relocation sections need to +be paged into the memory, just to find out that most of the relocations +are \tts{R\_*\_NONE}. + +\section{ELF extensions for prelink} + +\tts{Prelink} needs a few \tts{ELF} extensions for its data structures +in \tts{ELF} objects. 
For the list of dependencies at the time of prelinking, +a new section type \tts{SHT\_GNU\_LIBLIST} is defined: + +\noindent{{\small\begin{verbatim} +#define SHT_GNU_LIBLIST 0x6ffffff7 /* Prelink library list */ + +typedef struct +{ + Elf32_Word l_name; /* Name (string table index) */ + Elf32_Word l_time_stamp; /* Timestamp */ + Elf32_Word l_checksum; /* Checksum */ + Elf32_Word l_version; /* Unused, should be zero */ + Elf32_Word l_flags; /* Unused, should be zero */ +} Elf32_Lib; + +typedef struct +{ + Elf64_Word l_name; /* Name (string table index) */ + Elf64_Word l_time_stamp; /* Timestamp */ + Elf64_Word l_checksum; /* Checksum */ + Elf64_Word l_version; /* Unused, should be zero */ + Elf64_Word l_flags; /* Unused, should be zero */ +} Elf64_Lib; +\end{verbatim}} +\prelinklistingcaption{New structures and section type constants used by \tts{prelink}}} + +\tts{Prelink} also introduces a few new special sections: + +\noindent{\begin{center} +\begin{longtable}{l|lc} +{\bf Name} & {\bf Type} & {\bf Attributes} \\ +\hline +& {\sl In shared libraries} & \\ +{.gnu.liblist} & {SHT\_GNU\_LIBLIST} & {0} \\ +{.gnu.libstr} & {SHT\_STRTAB} & {0} \\ +{.gnu.prelink\_undo} & {SHT\_PROGBITS} & {0} \\ +\hline +& {\sl In executables} & \\ +{.gnu.liblist} & {SHT\_GNU\_LIBLIST} & {SHF\_ALLOC} \\ +{.gnu.conflict} & {SHT\_RELA} & {SHF\_ALLOC} \\ +{.gnu.prelink\_undo} & {SHT\_PROGBITS} & {0} \\ +\multicolumn{3}{l}{} \\ +\caption{Special sections introduced by \tts{prelink}} \\ +\end{longtable} +\end{center}} + +\begin{description} +\item[\tts{.gnu.liblist}] This section contains one \tts{ElfNN\_Lib} structure +for each shared library which the object has been prelinked against, +in the order in which they appear in symbol search scope. +Section's \tts{sh\_link} value should contain section index of \tts{.gnu.libstr} +for shared libraries and section index of \tts{.dynsym} for executables. +\tts{l\_name} field contains the dependent library's name as an index +into the section pointed to by the \tts{sh\_link} field. 
\tts{l\_time\_stamp} +resp. \tts{l\_checksum} should contain copies of \tts{DT\_GNU\_PRELINKED} +resp. \tts{DT\_CHECKSUM} values of the dependent library. + +\item[\tts{.gnu.conflict}] This section contains one \tts{ElfNN\_Rela} +structure for each needed \tts{prelink} conflict fixup. \tts{r\_offset} +field contains the absolute address at which the fixup needs to be applied, +\tts{r\_addend} the value that needs to be stored at that location. +\tts{ELFNN\_R\_SYM} of \tts{r\_info} field should be zero, +\tts{ELFNN\_R\_TYPE} of \tts{r\_info} field should be an architecture +specific relocation type which should be handled the same as +for \tts{.rela.*} sections on the architecture. For \tts{EM\_ALPHA} machine, +all types with \tts{R\_ALPHA\_JMP\_SLOT} in the lowest 8 bits of \tts{ELF64\_R\_TYPE} +should be handled as \tts{R\_ALPHA\_JMP\_SLOT} relocation; the upper +24 bits contain the index in the original \tts{.rela.plt} section of the +\tts{R\_ALPHA\_JMP\_SLOT} relocation the fixup was created for. + +\item[\tts{.gnu.libstr}] This section contains strings for \tts{.gnu.liblist} +section in shared libraries where \tts{.gnu.liblist} section is not +allocated. + +\item[\tts{.gnu.prelink\_undo}] This section contains \tts{prelink} private +data used for \tts{prelink --undo} operation. This data includes the +original \tts{ElfNN\_Ehdr} of the object before prelinking and all its +original \tts{ElfNN\_Phdr} and \tts{ElfNN\_Shdr} headers. 
+\end{description} + +\tts{Prelink} also defines 6 new dynamic tags: + +\noindent{{\small\begin{verbatim} +#define DT_GNU_PRELINKED 0x6ffffdf5 /* Prelinking timestamp */ +#define DT_GNU_CONFLICTSZ 0x6ffffdf6 /* Size of conflict section */ +#define DT_GNU_LIBLISTSZ 0x6ffffdf7 /* Size of library list */ +#define DT_CHECKSUM 0x6ffffdf8 /* Library checksum */ + +#define DT_GNU_CONFLICT 0x6ffffef8 /* Start of conflict section */ +#define DT_GNU_LIBLIST 0x6ffffef9 /* Library list */ +\end{verbatim}} +\prelinklistingcaption{\tts{Prelink} dynamic tags}} + +\tts{DT\_GNU\_PRELINKED} and \tts{DT\_CHECKSUM} dynamic tags must +be present in prelinked shared libraries. The corresponding +\tts{d\_un.d\_val} fields should contain the time when the library +has been prelinked (in seconds since January 1st, 1970, 00:00 UTC) +resp. the \tts{CRC32} checksum of all sections with one of +\tts{SHF\_ALLOC}, \tts{SHF\_WRITE} or \tts{SHF\_EXECINSTR} bit set +whose type is not \tts{SHT\_NOBITS}, in the order they appear in the +shared library's section header table, with \tts{DT\_GNU\_PRELINKED} +and \tts{DT\_CHECKSUM} \tts{d\_un.d\_val} values set to 0 for +the time of checksum computation. + +The \tts{DT\_GNU\_LIBLIST} and \tts{DT\_GNU\_LIBLISTSZ} dynamic tags +must be present in all prelinked executables. The \tts{d\_un.d\_ptr} value of +the \tts{DT\_GNU\_LIBLIST} dynamic tag contains the virtual address +of the \tts{.gnu.liblist} section in the executable and \tts{d\_un.d\_val} +of \tts{DT\_GNU\_LIBLISTSZ} tag contains its size in bytes. + +\tts{DT\_GNU\_CONFLICT} and \tts{DT\_GNU\_CONFLICTSZ} dynamic tags +may be present in prelinked executables. \tts{d\_un.d\_ptr} of +\tts{DT\_GNU\_CONFLICT} dynamic tag contains the virtual address +of \tts{.gnu.conflict} section in the executable (if present) +and \tts{d\_un.d\_val} of \tts{DT\_GNU\_CONFLICTSZ} tag contains +its size in bytes. 
+ +\begin{appendix} + +\section{Glossary} +\printglossary + +\section{References} + +\begin{description} +\item[\textrm{[1]}] +\href{http://www.caldera.com/developers/devspecs/gabi41.pdf}% +{\sl System V Application Binary Interface, Edition 4.1}. + +\item[\textrm{[2]}] +\href{http://www.caldera.com/developers/devspecs/abi386-4.pdf}% +{\sl System V Application Binary Interface, Intel 386 Architecture Processor +Supplement}. + +\item[\textrm{[3]}] +\href{http://www.x86-64.org/cgi-bin/cvsweb.cgi/x86-64-ABI/}% +{\sl System V Application Binary Interface, AMD64 Architecture Processor +Supplement}. + +\item[\textrm{[4]}] +\href{http://refspecs.freestandards.org/elf/IA64-SysV-psABI.pdf}% +{{\sl System V Application Binary Interface, Intel Itanium Architecture Processor +Supplement}, Intel Corporation, 2001}. + +\item[\textrm{[5]}] +\href{http://refspecs.freestandards.org/elf/elfspec_ppc.pdf}% +{Steve Zucker, Kari Karhi, {\sl System V Application Binary Interface, +PowerPC Architecture Processor Supplement}, SunSoft, IBM, 1995}. + +\item[\textrm{[6]}] +\href{ftp://ftp.linuxppc64.org/pub/people/amodra/PPC-elf64abi.txt.gz}% +{\sl System V Application Binary Interface, PowerPC64 Architecture Processor +Supplement}. + +\item[\textrm{[7]}] +\href{http://www.arm.com/support/566FHT/$File/ARMELF.pdf}% +{\sl System V Application Binary Interface, ARM Architecture Processor +Supplement}. + +\item[\textrm{[8]}] +\href{http://www.sparc.com/standards/SCD.2.4.1.ps.Z}% +{{\sl SPARC Compliance Definition, Version 2.4.1}, +SPARC International, Inc., 1999}. + +\item[\textrm{[9]}] +\href{http://people.redhat.com/drepper/dsohowto.pdf}% +{Ulrich Drepper, {\sl How To Write Shared Libraries}, Red Hat, Inc., 2003}. + +\item[\textrm{[10]}] +\href{http://docs.sun.com/db/doc/816-1386}% +{{\sl Linker And Library Guide}, Sun Microsystems, 2002}. + +\item[\textrm{[11]}] +\href{http://www.gzlinux.org/docs/category/dev/c/linkerandloader.pdf}% +{John R. Levine, {\sl Linkers and Loaders}, 1999}. 
+ +\item[\textrm{[12]}] +\href{http://people.redhat.com/drepper/tls.pdf}% +{Ulrich Drepper, {\sl ELF Handling For Thread-Local Storage}, Red Hat, Inc., +2003}. + +\item[\textrm{[13]}] +\href{ftp://ftp.linuxppc64.org/pub/people/amodra/ppc32tls.txt.gz}% +{Alan Modra, {\sl PowerPC Specific Thread Local Storage ABI}, 2003}. + +\item[\textrm{[14]}] +\href{ftp://ftp.linuxppc64.org/pub/people/amodra/ppc64tls.txt.gz}% +{Alan Modra, {\sl PowerPC64 Specific Thread Local Storage ABI}, 2003}. + +\item[\textrm{[15]}] +\href{http://www.eagercon.com/dwarf/dwarf-2.0.0.pdf}% +{\sl DWARF Debugging Information Format Version 2}. + +\item[\textrm{[16]}] +\href{http://reality.sgiweb.org/davea/dwarf3-draft8-011125.pdf}% +{{\sl DWARF Debugging Information Format Version 3}, Draft, 2001}. + +\item[\textrm{[17]}] +\href{http://sources.redhat.com/cgi-bin/cvsweb.cgi/src/gdb/doc/stabs.texinfo?cvsroot=src}% +{\sl The "stabs" debugging information format}. +\end{description} + + +\section{Revision History} + +\begin{description} +\item[2003-11-03] First draft. 
+ + +\end{description} + +\end{appendix} + +\end{document} diff --git a/doc/prelinklisting.sty b/doc/prelinklisting.sty new file mode 100644 index 0000000..f0aff06 --- /dev/null +++ b/doc/prelinklisting.sty @@ -0,0 +1,41 @@ +\NeedsTeXFormat{LaTeX2e}[2003/12/06] +\ProvidesPackage{prelinklisting}[] +\newcounter{prelinklisting} +\renewcommand\theprelinklisting{\@arabic\c@prelinklisting} +\newcommand\prelinklistingname{Listing} +\newcommand\listprelinklistingname{List of Listings} +\def\fps@prelinklisting{tbp} +\def\ftype@prelinklisting{5} +\def\ext@prelinklisting{lol} +\def\fnum@prelinklisting{\prelinklistingname~\theprelinklisting} +\newcommand\listofprelinklistings{% +\begingroup +\csname lol@parskip\endcsname +\@ifundefined{chapter}{\section}{\chapter}*{\listprelinklistingname + \@mkboth{\MakeUppercase\listprelinklistingname}% + {\MakeUppercase\listprelinklistingname}}% + \@starttoc{\ext@prelinklisting}% +\endgroup + } +\newcommand*\l@prelinklisting{\@dottedtocline{1}{1.5em}{2.3em}} +\def\JJ@caption{% + \bgroup + \@ifnextchar[{\egroup\JJ@c@ption\@firstofone}\JJ@capti@n} +\def\JJ@capti@n{% + \@ifstar + {\egroup\JJ@c@ption\@gobble[]}% + {\egroup\@xdblarg{\JJ@c@ption\@firstofone}}} +\def\JJ@c@ption#1[#2]#3{% + \JJ@makecaption#1\fnum@prelinklisting{#3}% + \def\@tempa{#2}% + \ifx\@tempa\@empty\else + {\let\\\space + \hyperdef{prelinklisting}{\theprelinklisting}{} + \let\theHprelinklisting\theprelinklisting + \hyper@makecurrent{prelinklisting} + \addcontentsline{lol}{prelinklisting}{\protect\numberline{\theprelinklisting}{#2}}% + \addtocounter{prelinklisting}{1}}% + \fi} +\def\JJ@makecaption#1#2#3{{#1{#2: }#3\endgraf\vskip\baselineskip}} +\let\prelinklistingcaption\JJ@caption +\endinput @@ -1,4 +1,4 @@ -/* Copyright (C) 2001, 2002, 2003 Red Hat, Inc. +/* Copyright (C) 2001, 2002, 2003, 2004 Red Hat, Inc. Written by Jakub Jelinek <jakub@redhat.com>, 2001. 
This program is free software; you can redistribute it and/or modify @@ -259,6 +259,12 @@ prelink_exec (struct prelink_info *info) assert (j == move->new_shnum); ehdr.e_shnum = j; + if (old_sdynbss != -1 && old_dynbss == -1) + { + old_dynbss = old_sdynbss; + old_sdynbss = -1; + } + add = alloca ((rinfo.last - rinfo.first + 5) * sizeof (*add)); old = alloca ((rinfo.last - rinfo.first + 5) * sizeof (*old)); new = alloca ((rinfo.last - rinfo.first + 5) * sizeof (*new)); @@ -515,7 +521,9 @@ prelink_exec (struct prelink_info *info) { Elf_Data *data = elf_getdata (dso->scn[j], NULL); - assert (data->d_size == dso->shdr[j].sh_size); + assert (data->d_size == dso->shdr[j].sh_size + || j == new_dynbss + 1 + || j == new_sdynbss + 1); if (data->d_size) { data->d_buf = realloc (data->d_buf, data->d_size); @@ -766,7 +774,13 @@ prelink_exec (struct prelink_info *info) dso->shdr[new_dynbss] = dso->shdr[new_dynbss + 1]; - dso->shdr[new_dynbss].sh_name = shstrtabadd (dso, ".dynbss"); + if (! strcmp (strptr (dso, dso->ehdr.e_shstrndx, + dso->shdr[new_dynbss + 1].sh_name), + ".sbss") + && new_sdynbss == -1) + dso->shdr[new_dynbss].sh_name = shstrtabadd (dso, ".sdynbss"); + else + dso->shdr[new_dynbss].sh_name = shstrtabadd (dso, ".dynbss"); if (dso->shdr[new_dynbss].sh_name == 0) goto error_out; @@ -228,7 +228,7 @@ parse_opt (int key, char *arg, struct argp_state *state) return 0; } -static struct argp argp = { options, parse_opt, 0, argp_doc }; +static struct argp argp = { options, parse_opt, "[FILES]", argp_doc }; int main (int argc, char *argv[]) diff --git a/src/prelink.c b/src/prelink.c index b7c9ef6..5b3c7c3 100644 --- a/src/prelink.c +++ b/src/prelink.c @@ -302,7 +302,6 @@ prelink_prepare (DSO *dso) if (! liblist || ! libstr || ! undo) { - Elf_Data data; struct section_move *move; move = init_section_move (dso); @@ -345,7 +344,6 @@ prelink_prepare (DSO *dso) if (reopen_dso (dso, move)) { - free (data.d_buf); free (move); return 1; } |