Skip to content

Commit

Permalink
Add real EBCDIC build and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
NWilson committed Jan 16, 2025
1 parent 03c0977 commit aea53cd
Show file tree
Hide file tree
Showing 49 changed files with 3,103 additions and 1,471 deletions.
21 changes: 21 additions & 0 deletions .github/workflows/dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,27 @@ jobs:
- name: Test
run: bazelisk test //... --enable_runfiles --incompatible_strict_action_env --test_output=all

# XXX Consider moving this job into the ManyConfig tests instead.
ebcdic:
# Tests the full support for EBCDIC on a non-EBCDIC platform, using a
# hardcoded EBCDIC-1047 codepage.
name: EBCDIC
runs-on: ubuntu-24.04
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true

- name: Configure
run: cmake -DPCRE2_SUPPORT_JIT=OFF -DPCRE2_SUPPORT_UNICODE=OFF -DPCRE2_EBCDIC=ON -DPCRE2_EBCDIC_IGNORING_COMPILER=ON -DPCRE2_DEBUG=ON -DCMAKE_C_FLAGS="$CFLAGS_GCC_STYLE" -DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DCMAKE_BUILD_TYPE=Release -B build

- name: Build
run: cd build && make -j3

- name: Test
run: cd build && ../RunTest

heron:
# Job to verify that the tasks performed by PrepareRelease have been done. It is
# the committer's responsibility (currently) to run PrepareRelease themselves when
Expand Down
64 changes: 55 additions & 9 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,10 @@ set(

set(PCRE2_EBCDIC_NL25 OFF CACHE BOOL "Use 0x25 as EBCDIC NL character instead of 0x15; implies EBCDIC.")

set(PCRE2_EBCDIC_IGNORING_COMPILER OFF CACHE BOOL "Force EBCDIC 1047 using numeric literals rather than C character literals; implies EBCDIC.")

option(PCRE2_REBUILD_CHARTABLES "Rebuild char tables" OFF)

set(
PCRE2_LINK_SIZE
"2"
Expand Down Expand Up @@ -568,13 +572,42 @@ if(NEWLINE_DEFAULT STREQUAL "")
)
endif()

set(REBUILD_CHARTABLES OFF)
if(PCRE2_REBUILD_CHARTABLES)
set(REBUILD_CHARTABLES ON)
endif()

set(EBCDIC OFF)
if(PCRE2_EBCDIC)
set(EBCDIC 1)
set(EBCDIC ON)
endif()

if(PCRE2_EBCDIC_NL25)
set(EBCDIC 1)
set(EBCDIC_NL25 1)
set(EBCDIC ON)
set(EBCDIC_NL25 ON)
endif()

if(PCRE2_EBCDIC_IGNORING_COMPILER)
set(EBCDIC ON)
set(EBCDIC_IGNORING_COMPILER ON)
endif()

# Make sure that if EBCDIC is set (without EBCDIC_IGNORING_COMPILER), then
# REBUILD_CHARTABLES is also enabled.
# Also check that UTF support is not requested, because PCRE2 cannot handle
# EBCDIC and UTF in the same build. To do so it would need to use different
# character constants depending on the mode.
# Also, EBCDIC cannot be used with 16-bit and 32-bit libraries.
if(EBCDIC)
if(NOT EBCDIC_IGNORING_COMPILER)
set(REBUILD_CHARTABLES ON)
endif()
if(PCRE2_SUPPORT_UNICODE)
message(FATAL_ERROR "Support for EBCDIC and Unicode cannot be enabled at the same time")
endif()
if(PCRE2_BUILD_PCRE2_16 OR PCRE2_BUILD_PCRE2_32)
message(FATAL_ERROR "EBCDIC support is available only for the 8-bit library")
endif()
endif()

# Output files
Expand Down Expand Up @@ -648,8 +681,7 @@ endif()

# Character table generation

option(PCRE2_REBUILD_CHARTABLES "Rebuild char tables" OFF)
if(PCRE2_REBUILD_CHARTABLES)
if(REBUILD_CHARTABLES)
add_executable(pcre2_dftables src/pcre2_dftables.c)
add_custom_command(
OUTPUT ${PROJECT_BINARY_DIR}/pcre2_chartables.c
Expand All @@ -659,8 +691,12 @@ if(PCRE2_REBUILD_CHARTABLES)
COMMENT "Generating character tables (pcre2_chartables.c) for current locale"
VERBATIM
)
else()
elseif(NOT PCRE2_EBCDIC)
configure_file(${PROJECT_SOURCE_DIR}/src/pcre2_chartables.c.dist ${PROJECT_BINARY_DIR}/pcre2_chartables.c COPYONLY)
elseif(PCRE2_EBCDIC_NL25)
configure_file(${PROJECT_SOURCE_DIR}/src/pcre2_chartables.c.ebcdic-1047-nl25 ${PROJECT_BINARY_DIR}/pcre2_chartables.c COPYONLY)
else()
configure_file(${PROJECT_SOURCE_DIR}/src/pcre2_chartables.c.ebcdic-1047-nl15 ${PROJECT_BINARY_DIR}/pcre2_chartables.c COPYONLY)
endif()

# Source code
Expand Down Expand Up @@ -1334,9 +1370,19 @@ if(PCRE2_SHOW_REPORT)
message(STATUS " Newline char/sequence ............. : ${PCRE2_NEWLINE}")
message(STATUS " \\R matches only ANYCRLF ........... : ${PCRE2_SUPPORT_BSR_ANYCRLF}")
message(STATUS " \\C is disabled .................... : ${PCRE2_NEVER_BACKSLASH_C}")
message(STATUS " EBCDIC coding ..................... : ${PCRE2_EBCDIC}")
message(STATUS " EBCDIC coding with NL=0x25 ........ : ${PCRE2_EBCDIC_NL25}")
message(STATUS " Rebuild char tables ............... : ${PCRE2_REBUILD_CHARTABLES}")

if(NOT EBCDIC)
set(EBCDIC_NL_CODE "n/a")
elseif(EBCDIC_NL25)
set(EBCDIC_NL_CODE "0x25")
else()
set(EBCDIC_NL_CODE "0x15")
endif()
message(STATUS " EBCDIC coding ..................... : ${EBCDIC}")
message(STATUS " EBCDIC code for NL ................ : ${EBCDIC_NL_CODE}")
message(STATUS " EBCDIC coding ignoring compiler ... : ${PCRE2_EBCDIC_IGNORING_COMPILER}")
message(STATUS " Rebuild char tables ............... : ${REBUILD_CHARTABLES}")

message(STATUS " Internal link size ................ : ${PCRE2_LINK_SIZE}")
message(STATUS " Maximum variable lookbehind ....... : ${PCRE2_MAX_VARLOOKBEHIND}")
message(STATUS " Parentheses nest limit ............ : ${PCRE2_PARENS_NEST_LIMIT}")
Expand Down
17 changes: 16 additions & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -362,9 +362,21 @@ src/pcre2_chartables.c: pcre2_dftables$(EXEEXT)
rm -f $@
./pcre2_dftables$(EXEEXT) $@
else
if WITH_EBCDIC
if WITH_EBCDIC_NL25
src/pcre2_chartables.c: $(srcdir)/src/pcre2_chartables.c.ebcdic-1047-nl25
rm -f $@
$(LN_S) $(abs_srcdir)/src/pcre2_chartables.c.ebcdic-1047-nl25 $(abs_builddir)/src/pcre2_chartables.c
else # WITH_EBCDIC_NL25
src/pcre2_chartables.c: $(srcdir)/src/pcre2_chartables.c.ebcdic-1047-nl15
rm -f $@
$(LN_S) $(abs_srcdir)/src/pcre2_chartables.c.ebcdic-1047-nl15 $(abs_builddir)/src/pcre2_chartables.c
endif # WITH_EBCDIC_NL25
else # WITH_EBCDIC
src/pcre2_chartables.c: $(srcdir)/src/pcre2_chartables.c.dist
rm -f $@
$(LN_S) $(abs_srcdir)/src/pcre2_chartables.c.dist $(abs_builddir)/src/pcre2_chartables.c
endif # WITH_EBCDIC
endif # WITH_REBUILD_CHARTABLES

BUILT_SOURCES = src/pcre2_chartables.c
Expand Down Expand Up @@ -460,7 +472,10 @@ endif # WITH_PCRE2_32
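# The pcre2_chartables.c.dist file is the default (ASCII) version of
# pcre2_chartables.c, used unless --enable-rebuild-chartables or EBCDIC support
# is specified. The pcre2_chartables.c.ebcdic-1047-{nl15,nl25} files are the
# corresponding defaults for EBCDIC builds that do not rebuild the tables.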

EXTRA_DIST += src/pcre2_chartables.c.dist
EXTRA_DIST += \
src/pcre2_chartables.c.dist \
src/pcre2_chartables.c.ebcdic-1047-nl15 \
src/pcre2_chartables.c.ebcdic-1047-nl25
CLEANFILES += src/pcre2_chartables.c

# The JIT compiler lives in a separate directory, but its files are #included
Expand Down
25 changes: 20 additions & 5 deletions README
Original file line number Diff line number Diff line change
Expand Up @@ -309,11 +309,22 @@ library. They are also documented in the pcre2build man page.

--enable-ebcdic --disable-unicode

This automatically implies --enable-rebuild-chartables (see above). However,
when PCRE2 is built this way, it always operates in EBCDIC. It cannot support
both EBCDIC and UTF-8/16/32. There is a second option, --enable-ebcdic-nl25,
which specifies that the code value for the EBCDIC NL character is 0x25
instead of the default 0x15.
This automatically implies --enable-rebuild-chartables (see above), in order
to ensure that you have the correct default character tables for your system's
codepage. There is an exception when you set --enable-ebcdic-ignoring-compiler
(see below), which allows using a default set of EBCDIC 1047 character tables
rather than forcing use of --enable-rebuild-chartables.

When PCRE2 is built with EBCDIC support, it always operates in EBCDIC. It
cannot support both EBCDIC and ASCII or UTF-8/16/32.

There is a second option, --enable-ebcdic-nl25, which specifies that the code
value for the EBCDIC NL character is 0x25 instead of the default 0x15.

There is a third option, --enable-ebcdic-ignoring-compiler, which disregards
the compiler's codepage when determining the numeric value of C character
constants such as 'z', and instead forces PCRE2 to use numeric constants for
the EBCDIC 1047 codepage.

. If you specify --enable-debug, additional debugging code is included in the
build. This option is intended for use by the PCRE2 maintainers.
Expand Down Expand Up @@ -822,6 +833,10 @@ The distribution should contain the files listed below.
src/pcre2_chartables.c.dist a default set of character tables that assume
ASCII coding; unless --enable-rebuild-chartables is
specified, used by copying to pcre2_chartables.c
src/pcre2_chartables.c.ebcdic-1047-{nl15,nl25} a default set of character
tables for EBCDIC 1047; used if
--enable-ebcdic-ignoring-compiler is specified
without --enable-rebuild-chartables

src/pcre2posix.c )
src/pcre2_auto_possess.c )
Expand Down
Loading

0 comments on commit aea53cd

Please sign in to comment.