From a0ab90b8819b86227c944050f223f05c19a12a39 Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Sat, 1 Dec 2018 21:53:07 +0000 Subject: [PATCH] crlf: re-use existing crlf script to create odb Re-use the existing crlf data generation script for creating the to-odb dataset. Also, store the actual file contents instead of the ID so that we can identify differences instead of detecting that differences exist. --- tests/resources/crlf/.gitattributes | 1 + .../checkin_input_files => crlf}/all-crlf | 0 .../all-crlf-utf8bom | 0 .../checkin_input_files => crlf}/all-lf | 0 .../all-lf-utf8bom | 0 .../binary-all-crlf | 0 .../binary-all-lf | 0 .../binary-mixed-lf-cr | 0 .../binary-mixed-lf-cr-crlf | 0 .../few-utf8-chars-crlf | 0 .../few-utf8-chars-lf | 0 .../many-utf8-chars-crlf | 0 .../many-utf8-chars-lf | 0 .../checkin_input_files => crlf}/mixed-lf-cr | 0 .../mixed-lf-cr-crlf | 0 .../checkin_input_files => crlf}/more-crlf | 0 .../more-crlf-utf8bom | 0 .../checkin_input_files => crlf}/more-lf | 0 .../more-lf-utf8bom | 0 .../checkin_input_files => crlf}/zero-byte | 0 tests/resources/generate_crlf.sh | 62 +++++++++ tests/resources/generate_crlf_checkin.sh | 118 ------------------ 22 files changed, 63 insertions(+), 118 deletions(-) create mode 100644 tests/resources/crlf/.gitattributes rename tests/resources/{crlf_data/checkin_input_files => crlf}/all-crlf (100%) rename tests/resources/{crlf_data/checkin_input_files => crlf}/all-crlf-utf8bom (100%) rename tests/resources/{crlf_data/checkin_input_files => crlf}/all-lf (100%) rename tests/resources/{crlf_data/checkin_input_files => crlf}/all-lf-utf8bom (100%) rename tests/resources/{crlf_data/checkin_input_files => crlf}/binary-all-crlf (100%) rename tests/resources/{crlf_data/checkin_input_files => crlf}/binary-all-lf (100%) rename tests/resources/{crlf_data/checkin_input_files => crlf}/binary-mixed-lf-cr (100%) rename tests/resources/{crlf_data/checkin_input_files => crlf}/binary-mixed-lf-cr-crlf (100%) rename tests/resources/{crlf_data/checkin_input_files => crlf}/few-utf8-chars-crlf (100%) rename tests/resources/{crlf_data/checkin_input_files => crlf}/few-utf8-chars-lf (100%) rename tests/resources/{crlf_data/checkin_input_files => crlf}/many-utf8-chars-crlf (100%) rename tests/resources/{crlf_data/checkin_input_files => crlf}/many-utf8-chars-lf (100%) rename tests/resources/{crlf_data/checkin_input_files => crlf}/mixed-lf-cr (100%) rename tests/resources/{crlf_data/checkin_input_files => crlf}/mixed-lf-cr-crlf (100%) rename tests/resources/{crlf_data/checkin_input_files => crlf}/more-crlf (100%) rename tests/resources/{crlf_data/checkin_input_files => crlf}/more-crlf-utf8bom (100%) rename tests/resources/{crlf_data/checkin_input_files => crlf}/more-lf (100%) rename tests/resources/{crlf_data/checkin_input_files => crlf}/more-lf-utf8bom (100%) rename tests/resources/{crlf_data/checkin_input_files => crlf}/zero-byte (100%) delete mode 100755 tests/resources/generate_crlf_checkin.sh diff --git a/tests/resources/crlf/.gitattributes b/tests/resources/crlf/.gitattributes new file mode 100644 index 000000000..fa1385d99 --- /dev/null +++ b/tests/resources/crlf/.gitattributes @@ -0,0 +1 @@ +* -text diff --git a/tests/resources/crlf_data/checkin_input_files/all-crlf b/tests/resources/crlf/all-crlf similarity index 100% rename from tests/resources/crlf_data/checkin_input_files/all-crlf rename to tests/resources/crlf/all-crlf diff --git a/tests/resources/crlf_data/checkin_input_files/all-crlf-utf8bom b/tests/resources/crlf/all-crlf-utf8bom similarity index 100% rename from tests/resources/crlf_data/checkin_input_files/all-crlf-utf8bom rename to tests/resources/crlf/all-crlf-utf8bom diff --git a/tests/resources/crlf_data/checkin_input_files/all-lf b/tests/resources/crlf/all-lf similarity index 100% rename from tests/resources/crlf_data/checkin_input_files/all-lf rename to tests/resources/crlf/all-lf diff --git a/tests/resources/crlf_data/checkin_input_files/all-lf-utf8bom b/tests/resources/crlf/all-lf-utf8bom similarity index 100% rename from tests/resources/crlf_data/checkin_input_files/all-lf-utf8bom rename to tests/resources/crlf/all-lf-utf8bom diff --git a/tests/resources/crlf_data/checkin_input_files/binary-all-crlf b/tests/resources/crlf/binary-all-crlf similarity index 100% rename from tests/resources/crlf_data/checkin_input_files/binary-all-crlf rename to tests/resources/crlf/binary-all-crlf diff --git a/tests/resources/crlf_data/checkin_input_files/binary-all-lf b/tests/resources/crlf/binary-all-lf similarity index 100% rename from tests/resources/crlf_data/checkin_input_files/binary-all-lf rename to tests/resources/crlf/binary-all-lf diff --git a/tests/resources/crlf_data/checkin_input_files/binary-mixed-lf-cr b/tests/resources/crlf/binary-mixed-lf-cr similarity index 100% rename from tests/resources/crlf_data/checkin_input_files/binary-mixed-lf-cr rename to tests/resources/crlf/binary-mixed-lf-cr diff --git a/tests/resources/crlf_data/checkin_input_files/binary-mixed-lf-cr-crlf b/tests/resources/crlf/binary-mixed-lf-cr-crlf similarity index 100% rename from tests/resources/crlf_data/checkin_input_files/binary-mixed-lf-cr-crlf rename to tests/resources/crlf/binary-mixed-lf-cr-crlf diff --git a/tests/resources/crlf_data/checkin_input_files/few-utf8-chars-crlf b/tests/resources/crlf/few-utf8-chars-crlf similarity index 100% rename from tests/resources/crlf_data/checkin_input_files/few-utf8-chars-crlf rename to tests/resources/crlf/few-utf8-chars-crlf diff --git a/tests/resources/crlf_data/checkin_input_files/few-utf8-chars-lf b/tests/resources/crlf/few-utf8-chars-lf similarity index 100% rename from tests/resources/crlf_data/checkin_input_files/few-utf8-chars-lf rename to tests/resources/crlf/few-utf8-chars-lf diff --git a/tests/resources/crlf_data/checkin_input_files/many-utf8-chars-crlf b/tests/resources/crlf/many-utf8-chars-crlf similarity index 100% rename from tests/resources/crlf_data/checkin_input_files/many-utf8-chars-crlf rename to tests/resources/crlf/many-utf8-chars-crlf diff --git a/tests/resources/crlf_data/checkin_input_files/many-utf8-chars-lf b/tests/resources/crlf/many-utf8-chars-lf similarity index 100% rename from tests/resources/crlf_data/checkin_input_files/many-utf8-chars-lf rename to tests/resources/crlf/many-utf8-chars-lf diff --git a/tests/resources/crlf_data/checkin_input_files/mixed-lf-cr b/tests/resources/crlf/mixed-lf-cr similarity index 100% rename from tests/resources/crlf_data/checkin_input_files/mixed-lf-cr rename to tests/resources/crlf/mixed-lf-cr diff --git a/tests/resources/crlf_data/checkin_input_files/mixed-lf-cr-crlf b/tests/resources/crlf/mixed-lf-cr-crlf similarity index 100% rename from tests/resources/crlf_data/checkin_input_files/mixed-lf-cr-crlf rename to tests/resources/crlf/mixed-lf-cr-crlf diff --git a/tests/resources/crlf_data/checkin_input_files/more-crlf b/tests/resources/crlf/more-crlf similarity index 100% rename from tests/resources/crlf_data/checkin_input_files/more-crlf rename to tests/resources/crlf/more-crlf diff --git a/tests/resources/crlf_data/checkin_input_files/more-crlf-utf8bom b/tests/resources/crlf/more-crlf-utf8bom similarity index 100% rename from tests/resources/crlf_data/checkin_input_files/more-crlf-utf8bom rename to tests/resources/crlf/more-crlf-utf8bom diff --git a/tests/resources/crlf_data/checkin_input_files/more-lf b/tests/resources/crlf/more-lf similarity index 100% rename from tests/resources/crlf_data/checkin_input_files/more-lf rename to tests/resources/crlf/more-lf diff --git a/tests/resources/crlf_data/checkin_input_files/more-lf-utf8bom b/tests/resources/crlf/more-lf-utf8bom similarity index 100% rename from tests/resources/crlf_data/checkin_input_files/more-lf-utf8bom rename to tests/resources/crlf/more-lf-utf8bom diff --git a/tests/resources/crlf_data/checkin_input_files/zero-byte b/tests/resources/crlf/zero-byte similarity index 100% rename from tests/resources/crlf_data/checkin_input_files/zero-byte rename to tests/resources/crlf/zero-byte diff --git a/tests/resources/generate_crlf.sh b/tests/resources/generate_crlf.sh index db109ac08..068cad053 100755 --- a/tests/resources/generate_crlf.sh +++ b/tests/resources/generate_crlf.sh @@ -67,6 +67,63 @@ create_to_workdir_data() { fi } +create_to_odb_data() { + local input=$1 + local output=$2 + local tempdir=$3 + local systype=$4 + local autocrlf=$5 + local safecrlf=$6 + local attr=$7 + + local destdir="${output}/${systype}_to_odb/autocrlf_${autocrlf},safecrlf_${safecrlf}" + + if [ "$attr" != "" ]; then + local attrdir=`echo $attr | sed -e "s/ /,/g" | sed -e "s/=/_/g"` + destdir="${destdir},${attrdir}" + fi + + if [ "$tempdir" = "" ]; then + local workdir="${destdir}/_workdir" + else + local workdir="${tempdir}/generate_crlf_${RANDOM}" + fi + + echo "Creating ${destdir}" + mkdir -p "${destdir}" + + git init "${workdir}" >/dev/null + git --work-tree="${workdir}" --git-dir="${workdir}/.git" config core.autocrlf "${autocrlf}" + git --work-tree="${workdir}" --git-dir="${workdir}/.git" config core.safecrlf "${safecrlf}" + + if [ "$attr" != "" ]; then + echo "* ${attr}" > "${workdir}/.gitattributes" + fi + + cp ${input}/* ${workdir} + + for path in ${workdir}/*; do + filename=$(basename $path) + failed="" + output=$(git --work-tree="${workdir}" --git-dir="${workdir}/.git" add ${filename} 2>&1) || failed=1 + + if [ ! -z "${failed}" -a "${output:0:35}" == "fatal: LF would be replaced by CRLF" ]; then + echo "LF would be replaced by CRLF in '${filename}'" > "${destdir}/${filename}.fail" + elif [ ! -z "${failed}" -a "${output:0:35}" == "fatal: CRLF would be replaced by LF" ]; then + echo "CRLF would be replaced by LF in '${filename}'" > "${destdir}/${filename}.fail" + elif [ ! -z "${failed}" ]; then + echo "failed to add ${filename}: ${output}" 1>&2 + exit 1 + else + git --work-tree="${workdir}" --git-dir="${workdir}/.git" cat-file blob ":${filename}" > "${destdir}/${filename}" + fi + done + + if [ "$tempdir" != "" ]; then + rm -rf "${workdir}" + fi +} + if [[ `uname -s` == MINGW* ]]; then systype="windows" else @@ -80,6 +137,11 @@ for autocrlf in true false input; do create_to_workdir_data "${input}" "${output}" "${tempdir}" \ "${systype}" "${autocrlf}" "${attr}" + + for safecrlf in true false warn; do + create_to_odb_data "${input}" "${output}" "${tempdir}" \ + "${systype}" "${autocrlf}" "${safecrlf}" "${attr}" + done done done diff --git a/tests/resources/generate_crlf_checkin.sh b/tests/resources/generate_crlf_checkin.sh deleted file mode 100755 index d94ee99c6..000000000 --- a/tests/resources/generate_crlf_checkin.sh +++ /dev/null @@ -1,118 +0,0 @@ -#!/usr/bin/env bash -# -# This script will generate the test corpus for CR/LF data using git; -# we created files with all possible line ending varieties (all LF, all -# CRLF, mixed, etc) on all the possible line ending configurations -# (`core.autocrlf=true`, `text=auto` in gitattributes, etc) add add them -# to git and check the added hash. This allows us to validate that our -# configuration will match byte-for-byte the configuration that git produces. -# -# To update the test resource data, from the test resource directory: -# git rm -r ./crlf_data/checkin_results -# sh ./generate_crlf_checkin.sh ./crlf_data/checkin_input_files ./crlf_data/checkin_results /tmp/crlf_gitdirs -# git add ./crlf_data/checkin_results - -set -e - -if [ "$1" == "" -o "$2" == "" ]; then - echo "usage: $0 inputfiles-directory directory [tempdir]" - exit 1 -fi - -input=$1 -output=$2 -tempdir=$3 - -if [ ${input:1} != "/" ]; then - input="$PWD/$input" -fi - -if [ ${output:1} != "/" ]; then - output="$PWD/$output" -fi - -if [ "${tempdir}" != "" -a "${tempdir:1}" != "/" ]; then - tempdir="$PWD/$tempdir" -fi - -set -u - -create_test_data() { - local input=$1 - local output=$2 - local tempdir=$3 - local safecrlf=$4 - local autocrlf=$5 - local attr=$6 - - local destdir="${output}/safecrlf_${safecrlf},autocrlf_${autocrlf}" - - if [ "$attr" != "" ]; then - local attrdir=`echo $attr | sed -e "s/ /,/g" | sed -e "s/=/_/g"` - destdir="${destdir},${attrdir}" - fi - - if [ "$tempdir" = "" ]; then - tempdir="${output}/generate_crlf_${RANDOM}" - else - tempdir="${tempdir}/generate_crlf_${RANDOM}" - fi - - echo "Generating ${destdir}" - mkdir -p "${destdir}" - mkdir -p "${tempdir}" - - git init "${tempdir}" - if [ "$attr" != "" ]; then - echo "* ${attr}" > "${tempdir}/.gitattributes" - fi - cp "$input"/* "${tempdir}" - pushd "${tempdir}" - git config core.autocrlf ${autocrlf} - git config core.safecrlf ${safecrlf} - for file in * - do - process_file "$destdir" "$file" - done - popd - - rm -rf "$tempdir" -} - -function process_file() { - destdir=$1 - file=$2 - - rm -f "$destdir/$file.obj" "$destdir/$file.fail" - - set +e - OUTPUT=$(git add "$file" 2>&1) - if [ $? -ne 0 ]; then - set -e - touch "$destdir/$file.fail" - if [ "${OUTPUT:0:38}" == "fatal: CRLF would be replaced by LF in" ]; then - echo "CRLF would be replaced by LF" > "$destdir/$file.fail" - elif [ "${OUTPUT:0:38}" == "fatal: LF would be replaced by CRLF in" ]; then - echo "LF would be replaced by CRLF" > "$destdir/$file.fail" - fi - else - OBJ=$(git ls-files -s | cut -d ' ' -f 2) - set -e - echo "$OBJ" > "$destdir/$file.obj" - fi - rm -f .git/index -} - -export LC_ALL=C - -for safecrlf in true false warn; do - for autocrlf in true false input; do - for attr in "" text text=auto -text crlf -crlf eol=lf eol=crlf \ - "text eol=lf" "text eol=crlf" \ - "text=auto eol=lf" "text=auto eol=crlf"; do - - create_test_data "${input}" "${output}" "${tempdir}" \ - "${safecrlf}" "${autocrlf}" "${attr}" - done - done -done